From 3892863a001602bad5fa7560bfe374dab9b0ee05 Mon Sep 17 00:00:00 2001
From: jsl-models <74001263+jsl-models@users.noreply.github.com>
Date: Sun, 8 Sep 2024 15:09:21 +0700
Subject: [PATCH] 2024-09-05-sent_arbertv2_ar (#14394)

* Add model 2024-09-07-sent_retromae_msmarco_distill_en
* Add model 2024-09-08-analisis_sentimientos_beto_tass_c_en
* Add model 2024-09-08-indobert_sentiment_analysis_id
* Add model 2024-09-07-spanish_finnish_extra_pipeline_en
* Add model 2024-09-04-distilbert_finetuned_squadv2_fuutoru_en
* Add model 2024-09-07-whisper_small_kurdish_sorani_10_pipeline_ku
* Add model 2024-09-08-bert_imdb_pipeline_en
* Add model 2024-09-08-linkbert_base_en
* Add model 2024-09-07-burmese_awesome_qa_model_ravinderbrai_en
* Add model 2024-09-08-custommodel_yelp_hanyundudddd_pipeline_en
* Add model 2024-09-08-classification_model_mtebad_pipeline_en
* Add model 2024-09-08-has_the_doctor_specified_whether_the_patient_can_belarusian_seen_heard_bert_first512_pipeline_en
* Add model 2024-09-07-burmese_awesome_qa_model_rahulcdeo_en
* Add model 2024-09-08-whisper_small_finetuned_common_voice_marathi_marh_mr
* Add model 2024-09-06-danish_distilbert_pipeline_en
* Add model 2024-09-08-distilbert_base_uncased_finetuned_emotion_bistudent_pipeline_en
* Add model 2024-09-08-distilbert_base_uncased_finetuned_emotion_talzoomanzoo_en
* Add model 2024-09-08-distillbert_sentiment_analysis_en
* Add model 2024-09-08-lenu_ewe_pipeline_en
* Add model 2024-09-08-roberta_qa_QA_for_Event_Extraction_en
* Add model 2024-09-08-mpnet_twitter_freq100_pipeline_en
* Add model 2024-09-07-qa_ccc_model_pipeline_en
* Add model 2024-09-07-burmese_awesome_qa_model_vikas12061995_pipeline_en
* Add model 2024-09-08-setfit_model_ireland_binary_label1_epochs2_feb_28_2023_en
* Add model 2024-09-07-lab1_random_sfliao_pipeline_en
* Add model 2024-09-08-psais_multi_qa_mpnet_base_dot_v1_8shot_en
* Add model 2024-09-08-all_mpnet_base_v2_navteca_en
* Add model 2024-09-08-all_mpnet_base_v2_lr_1e_8_margin_5_epoch_3_en
* Add model 2024-09-07-v2_mrcl0ud_pipeline_en
* Add model 2024-09-08-setfit_model_ireland_3labels_balanced_data_en
* Add model 2024-09-08-mpnet_base_nli_matryoshka_yoshinori_sano_en
* Add model 2024-09-08-facets_gpt_35_pipeline_en
* Add model 2024-09-08-all_mpnet_janet_10k_v1_en
* Add model 2024-09-08-all_mpnet_janet_10k_v1_pipeline_en
* Add model 2024-09-06-bert_base_multilingual_cased_finetuned_amharic_xx
* Add model 2024-09-08-semanlink_all_mpnet_base_v2_en
* Add model 2024-09-08-amazonpolarity_fewshot_en
* Add model 2024-09-07-phowhisper_tiny_vinai_vi
* Add model 2024-09-04-distil_bert_docred_ner_en
* Add model 2024-09-07-arabic_bert_model_ar
* Add model 2024-09-07-test_demo_qa_en
* Add model 2024-09-08-opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_2000instancesopus_leaningrate2e_05_batchsize8_11epoch_3_pipeline_en
* Add model 2024-09-06-pii_roberta_large_pipeline_en
* Add model 2024-09-08-xlm_roberta_base_finetuned_malagasy_en
* Add model 2024-09-08-test999_en
* Add model 2024-09-02-burmese_awesome_model_20wds_en
* Add model 2024-09-07-burmese_awesome_model_akash24_en
* Add model 2024-09-08-test999_pipeline_en
* Add model 2024-09-07-distilbert_base_uncased_finetuned_squad_d5716d28_osanseviero_en
* Add model 2024-09-05-burmese_awesome_wnut_place_pipeline_en
* Add model 2024-09-08-sent_xlm_roberta_base_finetuned_questions_en
* Add model 2024-09-06-burmese_awesome_qa_model_robinsh2023_pipeline_en
* Add model 2024-09-07-whisper_gujarati_small_pipeline_gu
* Add model 2024-09-07-llama_model_en
* Add model 2024-09-04-deberta_classifier_feedback_1024_pseudo_final_pipeline_en
* Add model 2024-09-06-opus_maltese_russian_english_end_tonga_tonga_islands_end_russian_tonga_tonga_islands_english_en
* Add model 2024-09-05-qa_synth_02_oct_with_finetune_1_1_en
* Add model 2024-09-07-distilbert_finetuned_squadv2_thangduong0509_en
* Add model 2024-09-04-roberta_finetuned_subjqa_movies_2_soumiknayak_pipeline_en
* Add model 2024-09-07-marian_finetuned_combined_dataset_1_1_pipeline_en
* Add model 2024-09-08-bert_imdb_en
* Add model 2024-09-07-run1_pipeline_en
* Add model 2024-09-07-distilbert_base_uncased_finetuned_ner_shashank612_pipeline_en
* Add model 2024-09-07-distilbert_base_uncased_finetuned_squad_injustice_en
* Add model 2024-09-06-distilbert_base_cased_finetuned_chunk_2_pipeline_en
* Add model 2024-09-06-burmese_awesome_wnut_jpr_gonzalezrostani_en
* Add model 2024-09-07-greeklegalroberta_v2_pipeline_en
* Add model 2024-09-08-bert_base_yelp_reviews_pipeline_en
* Add model 2024-09-06-sent_neural_cherche_sparse_embed_pipeline_en
* Add model 2024-09-07-distilbert_base_uncased_finetuned_squad_devsick_pipeline_en
* Add model 2024-09-07-distilbert_base_uncased_finetuned_squad_devsick_en
* Add model 2024-09-06-distilbert_finetuned_ai4privacy_v2_pipeline_en
* Add model 2024-09-06-content_en
* Add model 2024-09-08-cpu_netzero_classifier_pipeline_en
* Add model 2024-09-06-all_mpnet_base_v2_bioasq_matryoshka_pipeline_en
* Add model 2024-09-07-distil_train_token_classification_nepal_bhasa_en
* Add model 2024-09-08-gal_sayula_popoluca_iwcg_4_en
* Add model 2024-09-08-xlm_roberta_base_finetuned_wikiann_hindi_en
* Add model 2024-09-08-xlm_roberta_base_finetuned_wikiann_hindi_pipeline_en
* Add model 2024-09-08-xlm_roberta_base_finetuned_panx_italian_aiventurer_en
* Add model 2024-09-07-cuad_distil_governing_law_08_28_v1_en
* Add model 2024-09-08-xlm_roberta_base_finetuned_panx_all_pockypocky_pipeline_en
* Add model 2024-09-08-luganda_ner_v1_pipeline_en
* Add model 2024-09-08-xlm_roberta_base_finetuned_panx_italian_aiventurer_pipeline_en
* Add model 2024-09-08-xlm_roberta_base_finetuned_panx_german_french_buruzaemon_pipeline_en
* Add model 2024-09-06-sungbeom_whisper_small_korean_set9_pipeline_ko
* Add model 2024-09-05-turkish_base_bert_capitalization_correction_pipeline_tr
* Add model 2024-09-08-gal_enptsp_xlm_r_gl
* Add model 2024-09-08-xlm_roberta_base_finetuned_panx_german_nitin1690_en
* Add model 2024-09-08-multilingual_xlm_roberta_for_ner_c4n11_xx
* Add model 2024-09-07-fresh_model_uncased_pipeline_en
* Add model 2024-09-07-distilbert_base_uncased_squad2_lora_merged_jeukhwang_en
* Add model 2024-09-08-gal_portuguese_xlm_r_pipeline_gl
* Add model 2024-09-08-opus_maltese_english_japanese_finetuned_english_tonga_tonga_islands_japanese_en
* Add model 2024-09-08-xlm_roberta_base_finetuned_panx_french_goldenk_en
* Add model 2024-09-07-cross_all_bs320_vanilla_finetuned_webnlg2020_metric_average_pipeline_en
* Add model 2024-09-08-xlm_roberta_base_finetuned_panx_german_fernweh23_pipeline_en
* Add model 2024-09-07-setfit_model_independence_labelintl_epochs2_pipeline_en
* Add model 2024-09-08-xlm_roberta_base_finetuned_panx_italian_leosol_en
* Add model 2024-09-08-cat_ner_xlmr_4_en
* Add model 2024-09-08-cross_all_bs192_hardneg_finetuned_webnlg2020_relevance_en
* Add model 2024-09-08-setfit_model_ireland_3labels_balanced_data_pipeline_en
* Add model 2024-09-08-xlm_roberta_base_finetuned_panx_all_likejazz_pipeline_en
* Add model 2024-09-08-xlm_roberta_base_finetuned_panx_all_likejazz_en
* Add model 2024-09-06-norwegian_bokml_whisper_small_verbatim_nbailabbeta_pipeline_no
* Add model 2024-09-07-lm_ner_skills_extractor_bert_en
* Add model 2024-09-08-xlm_roberta_base_finetuned_panx_italian_aaa01101312_pipeline_en
* Add model 2024-09-07-burmese_awesome_qa_model_markchiing_en
* Add model 2024-09-08-afro_xlmr_base_finetuned_kintweetsb_en
* Add model 2024-09-08-xlm_roberta_base_word_shopsign_nepal_bhasa_pipeline_en
* Add model 2024-09-08-xlm_roberta_base_finetuned_panx_english_iis2009002_pipeline_en
* Add model 2024-09-07-whisper_small_hindi_drinktoomuchsax_en
* Add model 2024-09-07-burmese_awesome_wnut_model_halikuralde2_pipeline_en
* Add model 2024-09-07-sent_turkish_tiny_bert_uncased_tr
* Add model 2024-09-08-recommend_songs_en
* Add model 2024-09-08-distilbert_base_uncased_finetuned_emotion_niwang2024_en
* Add model 2024-09-08-classification_model_sushant22_en
* Add model 2024-09-08-intent_classifier_frana9812_en
* Add model 2024-09-08-distilbert_base_uncased_finetuned_emotion_with_annotated_by_gpt35_pipeline_en
* Add model 2024-09-08-distilbert_base_uncased_finetuned_emotion_with_annotated_by_gpt35_en
* Add model 2024-09-08-trainer_output_dir_pipeline_en
* Add model 2024-09-08-cm124057_01_en
* Add model 2024-09-08-agnews_padding60model_en
* Add model 2024-09-08-distilbert_coarse5_js_1_1_pipeline_en
* Add model 2024-09-08-distilbert_coarse5_js_1_1_en
* Add model 2024-09-08-bert_based_uncased_finetuned_imdb_en
* Add model 2024-09-08-distilbert_tweet_pipeline_en
* Add model 2024-09-08-multidim_default_template_en
* Add model 2024-09-08-stego_classifier_checkpoint_epoch_10_2024_07_26_14_26_52_en
* Add model 2024-09-07-bert_base_dutch_cased_finetuned_mbert_finetuned_ner_en
* Add model 2024-09-08-distilbert_base_uncased_finetuned_emotion_schnatz65_pipeline_en
* Add model 2024-09-07-ner_newsagency_bert_french_pipeline_fr
* Add model 2024-09-06-nusabert_base_pipeline_en
* Add model 2024-09-08-bertoslav_limited_en
* Add model 2024-09-08-distilbert_base_uncased_finetuned_imdb_xxxxxcz_en
* Add model 2024-09-07-wolof_finetuned_ner_pipeline_en
* Add model 2024-09-08-usclm_distilbert_base_uncased_mk1_en
* Add model 2024-09-08-distilbert_base_cased_finetuned_imdb_shindc_en
* Add model 2024-09-08-distilbert_base_cased_finetuned_imdb_shindc_pipeline_en
* Add model 2024-09-08-distilbert_base_english_greek_modern_russian_cased_pipeline_en
* Add model 2024-09-06-xlm_roberta_base_panx_dataset_russian_pipeline_en
* Add model 2024-09-08-distilbert_base_uncased_finetuned_imdb_adrien35_pipeline_en
* Add model 2024-09-08-maskedlm_finetuned_imdb_en
* Add model 2024-09-08-distilbert_base_cased_distilbert_en
* Add model 2024-09-08-imdb_distilbert_apoorvaec1030_en
* Add model 2024-09-08-updated_distilbert_stance_detection_pipeline_en
* Add model 2024-09-06-burmese_awesome_qa_model_nandyala12_en
* Add model 2024-09-08-category_1_delivery_cancellation_distilbert_base_uncased_distilled_squad_v1_en
* Add model 2024-09-08-quality_model_apr3_en
* Add model 2024-09-08-joo_en
* Add model 2024-09-08-resume_sentence_classifier_en
* Add model 2024-09-08-clasificadorcorreosoportedistilespanol_pipeline_en
* Add model 2024-09-08-hw_1_aia_tclin_en
* Add model 2024-09-08-hw_1_aia_tclin_pipeline_en
* Add model 2024-09-08-depression_detection_model_en
* Add model 2024-09-08-distilbert_base_multilingual_cased_regression_finetuned_ptt_pipeline_xx
* Add model 2024-09-08-trainer1f_pipeline_en
* Add model 2024-09-08-test_trainer4_en
* Add model 2024-09-07-nuclear_medicine_daroberta_en
* Add model 2024-09-08-distilbert_base_uncased_odm_zphr_0st13sd_ut72ut1large13pfxnf_simsp400_clean200_pipeline_en
* Add model 2024-09-08-distilbert_movie_review_sentiment_classifier_3_pipeline_en
* Add model 2024-09-08-tmp_trainer_ubermenchh_pipeline_en
* Add model 2024-09-07-whisper_small_english_atco2_asr_pipeline_en
* Add model 2024-09-08-all_mpnet_base_v2_topic_abstract_similarity_en
* Add model 2024-09-08-xtremedistil_l6_h384_uncased_en
* Add model 2024-09-08-xlm_roberta_base_finetuned_panx_german_french_alkampfer_en
* Add model 2024-09-05-sbert_punc_case_russian_pipeline_ru
* Add model 2024-09-08-psais_multi_qa_mpnet_base_dot_v1_8shot_pipeline_en
* Add model 2024-09-08-setfit_model_ireland_4labels_unbalanced_data_3epochs_en
* Add model 2024-09-08-xlm_roberta_base_word_shopsign_nepal_bhasa_en
* Add model 2024-09-04-sent_marbert_ar
* Add model 2024-09-06-paws_x_xlm_r_only_german_en
* Add model 2024-09-08-twitter_roberta_base_topic_latest_en
* Add model 2024-09-08-platzi_en
* Add model 2024-09-08-roberta_base_emotion_pysentimiento_pipeline_en
* Add model 2024-09-08-best_model_yelp_polarity_16_13_en
* Add model 2024-09-08-roberta_soft_llm_multip_pipeline_en
* Add model 2024-09-08-lexuz1_pipeline_en
* Add model 2024-09-08-auro_4_pipeline_en
* Add model 2024-09-08-auro_4_en
* Add model 2024-09-08-tweetcat_pipeline_en
* Add model 2024-09-08-roberta_news_classification_aparnaullas_pipeline_en
* Add model 2024-09-08-bertin_roberta_fine_tuned_text_classification_slovene_data_augmentation_ds_en
* Add model 2024-09-08-n2c2_soap_entailment_pipeline_en
* Add model 2024-09-08-hw1_eva1209_en
* Add model 2024-09-08-inde_4_en
* Add model 2024-09-08-sota_4_pipeline_en
* Add model 2024-09-08-testing_en
* Add model 2024-09-03-n_roberta_twitterfin_padding60model_en
* Add model 2024-09-08-w2l_en
* Add model 2024-09-08-n_roberta_imdb_padding10model_pipeline_en
* Add model 2024-09-08-trecdl22_crossencoder_roberta_pipeline_en
* Add model 2024-09-08-w2l_pipeline_en
* Add model 2024-09-08-distilbert_base_uncased_finetuned_emotion_lilvoda_en
* Add model 2024-09-06-question_answering_tutorial_practice_en
* Add model 2024-09-07-qa_model_fsghs_pipeline_en
* Add model 2024-09-06-xlm_roberta_base_finetuned_panx_german_francos_pipeline_en
* Add model 2024-09-07-setfit_model_independence_labelintl_epochs2_en
* Add model 2024-09-06-burmese_awesome_qa_model_yangyangsong_pipeline_en
* Add model 2024-09-07-roberta_large_genia_ner_pipeline_en
* Add model 2024-09-07-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_anhtuanta_pipeline_en
* Add model 2024-09-08-xlm_twitter_politics_sentiment_en
* Add model 2024-09-08-xlm_roberta_sentiment_romanurdu_en
* Add model 2024-09-08-rulebert_v0_4_k0_pipeline_it
* Add model 2024-09-08-portuguese_up_xlmr_oneshot_falsetrue_0_2_best_en
* Add model 2024-09-07-sent_tech_roberta_pipeline_vi
* Add model 2024-09-03-finer_ord_transformers_2_en
* Add model 2024-09-08-portuguese_up_xlmr_oneshot_falsetrue_0_2_best_pipeline_en
* Add model 2024-09-08-xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_60000_pipeline_en
* Add model 2024-09-08-khmer_text_classification_roberta_km
* Add model 2024-09-08-khmer_text_classification_roberta_pipeline_km
* Add model 2024-09-08-xlm_roberta_base_final_mixed_aug_insert_bert_2_en
* Add model 2024-09-08-mlm_jjk_subtitle_en
* Add model 2024-09-08-xlmroberta_classifier_autonlp_fake_news_detection_system_29906863_hi
* Add model 2024-09-07-biomedroberta_finetuned_valid_testing_0_0001_16_pipeline_en
* Add model 2024-09-08-xlmroberta_classifier_autonlp_fake_news_detection_system_29906863_pipeline_hi
* Add model 2024-09-08-predict_perception_xlmr_focus_assassin_en
* Add model 2024-09-08-mminilm_l6_v2_english_portuguese_msmarco_v1_pipeline_pt
* Add model 2024-09-08-finance_news_classifier_en
* Add model 2024-09-08-babyberta_wikipedia1_2_5_with_masking_run2_finetuned_qasrl_pipeline_en
* Add model 2024-09-08-stego_classifier_checkpoint_epoch_0_2024_07_26_11_37_42_en
* Add model 2024-09-08-xlm_roberta_longformer_base_4096_xnli_french_3_classes_rua_wl_3_classes_fr
* Add model 2024-09-07-roberta_self_trained_pipeline_en
* Add model 2024-09-08-stego_classifier_checkpoint_epoch_0_2024_07_26_11_37_42_pipeline_en
* Add model 2024-09-07-distilbert_finetuned_ner_veronica1608_pipeline_en
* Add model 2024-09-07-mpnet_base_natural_questions_mnsrl_pipeline_en
* Add model 2024-09-08-argureviews_specificity_roberta_v1_pipeline_en
* Add model 2024-09-08-xlmroberta_classifier_deoffxlmr_mono_tamil_ta
* Add model 2024-09-08-xlmroberta_classifier_deoffxlmr_mono_tamil_pipeline_ta
* Add model 2024-09-08-test_trainer4_pipeline_en
* Add model 2024-09-07-sent_telugu_bert_te
* Add model 2024-09-08-multidim_romansh_reg_avg_balanced_default_template_en
* Add model 2024-09-07-lab1_finetuning_daanjiri_pipeline_en
* Add model 2024-09-08-sent_norwegian_bokml_roberta_base_scandi_1e4_en
* Add model 2024-09-07-roberta_base_finetuned_neg_pipeline_en
* Add model 2024-09-08-platzi_pipeline_en
* Add model 2024-09-08-romanurduclassification_pipeline_en
* Add model 2024-09-08-albert_persian_farsi_base_v2_sentiment_digikala_pipeline_fa
* Add model 2024-09-08-distilbert_base_uncased_finetuned_streamers_accelerate_en
* Add model 2024-09-08-distilbert_base_uncased_finetuned_imdb_majkeldcember_en
* Add model 2024-09-08-distilbert_masking_1perc_pipeline_en
* Add model 2024-09-08-distilbert_base_uncased_finetuned_imdb_marcosautuori_en
* Add model 2024-09-08-distilbert_base_uncased_finetuned_imdb_ellieburton_pipeline_en
* Add model 2024-09-08-distilbert_base_uncased_finetuned_imdb_dylettante_pipeline_en
* Add model 2024-09-08-distilbert_base_uncased_finetuned_imdb_pbwinter_en
* Add model 2024-09-08-distilbert_base_uncased_finetuned_imdb_lidiapierre_en
* Add model 2024-09-08-atte_2_pipeline_en
* Add model 2024-09-07-r_t_sms_lm_pipeline_en
* Add model 2024-09-07-qa_iiitdmj_testing_en
* Add model 2024-09-07-distilbert_base_uncased_finetuned_clinc_jeremygf_en

---------

Co-authored-by: ahmedlone127
---
 ...-09-01-deberta_v3_base_company_names_en.md | 94 ++++++++++++++ ...01-deberta_v3_large__sst2__train_8_2_en.md | 94 ++++++++++++++ .../ahmedlone127/2024-09-01-expe_1_en.md | 94 ++++++++++++++ ...db_microsoft_deberta_v3_large_seed_1_en.md | 94 ++++++++++++++ ...dical_spanish_plantl_gob_es_pipeline_es.md | 70 ++++++++++ ...escription_topic_classifier_pipeline_en.md | 70 ++++++++++ ...-09-01-topic_labor_movement_pipeline_en.md | 70 ++++++++++ ...02-albert_base_qa_1_batch_1_pipeline_en.md | 69 ++++++++++ ...ert_base_qa_coqa_2_k_fold_2_pipeline_en.md | 69 ++++++++++ .../2024-09-02-bert_mini_uncased_en.md | 94 ++++++++++++++ ...24-09-02-burmese_awesome_model_20wds_en.md | 94 ++++++++++++++ ...rmese_awesome_model_lukiccc_pipeline_en.md | 70 ++++++++++ ...-09-02-burmese_awesome_model_rdsmaia_en.md | 94 ++++++++++++++ ...s22_10_camembert_pretrained_pipeline_en.md | 70 ++++++++++ .../2024-09-02-distil_task_b_2_pipeline_en.md | 70 ++++++++++ ...ncased_finetuned_lgbt_classification_en.md | 94 ++++++++++++++ ...distilbert_nsfw_appropriate_pipeline_en.md | 70
++++++++++ ...sed_finetuned_cyberbullying_pipeline_en.md | 70 ++++++++++ .../2024-09-02-distilbert_uncased_names_en.md | 94 ++++++++++++++ ...-dummy_model_ankush_chander_pipeline_en.md | 70 ++++++++++ ...dummy_model_jonathanlin0707_pipeline_en.md | 70 ++++++++++ .../2024-09-02-dummy_model_jongyeop_en.md | 94 ++++++++++++++ .../2024-09-02-dummy_model_mindnetml_en.md | 94 ++++++++++++++ .../2024-09-02-dummy_model_zonepg_en.md | 94 ++++++++++++++ .../ahmedlone127/2024-09-02-e5_90k_en.md | 86 +++++++++++++ .../2024-09-02-emotion_recognition_en.md | 94 ++++++++++++++ ...024-09-02-english_hebrew_modern_base_en.md | 94 ++++++++++++++ ...ian_finetuned_pro_emit_big8_pipeline_en.md | 70 ++++++++++ ...e_article_titles_classifier_pipeline_en.md | 70 ++++++++++ ...ng_emotion_model_dearkarina_pipeline_en.md | 70 ++++++++++ ...-09-02-ganda_english_ai_lab_makerere_en.md | 94 ++++++++++++++ ...mdb_distilbert_base_uncased_pipeline_en.md | 70 ++++++++++ .../2024-09-02-indo_roberta_small_id.md | 94 ++++++++++++++ ...9-02-marianmt_hin_eng_czech_pipeline_hi.md | 70 ++++++++++ ...rt_uncased_finetuned_squadv1_mrm8488_en.md | 86 +++++++++++++ .../ahmedlone127/2024-09-02-model_en.md | 86 +++++++++++++ ...net_base_nli_adaptive_layer_pipeline_en.md | 69 ++++++++++ ...a_tonga_islands_romanian_pankaj10034_en.md | 94 ++++++++++++++ ..._norwegian_warmup_best_bleu_pipeline_en.md | 70 ++++++++++ ...-02-personal_whisper_distilled_model_en.md | 84 ++++++++++++ .../2024-09-02-readabert_french_fr.md | 94 ++++++++++++++ ...a_base_tweetner_2020_2021_continuous_en.md | 112 ++++++++++++++++ ...sh_squades_becasincentivos4_pipeline_es.md | 69 ++++++++++ .../2024-09-02-securebert_cyner_en.md | 100 +++++++++++++++ ...9-02-sent_bert_base_greek_uncased_v1_el.md | 94 ++++++++++++++ ...-02-sent_bert_large_portuguese_cased_pt.md | 94 ++++++++++++++ ...amembert_base_ccnet_stsb200_pipeline_en.md | 70 ++++++++++ ...sloberta_slo_word_spelling_annotator_sl.md | 94 ++++++++++++++ .../2024-09-02-temp_checkpoints_en.md | 94 ++++++++++++++ ...ndom_clipmodel_hf_tiny_model_private_en.md | 120 ++++++++++++++++++ .../2024-09-02-transformer_maltese_en.md | 94 ++++++++++++++ .../2024-09-02-turkish2_pipeline_en.md | 70 ++++++++++ ...er_base_quran_ai_by_tarteel_pipeline_en.md | 69 ++++++++++ ...-09-02-whisper_small_english_jenrish_en.md | 84 ++++++++++++ ...whisper_small_taiwanese_minnan_take2_en.md | 84 ++++++++++++ .../ahmedlone127/2024-09-03-accu_0_en.md | 94 ++++++++++++++ ...24-09-03-aigc_detector_env1_pipeline_en.md | 70 ++++++++++ .../2024-09-03-al_roberta_base_en.md | 94 ++++++++++++++ ...2024-09-03-albert_base_qa_2_k_fold_3_en.md | 86 +++++++++++++ ...-all_mpnet_base_v2_eclass_gart_labor_en.md | 86 +++++++++++++ ...v2_lr_2e_7_margin_1_epoch_3_pipeline_en.md | 69 ++++++++++ ...e_v2_southern_sotho_out_sim_pipeline_en.md | 69 ++++++++++ ...mpnet_base_v2_sts_juanignaciosolerno_en.md | 86 +++++++++++++ ...otrain_okr_iptal_3196789879_pipeline_en.md | 70 ++++++++++ ...ai_bge_large_english_v1_5_fine_tuned_en.md | 87 +++++++++++++ ...sking_seed3_finetuned_squad_pipeline_en.md | 69 ++++++++++ ...se_uncased_finetuned_squad_frozen_v1_en.md | 86 +++++++++++++ .../2024-09-03-bert_categorizer_en.md | 94 ++++++++++++++ ...2024-09-03-bert_categorizer_pipeline_en.md | 70 ++++++++++ ...e_base_financial_matryoshka_dustyatx_en.md | 87 +++++++++++++ ...ge_small_english_dcpr_tuned_teachafy_en.md | 87 +++++++++++++ ...ll_english_v1_5_ft_orc_0813_pipeline_en.md | 69 ++++++++++ ...d_webnlg2020_metric_average_pipeline_en.md | 69 ++++++++++ 
.../2024-09-03-bm_french_pipeline_bm.md | 70 ++++++++++ ...024-09-03-brahmai_clip_v0_1_pipeline_en.md | 69 ++++++++++ ...burmese_awesome_model_asmiishripad18_en.md | 94 ++++++++++++++ ...-09-03-burmese_fine_tuned_distilbert_en.md | 94 ++++++++++++++ ...3-classificateur_intention_camembert_en.md | 94 ++++++++++++++ ...3-cleaned_bert_base_cased_500_620e5b_en.md | 86 +++++++++++++ ...-03-clip_vit_large_patch14_baseplate_en.md | 120 ++++++++++++++++++ ...2024-09-03-coptic_english_translator_en.md | 94 ++++++++++++++ ...der_camembert_base_mmarcofr_pipeline_fr.md | 70 ++++++++++ ...coder_camembert_l4_mmarcofr_pipeline_fr.md | 70 ++++++++++ ...r_xlm_roberta_base_mmarcofr_pipeline_fr.md | 70 ++++++++++ ...opus_maltese_german_english_pipeline_en.md | 70 ++++++++++ .../2024-09-03-deberta_v3_base_pipeline_en.md | 70 ++++++++++ ..._finetuned_mlm_accelerate_v3_02_xp_1_en.md | 94 ++++++++++++++ .../2024-09-03-deberta_v3_large_hf_llm_en.md | 94 ++++++++++++++ ...femicide_news_xlmr_dutch_fft_freeze2_en.md | 94 ++++++++++++++ ...ngual_cased_finetuned_bible_pipeline_xx.md | 70 ++++++++++ ...e_multilingual_cased_finetuned_bible_xx.md | 94 ++++++++++++++ ...eletion_multiclass_complete_final_v2_en.md | 94 ++++++++++++++ ...stilbert_base_uncased_edu_classifier_en.md | 94 ++++++++++++++ ...rt_base_uncased_finetuned_ag_news_v5_en.md | 94 ++++++++++++++ ..._uncased_finetuned_emotion_aliramikh_en.md | 94 ++++++++++++++ ...finetuned_emotion_aliramikh_pipeline_en.md | 70 ++++++++++ ...uncased_finetuned_emotion_iamsubrata_en.md | 94 ++++++++++++++ ...netuned_emotion_pulpilisory_pipeline_en.md | 70 ++++++++++ ...uned_imdb_accelerate_cxbn12_pipeline_en.md | 70 ++++++++++ ...ased_finetuned_imdb_accelerate_jhhan_en.md | 94 ++++++++++++++ ...ased_finetuned_imdb_ddn0116_pipeline_en.md | 70 ++++++++++ ...ased_finetuned_imdb_gertjanvanderwel_en.md | 94 ++++++++++++++ ..._base_uncased_finetuned_imdb_mie_zhz_en.md | 94 ++++++++++++++ ...ased_finetuned_imdb_mie_zhz_pipeline_en.md | 70 ++++++++++ ...uncased_finetuned_imdb_mongdiutindei_en.md | 94 ++++++++++++++ ...ed_finetuned_imdb_qiyuan123_pipeline_en.md | 70 ++++++++++ ...ed_finetuned_imdb_rohit5895_pipeline_en.md | 70 ++++++++++ ..._base_uncased_finetuned_imdb_sdinger_en.md | 94 ++++++++++++++ ...ase_uncased_finetuned_imdb_shenberg1_en.md | 94 ++++++++++++++ ...sed_finetuned_react_content_pipeline_en.md | 70 ++++++++++ ...nthetic_finetuned_synthetic_pipeline_en.md | 70 ++++++++++ ...-03-distilbert_finetuned_imdb_indah1_en.md | 94 ++++++++++++++ ...finetuned_imdb_prateekag159_pipeline_en.md | 70 ++++++++++ ...3-distilbert_persian_farsi_zwnj_base_fa.md | 94 ++++++++++++++ ...ert_persian_farsi_zwnj_base_pipeline_fa.md | 70 ++++++++++ ...3-distilbert_tokenizer_256k_mlm_best_en.md | 94 ++++++++++++++ ...ert_yelp_sentiment_analysis_pipeline_en.md | 70 ++++++++++ ...cond_train_set_french_false_pipeline_en.md | 70 ++++++++++ .../2024-09-03-distilgreek_bert_el.md | 94 ++++++++++++++ .../2024-09-03-distilkobert_ft_en.md | 94 ++++++++++++++ .../2024-09-03-distilroberta_sst2_en.md | 94 ++++++++++++++ .../2024-09-03-dlfbert_pipeline_en.md | 70 ++++++++++ ...-09-03-dummy_model_edge2992_pipeline_en.md | 70 ++++++++++ .../2024-09-03-dummy_model_mhrecaldeb_en.md | 94 ++++++++++++++ ...-03-e5_large_v2_vectoriseai_pipeline_en.md | 69 ++++++++++ ...-09-03-embedded_e5_base_500_pipeline_en.md | 69 ++++++++++ ...-09-03-emotion_amaniabuzaid_pipeline_en.md | 70 ++++++++++ .../2024-09-03-english_tamil_translator_en.md | 94 ++++++++++++++ ...glish_tonga_tonga_islands_ganda_nllb_en.md | 94 
++++++++++++++ ...onga_islands_turkish_finetuned_model_en.md | 94 ++++++++++++++ ...witter_roberta_base_sentiment_latest_en.md | 94 ++++++++++++++ ...berta_base_sentiment_latest_pipeline_en.md | 70 ++++++++++ .../2024-09-03-finer_ord_transformers_2_en.md | 94 ++++++++++++++ ...iling_corrected_aligned_20e_pipeline_en.md | 70 ++++++++++ ...nsformers_multi_qa_mpnet_base_dot_v1_en.md | 86 +++++++++++++ ...03-finetuned_twitter_profane_roberta_en.md | 94 ++++++++++++++ ..._model_3000_samples_parth05_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-03-ft_clone_en.md | 94 ++++++++++++++ .../2024-09-03-ft_clone_pipeline_en.md | 70 ++++++++++ ...se_unsupervised_scifact_k10_pipeline_en.md | 69 ++++++++++ ...9-03-gpl_e5_base_unsupervised_test_1_en.md | 86 +++++++++++++ ...e_e5_base_unsupervised_test_1_d165d6_en.md | 86 +++++++++++++ ..._unsupervised_test_1_d165d6_pipeline_en.md | 69 ++++++++++ ...-03-helsinki_altp_indonesian_english_en.md | 94 ++++++++++++++ ...languages_english_opus100_accelerate_en.md | 94 ++++++++++++++ .../2024-09-03-industry_classification_en.md | 94 ++++++++++++++ ...-03-industry_classification_pipeline_en.md | 70 ++++++++++ ...9-03-khmer_xlm_roberta_base_pipeline_km.md | 70 ++++++++++ ...09-03-lab1_random_coloteong_pipeline_en.md | 70 ++++++++++ ...on_xlm_roberta_base_ivanlau_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-03-legalevalrr_en.md | 86 +++++++++++++ ...a_tonga_islands_kinyarwanda_pipeline_en.md | 70 ++++++++++ ...a_islands_portuguese_breton_pipeline_en.md | 70 ++++++++++ ...09-03-mdeberta_expl_extraction_multi_en.md | 94 ++++++++++++++ ...ta_v3_base_finetuned_sayula_popoluca_en.md | 94 ++++++++++++++ .../2024-09-03-mdeberta_v3_base_nubes_es.md | 94 ++++++++++++++ .../2024-09-03-medical_pubmed_8_2_en.md | 94 ++++++++++++++ .../2024-09-03-medrurobertalarge_ru.md | 94 ++++++++++++++ ...castellon_castellon_30_docs_pipeline_en.md | 70 ++++++++++ ...a_v3_large_ner_conll2003_latin_fe_v2_en.md | 94 ++++++++++++++ ...se_snli_mnli_finetuned_mnli_pipeline_en.md | 70 ++++++++++ ...rozen_newtriplets_v2_lr_2e_5_m_1_e_3_en.md | 86 +++++++++++++ ...-n_roberta_twitterfin_padding60model_en.md | 94 ++++++++++++++ .../2024-09-03-nuner_v2_0_pipeline_en.md | 70 ++++++++++ ...rse_encoding_doc_v2_distill_pipeline_en.md | 70 ++++++++++ ...onga_islands_italian_enimai_pipeline_en.md | 70 ++++++++++ ...sh_tonga_tonga_islands_thai_chayawat_en.md | 94 ++++++++++++++ ...tonga_islands_thai_chayawat_pipeline_en.md | 70 ++++++++++ ...4-09-03-roberta_base_danish_pipeline_da.md | 70 ++++++++++ .../2024-09-03-roberta_base_russian_v0_ru.md | 94 ++++++++++++++ ...nlp_persian_farsi_473312409_pipeline_en.md | 70 ++++++++++ ...erta_cwe_classifier_kelemia_pipeline_en.md | 70 ++++++++++ ..._embeddings_amharic_roberta_pipeline_am.md | 70 ++++++++++ ...rge_financial_news_sentiment_english_en.md | 94 ++++++++++++++ ...a_large_finnish_finnish_nlp_pipeline_fi.md | 70 ++++++++++ ...1024_finetuned_squad_seed_2_pipeline_en.md | 69 ++++++++++ .../2024-09-03-robertachem_pipeline_en.md | 70 ++++++++++ ...anguage_whole_word_phonetic_pipeline_en.md | 71 +++++++++++ ...r_mini_finetuned_kintweetsd_pipeline_en.md | 71 +++++++++++ ...ert_base_finnish_uncased_v1_pipeline_fi.md | 71 +++++++++++ ...24-09-03-sent_bert_kor_base_pipeline_ko.md | 71 +++++++++++ ...clinicalbert_emilyalsentzer_pipeline_en.md | 71 +++++++++++ .../2024-09-03-sent_memo_final_pipeline_en.md | 71 +++++++++++ .../2024-09-03-sent_radbert_pipeline_en.md | 71 +++++++++++ ...nt_twitter_xlm_roberta_base_pipeline_en.md | 71 +++++++++++ 
...-03-sent_xlm_roberta_base_facebookai_xx.md | 94 ++++++++++++++ ...oberta_base_finetuned_wolof_pipeline_en.md | 71 +++++++++++ ...-sentiment_analysis_wangyh6_pipeline_en.md | 70 ++++++++++ ...glelabelrecommendationmodel_pipeline_en.md | 70 ++++++++++ ...sitexsometre_camembert_large_stsb100_en.md | 94 ++++++++++++++ .../2024-09-03-splade_v3_distilbert_en.md | 94 ++++++++++++++ ..._uncased_finetuned_squad_v2_pipeline_en.md | 69 ++++++++++ .../ahmedlone127/2024-09-03-surgicberta_en.md | 94 ++++++++++++++ ...9-03-takalane_northern_sotho_roberta_en.md | 94 ++++++++++++++ .../2024-09-03-tokenizerlabeller_en.md | 94 ++++++++++++++ .../ahmedlone127/2024-09-03-topic_obits_en.md | 94 ++++++++++++++ .../2024-09-03-topic_politics_pipeline_en.md | 70 ++++++++++ ...-09-03-trained_model_distilbert_0305_en.md | 94 ++++++++++++++ .../2024-09-03-transformer_pipeline_en.md | 69 ++++++++++ ...tuned_english_tonga_tonga_islands_jp_en.md | 94 ++++++++++++++ ..._base_dec2020_tweet_topic_multi_2020_en.md | 94 ++++++++++++++ ...witter_roberta_base_sep2020_pipeline_en.md | 70 ++++++++++ ...base_topic_sentiment_latest_pipeline_en.md | 70 ++++++++++ ...itter_sentiment_analysis_v2_pipeline_en.md | 70 ++++++++++ ...ndd_pagekit_test_tags_cwadj_pipeline_en.md | 70 ++++++++++ .../2024-09-03-xlm_r_galen_meddocan_es.md | 94 ++++++++++++++ ..._r_with_transliteration_max_pipeline_en.md | 70 ++++++++++ ...gument_visquad2_15_3_2023_1_pipeline_en.md | 69 ++++++++++ ...erta_base_finetuned_clinais_pipeline_en.md | 70 ++++++++++ ...se_finetuned_marc_english_test_rundi_en.md | 94 ++++++++++++++ ...etuned_panx_all_the_neural_networker_en.md | 94 ++++++++++++++ ...ta_base_lcc_english_2e_5_42_pipeline_en.md | 70 ++++++++++ ...oberta_base_longformer_4096_pipeline_en.md | 70 ++++++++++ ...base_squad2_idkmrc_clickbaitspoiling_en.md | 86 +++++++++++++ ...roberta_large_qa_norwegian_eanderson_en.md | 86 +++++++++++++ ...uad_nepali_translated_squad_pipeline_en.md | 69 ++++++++++ ...a_ner_base_finetuned_naija_pipeline_pcm.md | 70 ++++++++++ ...ta_ner_hugsao123_base_finetuned_panx_de.md | 113 +++++++++++++++++ ...transformersbook_base_finetuned_panx_de.md | 115 +++++++++++++++++ ...eberta_v3_base_finetuned_yahoo_80_20_en.md | 94 ++++++++++++++ ..._base_finetuned_yahoo_80_20_pipeline_en.md | 70 ++++++++++ ...4-32_shot_twitter_2classes_head_body_en.md | 86 +++++++++++++ ...09-04-600_stmodel_brand_rem_pipeline_en.md | 69 ++++++++++ .../ahmedlone127/2024-09-04-aditya_ner_en.md | 94 ++++++++++++++ .../2024-09-04-aditya_ner_pipeline_en.md | 70 ++++++++++ ...a_base_finetuned_hausa_2e_4_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-04-akai_ner_en.md | 92 ++++++++++++++ .../2024-09-04-albert_base_chinese_ws_zh.md | 94 ++++++++++++++ ...09-04-albert_base_qa_coqa_1_pipeline_en.md | 69 ++++++++++ ...v2_finetuned_ner_minhminh09_pipeline_en.md | 70 ++++++++++ ...ert_base_v2_rotten_tomatoes_pipeline_en.md | 70 ++++++++++ ...bert_base_v2_rte_textattack_pipeline_en.md | 70 ++++++++++ ...lbert_persian_farsi_base_v2_pipeline_fa.md | 70 ++++++++++ ...ersian_farsi_base_v2_sentiment_multi_fa.md | 94 ++++++++++++++ ...4-09-04-albert_small_kor_v1_pipeline_en.md | 70 ++++++++++ ...4-09-04-all_mpnet_base_v2_1_pipeline_en.md | 69 ++++++++++ ..._airdialogue_unlabelled_and_labelled_en.md | 86 +++++++++++++ ..._tuned_epochs_8_binhcode25_finetuned_en.md | 86 +++++++++++++ ...ase_v2_fine_tuned_epochs_8_event_nlp_en.md | 86 +++++++++++++ ...net_base_v2_firefox_margin_1_epoch_1_en.md | 86 +++++++++++++ ...v2_firefox_margin_1_epoch_1_pipeline_en.md | 69 ++++++++++ 
...4-09-04-all_mpnet_base_v2_survey3000_en.md | 86 +++++++++++++ ...swer_equivalence_distilbert_zli12321_en.md | 94 ++++++++++++++ ...valence_distilbert_zli12321_pipeline_en.md | 70 ++++++++++ ...eviews_component_deberta_v1_pipeline_en.md | 70 ++++++++++ .../2024-09-04-astroentities_pipeline_en.md | 70 ++++++++++ ...lp_covid_fake_news_36839110_pipeline_en.md | 70 ++++++++++ ...utotrain_3_xlmr_fulltext_53881126794_en.md | 94 ++++++++++++++ .../2024-09-04-autotrain_htyqd_ivazp_en.md | 94 ++++++++++++++ ...09-04-autotrain_htyqd_ivazp_pipeline_en.md | 70 ++++++++++ ...04-babyberta_wiki_finetuned_squad_v1_en.md | 86 +++++++++++++ ...rta_wiki_finetuned_squad_v1_pipeline_en.md | 69 ++++++++++ ...masking_run3_finetuned_qamr_pipeline_en.md | 69 ++++++++++ ...ta_wikipedia_2_5_0_1_finetuned_qasrl_en.md | 86 +++++++++++++ ...dia_2_5_0_1_finetuned_qasrl_pipeline_en.md | 69 ++++++++++ ...er_anglicisms_spanish_mbert_pipeline_es.md | 70 ++++++++++ ...ner_craft_augmented_english_pipeline_xx.md | 70 ++++++++++ .../2024-09-04-bert_ner_skills_en.md | 94 ++++++++++++++ .../2024-09-04-bert_ner_skills_pipeline_en.md | 70 ++++++++++ ...analysis_sravni_russian_corp_russian_en.md | 94 ++++++++++++++ ...sravni_russian_corp_russian_pipeline_en.md | 70 ++++++++++ ...ert_sayula_popoluca_estbert_xpos_128_en.md | 94 ++++++++++++++ ...uence_classifier_coronabert_pipeline_en.md | 70 ++++++++++ ..._128k_keyword_discriminator_pipeline_tr.md | 70 ++++++++++ ...4-bertimbau_large_ner_total_pipeline_pt.md | 70 ++++++++++ .../2024-09-04-bertweetfr_base_fr.md | 94 ++++++++++++++ .../2024-09-04-bertweetfr_base_pipeline_fr.md | 70 ++++++++++ ...09-04-best_model_yelp_polarity_32_87_en.md | 94 ++++++++++++++ ...t_model_yelp_polarity_32_87_pipeline_en.md | 70 ++++++++++ ...-09-04-beto_finetuned_ner_3_pipeline_es.md | 70 ++++++++++ ..._token_classification_model_pipeline_en.md | 70 ++++++++++ ...ir_roberta_base_generative_sentiment_en.md | 94 ++++++++++++++ ...024-09-04-bob_oriya_not_bob_pipeline_en.md | 70 ++++++++++ .../2024-09-04-book_recognizer_pipeline_en.md | 70 ++++++++++ .../2024-09-04-bsc_bio_spanish_es.md | 94 ++++++++++++++ ...4-burmese_awesome_health_qa_model_35_en.md | 86 +++++++++++++ ..._awesome_health_qa_model_35_pipeline_en.md | 69 ++++++++++ ...4-burmese_awesome_model_2_nicolehao7_en.md | 94 ++++++++++++++ ...ese_awesome_qa_model_40_len_pipeline_en.md | 69 ++++++++++ ...-burmese_awesome_wnut_actor_pipeline_en.md | 70 ++++++++++ ...rmese_awesome_wnut_all_time_pipeline_en.md | 70 ++++++++++ ...04-burmese_awesome_wnut_model_adisur_en.md | 94 ++++++++++++++ ...ese_awesome_wnut_model_aditya_jindal_en.md | 94 ++++++++++++++ ...me_wnut_model_aditya_jindal_pipeline_en.md | 70 ++++++++++ ...awesome_wnut_model_almifosa_pipeline_en.md | 70 ++++++++++ ...-burmese_awesome_wnut_model_asrajgct_en.md | 94 ++++++++++++++ ...se_awesome_wnut_model_charliefederer_en.md | 94 ++++++++++++++ ...esome_wnut_model_claire5776_pipeline_en.md | 70 ++++++++++ ...urmese_awesome_wnut_model_diodiodada_en.md | 94 ++++++++++++++ ...4-burmese_awesome_wnut_model_gaogao8_en.md | 94 ++++++++++++++ ..._awesome_wnut_model_hcy5561_pipeline_en.md | 70 ++++++++++ ...esome_wnut_model_hrodriguez_pipeline_en.md | 70 ++++++++++ ..._awesome_wnut_model_lmattes_pipeline_en.md | 70 ++++++++++ ...burmese_awesome_wnut_model_malduwais_en.md | 94 ++++++++++++++ ...esome_wnut_model_minhminh09_pipeline_en.md | 70 ++++++++++ ...model_moumitanettojanamanna_pipeline_en.md | 70 ++++++++++ ...burmese_awesome_wnut_model_portokali_en.md | 94 ++++++++++++++ 
...wesome_wnut_model_portokali_pipeline_en.md | 70 ++++++++++ ...some_wnut_model_robinsh2023_pipeline_en.md | 70 ++++++++++ ...e_awesome_wnut_model_rw2614_pipeline_en.md | 70 ++++++++++ ...mese_awesome_wnut_model_svangorden13_en.md | 94 ++++++++++++++ ...ome_wnut_model_svangorden13_pipeline_en.md | 70 ++++++++++ ...e_awesome_wnut_model_urisoo_pipeline_en.md | 70 ++++++++++ ...04-burmese_awesome_wnut_model_wzchen_en.md | 94 ++++++++++++++ ...e_awesome_wnut_saprotection_pipeline_en.md | 70 ++++++++++ ...burmese_awesome_wnut_target_pipeline_en.md | 70 ++++++++++ ...04-burmese_bert_qa_model_05_pipeline_en.md | 69 ++++++++++ ...024-09-04-burmese_ner_model_atajan99_en.md | 94 ++++++++++++++ ...-burmese_ner_model_atajan99_pipeline_en.md | 70 ++++++++++ ...4-09-04-burmese_ner_model_delphine18_en.md | 94 ++++++++++++++ ...urmese_ner_model_delphine18_pipeline_en.md | 70 ++++++++++ ...-burmese_ner_model_luccaaug_pipeline_en.md | 70 ++++++++++ ...2024-09-04-camembert_base_dataikunlp_fr.md | 94 ++++++++++++++ ...assifier_only_french_lr1e_3_pipeline_en.md | 70 ++++++++++ .../2024-09-04-camembert_mlm_pipeline_en.md | 70 ++++++++++ ...024-09-04-classify_isin_step7_binary_en.md | 94 ++++++++++++++ ...-classify_isin_step7_binary_pipeline_en.md | 70 ++++++++++ ...nicalbert_bionlp13cg_ner_nepal_bhasa_en.md | 94 ++++++++++++++ ...atch16_supervised_mulitilingual_1600_en.md | 120 ++++++++++++++++++ ...pervised_mulitilingual_1600_pipeline_en.md | 69 ++++++++++ ...024-09-04-clip_crop_disease_pipeline_en.md | 69 ++++++++++ .../2024-09-04-clip_demo_pipeline_en.md | 69 ++++++++++ .../2024-09-04-clip_vit_base_patch16_en.md | 120 ++++++++++++++++++ ...09-04-clip_vit_base_patch16_pipeline_en.md | 69 ++++++++++ .../2024-09-04-clip_vit_base_patch322_en.md | 120 ++++++++++++++++++ ...t_base_patch32_demo_rvignav_pipeline_en.md | 69 ++++++++++ ...lip_vit_l_14_laion2b_s32b_b82k_laion_en.md | 120 ++++++++++++++++++ ..._vit_large_patch14_336_q_mm_pipeline_en.md | 69 ++++++++++ ...h14_finetuned_dresser_sofas_pipeline_en.md | 69 ++++++++++ ...-04-clip_vit_large_patch14_superlore_en.md | 120 ++++++++++++++++++ ...vit_large_patch14_superlore_pipeline_en.md | 69 ++++++++++ ...-clip_vit_large_patch14_trainformeta_en.md | 120 ++++++++++++++++++ ..._large_patch14_trainformeta_pipeline_en.md | 69 ++++++++++ .../2024-09-04-codebert_python_en.md | 94 ++++++++++++++ .../ahmedlone127/2024-09-04-cpegen_pv_en.md | 94 ++++++++++++++ .../2024-09-04-cpegen_pv_pipeline_en.md | 70 ++++++++++ .../2024-09-04-cpegen_vv_pipeline_en.md | 70 ++++++++++ .../2024-09-04-craft_clinicalbert_ner_en.md | 94 ++++++++++++++ .../2024-09-04-cree_fewshot_pipeline_en.md | 69 ++++++++++ ...-cross_encoder_stsb_deberta_v3_large_en.md | 94 ++++++++++++++ ...ta_wolof_search_mix_epoch_3_pipeline_en.md | 69 ++++++++++ .../2024-09-04-dagpap24_deberta_base_ft_en.md | 94 ++++++++++++++ ...inerdistilbert_fullsequence_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-04-db_fe_2_1_en.md | 94 ++++++++++++++ .../2024-09-04-db_fe_2_1_pipeline_en.md | 70 ++++++++++ ...dbert_pii_detection_model_omshikhare_en.md | 94 ++++++++++++++ ...a_amazon_reviews_v1_patrickvonplaten_en.md | 94 ++++++++++++++ ...reviews_v1_patrickvonplaten_pipeline_en.md | 70 ++++++++++ ...24-09-04-deberta_base_german_fluency_en.md | 94 ++++++++++++++ ..._metaphor_detection_english_pipeline_en.md | 70 ++++++++++ ...4-deberta_classification_base_prompt_en.md | 111 ++++++++++++++++ ..._feedback_1024_pseudo_final_pipeline_en.md | 70 ++++++++++ ...berta_docnli_sentencelevel_ner_claim_en.md | 94 ++++++++++++++ 
...nli_sentencelevel_ner_claim_pipeline_en.md | 70 ++++++++++ .../2024-09-04-deberta_finetune_en.md | 94 ++++++++++++++ ...2024-09-04-deberta_finetune_pipeline_en.md | 70 ++++++++++ ...9-04-deberta_sentencelevel_ner_claim_en.md | 94 ++++++++++++++ ...rta_sentencelevel_ner_claim_pipeline_en.md | 70 ++++++++++ .../2024-09-04-deberta_small_22feb_en.md | 94 ++++++++++++++ ...-deberta_tomatoes_sentiment_voodoo72_en.md | 94 ++++++++++++++ ..._japanese_finetuned_emotion_pipeline_en.md | 70 ++++++++++ ...rta_v3_base_10xp3_10xc4_128_pipeline_en.md | 70 ++++++++++ ...-09-04-deberta_v3_base_1107_pipeline_en.md | 70 ++++++++++ ...4-deberta_v3_base_ai4privacy_english_en.md | 94 ++++++++++++++ ...erta_v3_base_civil_comments_wilds_5k_en.md | 94 ++++++++++++++ ...ase_civil_comments_wilds_5k_pipeline_en.md | 70 ++++++++++ ...-deberta_v3_base_cola_yevheniimaslov_en.md | 94 ++++++++++++++ ...v3_base_cola_yevheniimaslov_pipeline_en.md | 70 ++++++++++ ..._base_finetuned_bluegennx_run2_19_5e_en.md | 94 ++++++++++++++ ...-04-deberta_v3_base_finetuned_french_en.md | 94 ++++++++++++++ ...ase_finetuned_mcqa_manyet1k_pipeline_en.md | 70 ++++++++++ .../2024-09-04-deberta_v3_base_qnli_en.md | 94 ++++++++++++++ .../2024-09-04-deberta_v3_base_sst2_en.md | 94 ++++++++++++++ ...4-09-04-deberta_v3_base_whatsapp_ner_en.md | 94 ++++++++++++++ ...eberta_v3_base_whatsapp_ner_pipeline_en.md | 70 ++++++++++ ...erta_v3_base_zeroshot_v2_0_28heldout_en.md | 94 ++++++++++++++ ...rge_ad_opentag_finetuned_ner_5epochs_en.md | 94 ++++++++++++++ ...24-09-04-deberta_v3_large_classifier_en.md | 94 ++++++++++++++ ...24-09-04-deberta_v3_large_fever_pepa_en.md | 94 ++++++++++++++ ...e_finetuned_ner_10epochs_v2_pipeline_en.md | 70 ++++++++++ ..._finetuned_synthetic_paraphrase_only_en.md | 94 ++++++++++++++ ...d_synthetic_paraphrase_only_pipeline_en.md | 70 ++++++++++ ...-deberta_v3_large_sentiment_pipeline_en.md | 70 ++++++++++ ...ain_passage_old_facts_rater_all_gpt4_en.md | 94 ++++++++++++++ ...ted_passage_old_facts_rater_all_gpt4_en.md | 94 ++++++++++++++ ..._smallsed_rte_finetuned_rte_pipeline_en.md | 70 ++++++++++ ...9-04-deberta_v3_xsmall_mnli_pipeline_en.md | 70 ++++++++++ ...2024-09-04-deberta_xlarge_em_abt_buy_en.md | 94 ++++++++++++++ ...4-deberta_xlarge_em_abt_buy_pipeline_en.md | 70 ++++++++++ ...024-09-04-debertabaseemotionbalanced_en.md | 94 ++++++++++++++ ...-debertabaseemotionbalanced_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-04-deep_2_en.md | 94 ++++++++++++++ ..._balanced_distilbert_base_uncased_v2_en.md | 94 ++++++++++++++ ...09-04-disbert_finetune_for_gentriple_en.md | 94 ++++++++++++++ .../2024-09-04-distil_bert_docred_ner_en.md | 94 ++++++++++++++ ...ased_finetuned_conll2003_english_ner_en.md | 94 ++++++++++++++ ...ilbert_base_cased_finetuned_pipeline_en.md | 70 ++++++++++ ...bert_base_cased_pii_english_pipeline_en.md | 70 ++++++++++ ...istilbert_base_data_wnut_17_pipeline_en.md | 70 ++++++++++ ...bert_base_english_greek_modern_cased_en.md | 94 ++++++++++++++ ..._english_portuguese_spanish_pipeline_xx.md | 70 ++++++++++ ...ed_finetuned_adl_hw1_russianroulette_en.md | 94 ++++++++++++++ ...ned_adl_hw1_russianroulette_pipeline_en.md | 70 ++++++++++ ...e_uncased_finetuned_clinc_aicoder009_en.md | 94 ++++++++++++++ ..._finetuned_clinc_aicoder009_pipeline_en.md | 70 ++++++++++ ..._uncased_finetuned_clinc_einsteinkim_en.md | 94 ++++++++++++++ ...ncased_finetuned_cola_dev2k_pipeline_en.md | 70 ++++++++++ ...tuned_emotion_wzy1924561588_pipeline_en.md | 70 ++++++++++ ...ncased_finetuned_finer_test_pipeline_en.md | 70 
++++++++++ ...bert_base_uncased_finetuned_imdb1004_en.md | 94 ++++++++++++++ ...sed_finetuned_imdb_ce_kishi_pipeline_en.md | 70 ++++++++++ ...t_base_uncased_finetuned_imdb_dvijay_en.md | 94 ++++++++++++++ ...cased_finetuned_imdb_dvijay_pipeline_en.md | 70 ++++++++++ ...base_uncased_finetuned_imdb_greyfoss_en.md | 94 ++++++++++++++ ..._base_uncased_finetuned_imdb_jaybdev_en.md | 94 ++++++++++++++ ...ased_finetuned_imdb_jaybdev_pipeline_en.md | 70 ++++++++++ ...ert_base_uncased_finetuned_imdb_r0in_en.md | 94 ++++++++++++++ ...t_base_uncased_finetuned_imdb_sbulut_en.md | 94 ++++++++++++++ ...inetuned_imdb_shahzebnaveed_pipeline_en.md | 70 ++++++++++ ...base_uncased_finetuned_imdb_thepines_en.md | 94 ++++++++++++++ ...sed_finetuned_imdb_thepines_pipeline_en.md | 70 ++++++++++ ...se_uncased_finetuned_imdb_walterg777_en.md | 94 ++++++++++++++ ...base_uncased_finetuned_masakhanenews_en.md | 94 ++++++++++++++ ...finetuned_ner_anuroopkeshav_pipeline_en.md | 70 ++++++++++ ...e_uncased_finetuned_ner_cadec_active_en.md | 94 ++++++++++++++ ..._base_uncased_finetuned_ner_chuqiaog_en.md | 94 ++++++++++++++ ...ased_finetuned_ner_chuqiaog_pipeline_en.md | 70 ++++++++++ ...base_uncased_finetuned_ner_emilyblah_en.md | 94 ++++++++++++++ ..._base_uncased_finetuned_ner_mawiwawi_en.md | 94 ++++++++++++++ ...ased_finetuned_ner_mawiwawi_pipeline_en.md | 70 ++++++++++ ...se_uncased_finetuned_ner_misterstino_en.md | 94 ++++++++++++++ ...ncased_finetuned_ner_polo42_pipeline_en.md | 70 ++++++++++ ...se_uncased_finetuned_ner_trubnik1967_en.md | 94 ++++++++++++++ ...d_finetuned_ner_trubnik1967_pipeline_en.md | 70 ++++++++++ ...ert_base_uncased_finetuned_ner_vnear_en.md | 94 ++++++++++++++ ...base_uncased_finetuned_ner_yijingzzz_en.md | 94 ++++++++++++++ ...sed_finetuned_ner_yijingzzz_pipeline_en.md | 70 ++++++++++ ...uncased_finetuned_ner_zy666_pipeline_en.md | 70 ++++++++++ ...base_uncased_finetuned_news_pipeline_en.md | 70 ++++++++++ ...se_uncased_finetuned_sayula_popoluca_en.md | 94 ++++++++++++++ ...netuned_squad_d5716d28_batuhanyilmaz_en.md | 92 ++++++++++++++ ...quad_d5716d28_batuhanyilmaz_pipeline_en.md | 69 ++++++++++ ..._finetuned_squad_d5716d28_physhunter_en.md | 92 ++++++++++++++ ...d_squad_d5716d28_physhunter_pipeline_en.md | 69 ++++++++++ ...sed_finetuned_squad_d5716d28_sofa566_en.md | 92 ++++++++++++++ ...uned_squad_d5716d28_sofa566_pipeline_en.md | 69 ++++++++++ ...e_uncased_finetuned_squad_fattahilmi_en.md | 86 +++++++++++++ ..._finetuned_squad_fattahilmi_pipeline_en.md | 69 ++++++++++ ..._uncased_finetuned_squad_tanishq1420_en.md | 86 +++++++++++++ ...inetuned_squad_yashaswi0506_pipeline_en.md | 69 ++++++++++ ...uncased_finetuned_streamers_pipeline_en.md | 70 ++++++++++ ...e_uncased_go_emotion_bhadresh_savani_en.md | 94 ++++++++++++++ ...lbert_base_uncased_mluonium_pipeline_en.md | 70 ++++++++++ ...ase_uncased_mnli_textattack_pipeline_en.md | 70 ++++++++++ ..._norwegian_perturb_bozhidara_pesheva_en.md | 94 ++++++++++++++ ...tilbert_base_uncased_travel_zphr_5st_en.md | 94 ++++++++++++++ ...ase_uncased_travel_zphr_5st_pipeline_en.md | 70 ++++++++++ .../2024-09-04-distilbert_codeslang_en.md | 94 ++++++++++++++ ...-09-04-distilbert_codeslang_pipeline_en.md | 70 ++++++++++ ...2024-09-04-distilbert_finetuned_coqa_en.md | 86 +++++++++++++ ...4-distilbert_finetuned_coqa_pipeline_en.md | 69 ++++++++++ ...ilbert_finetuned_finer_4_v2_pipeline_en.md | 70 ++++++++++ ...t_finetuned_sayula_popoluca_pipeline_en.md | 70 ++++++++++ ...distilbert_finetuned_squadv2_fuutoru_en.md | 86 +++++++++++++ 
...t_finetuned_squadv2_fuutoru_pipeline_en.md | 69 ++++++++++ ...netuned_squadv2_nampham1106_pipeline_en.md | 69 ++++++++++ ...t_finetuned_squadv2_ntn0301_pipeline_en.md | 69 ++++++++++ ...ert_hera_synthetic_pretrain_pipeline_en.md | 70 ++++++++++ ...4-distilbert_imdb_huggingface_cyh002_en.md | 94 ++++++++++++++ ...ert_imdb_huggingface_cyh002_pipeline_en.md | 70 ++++++++++ .../2024-09-04-distilbert_masking_heaps_en.md | 94 ++++++++++++++ .../2024-09-04-distilbert_mlm_practice_en.md | 94 ++++++++++++++ ...-04-distilbert_mlm_practice_pipeline_en.md | 70 ++++++++++ ...04-distilbert_multilingual_cased_lft_xx.md | 94 ++++++++++++++ ...stilbert_qa_BERT_ClinicalQA_pipeline_en.md | 69 ++++++++++ ...9-04-distilbert_qa_COVID_DistilBERTc_en.md | 98 ++++++++++++++ ...ckpoint_500_finetuned_squad_pipeline_en.md | 69 ++++++++++ ...istilbert_qa_distilBertABSA_pipeline_en.md | 69 ++++++++++ .../2024-09-04-distilbert_qa_eurosmart_en.md | 86 +++++++++++++ ...-04-distilbert_qa_test_squad_trained_en.md | 98 ++++++++++++++ ...lbert_qa_test_squad_trained_pipeline_en.md | 69 ++++++++++ ...04-distilbert_tuned_4labels_pipeline_en.md | 70 ++++++++++ ...turkish_sentiment_analysis2_pipeline_en.md | 70 ++++++++++ ...bert_word2vec_256k_mlm_best_pipeline_en.md | 70 ++++++++++ ...-09-04-distilcamembert_base_pipeline_fr.md | 70 ++++++++++ ...-09-04-distillbert_finetuned_finer_4_en.md | 94 ++++++++++++++ ...stillbert_finetuned_medical_symptoms_en.md | 94 ++++++++++++++ ..._finetuned_medical_symptoms_pipeline_en.md | 70 ++++++++++ ...09-04-distillbert_political_finetune_en.md | 94 ++++++++++++++ ...-09-04-distilroberta_base_catalan_v2_ca.md | 94 ++++++++++++++ ...d_wikitext2_squad_qa_wandb2_pipeline_en.md | 69 ++++++++++ ...ndb_week_3_complaints_classifier_512_en.md | 94 ++++++++++++++ .../2024-09-04-dummy_model2_skr3178_en.md | 94 ++++++++++++++ ...24-09-04-dummy_model2_tiffanytiffany_en.md | 94 ++++++++++++++ ...dummy_model2_tiffanytiffany_pipeline_en.md | 70 ++++++++++ .../2024-09-04-dummy_model_7_pipeline_en.md | 70 ++++++++++ ...-dummy_model_ainullbabystep_pipeline_en.md | 70 ++++++++++ ...09-04-dummy_model_benchan79_pipeline_en.md | 70 ++++++++++ .../2024-09-04-dummy_model_binitha_en.md | 94 ++++++++++++++ .../2024-09-04-dummy_model_dry_en.md | 94 ++++++++++++++ .../2024-09-04-dummy_model_dry_pipeline_en.md | 70 ++++++++++ ...-09-04-dummy_model_ffleming_pipeline_en.md | 70 ++++++++++ ...-04-dummy_model_jianfeng777_pipeline_en.md | 70 ++++++++++ .../2024-09-04-dummy_model_jonathansum_en.md | 94 ++++++++++++++ .../2024-09-04-dummy_model_maxcarduner_en.md | 94 ++++++++++++++ .../2024-09-04-dummy_model_raphgg_en.md | 94 ++++++++++++++ ...ummy_model_sunilpinnamaneni_pipeline_en.md | 70 ++++++++++ .../2024-09-04-dummy_model_tanu09_en.md | 94 ++++++++++++++ ...24-09-04-dummy_model_tanu09_pipeline_en.md | 70 ++++++++++ ...-09-04-dummy_model_tpanda09_pipeline_en.md | 70 ++++++++++ .../2024-09-04-dummy_model_umalakshmi07_en.md | 94 ++++++++++++++ .../2024-09-04-dummy_model_vickysirwani_en.md | 94 ++++++++++++++ .../2024-09-04-dummy_model_viraal_en.md | 94 ++++++++++++++ ...24-09-04-dummy_model_viraal_pipeline_en.md | 70 ++++++++++ ...on_text_classifier_on_dd_v1_pipeline_en.md | 70 ++++++++++ ...9-04-environmentalbert_base_pipeline_en.md | 70 ++++++++++ ...024-09-04-esg_classification_english_en.md | 94 ++++++++++++++ ...-esg_classification_english_pipeline_en.md | 70 ++++++++++ ...04-esg_sentiment_prediction_pipeline_en.md | 70 ++++++++++ .../2024-09-04-facets_gpt_77_pipeline_en.md | 69 ++++++++++ 
.../2024-09-04-facets_gpt_expanswer_35_en.md | 86 +++++++++++++ ...24-09-04-fine_tuned_model_1_pipeline_en.md | 70 ++++++++++ .../2024-09-04-finer_distillbert_v2_en.md | 94 ++++++++++++++ ...re_distilbert_base_uncased_fill_mask_en.md | 94 ++++++++++++++ ...netuned_sail2017_indic_bert_pipeline_en.md | 70 ++++++++++ .../2024-09-04-finetunedclip_pipeline_en.md | 69 ++++++++++ ...ent_model_3000_samples_benjihearhear_en.md | 94 ++++++++++++++ ..._3000_samples_benjihearhear_pipeline_en.md | 70 ++++++++++ ...0_samples_carlodallaquercia_pipeline_en.md | 70 ++++++++++ .../2024-09-04-first_model_pipeline_en.md | 70 ++++++++++ .../2024-09-04-fnctech_pipeline_en.md | 69 ++++++++++ ...ina_with_transliteration_minangkabau_en.md | 94 ++++++++++++++ ...dpr_anonymiseingsmodel_ganm_pipeline_en.md | 70 ++++++++++ ...lm_base_toponym_recognition_pipeline_en.md | 70 ++++++++++ ...lay_sentiment_analysis_danielribeiro_en.md | 94 ++++++++++++++ ...ment_analysis_danielribeiro_pipeline_en.md | 70 ++++++++++ ...-gqa_roberta_german_legal_squad_2000_de.md | 86 +++++++++++++ ...eberta_v3_base_sentiment_nepal_bhasa_en.md | 94 ++++++++++++++ .../2024-09-04-hw01_hamsty_pipeline_en.md | 70 ++++++++++ ...ase_abusive_oriya_threatening_speech_en.md | 94 ++++++++++++++ ...ve_oriya_threatening_speech_pipeline_en.md | 70 ++++++++++ .../2024-09-04-icelandic_title_setfit_en.md | 86 +++++++++++++ ...9-04-imdb_review_sentiement_pipeline_en.md | 70 ++++++++++ ...supervised_training_1mln_downsampled_en.md | 94 ++++++++++++++ ...d_training_1mln_downsampled_pipeline_en.md | 70 ++++++++++ ...ndic_bert_finetuned_trac_ds_pipeline_en.md | 70 ++++++++++ ...-09-04-indicbert_hindi_urdu_pipeline_en.md | 70 ++++++++++ .../2024-09-04-indicbert_urdu_pipeline_en.md | 70 ++++++++++ ...04-indicner_oriya_finetuned_pipeline_en.md | 70 ++++++++++ ...jave_codemixed_roberta_base_pipeline_id.md | 70 ++++++++++ ...9-04-indonesian_punctuation_pipeline_en.md | 70 ++++++++++ .../2024-09-04-intent_xl_pipeline_en.md | 70 ++++++++++ ...action_qa_model_1_2_roberta_pipeline_en.md | 69 ++++++++++ .../ahmedlone127/2024-09-04-kalbert_en.md | 94 ++++++++++++++ ...rabic_stt_asr_based_on_whisper_small_ar.md | 84 ++++++++++++ .../2024-09-04-kanberto_pipeline_kn.md | 70 ++++++++++ ...-09-04-kaviel_threat_text_classifier_en.md | 94 ++++++++++++++ ...tion_distilbert_inspec_finetuned_ner_en.md | 94 ++++++++++++++ .../2024-09-04-legal_roberta_large_en.md | 94 ++++++++++++++ .../2024-09-04-linkbert_mini_en.md | 94 ++++++++++++++ .../2024-09-04-linkbert_mini_pipeline_en.md | 70 ++++++++++ .../2024-09-04-lithuanian_hansardmatch_en.md | 86 +++++++++++++ ...-04-lithuanian_hansardmatch_pipeline_en.md | 69 ++++++++++ ...04-lithuanian_namesonly_humancapital_en.md | 86 +++++++++++++ ...nian_namesonly_humancapital_pipeline_en.md | 69 ++++++++++ ...un_data_fine_coarse_english_pipeline_en.md | 69 ++++++++++ ...2024-09-04-lm_ner_skills_recognition_en.md | 94 ++++++++++++++ ...4-lm_ner_skills_recognition_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-04-mach_1_en.md | 94 ++++++++++++++ .../2024-09-04-magbert_lm_pipeline_en.md | 70 ++++++++++ ..._tonga_tonga_islands_french_lingrui1_en.md | 94 ++++++++++++++ ...onga_islands_french_sooh098_pipeline_en.md | 70 ++++++++++ ...marianmt_igbo_best_18_10_23_pipeline_ig.md | 70 ++++++++++ .../2024-09-04-masking_heaps_distilbert_en.md | 94 ++++++++++++++ ...erta_base_metaphor_detection_spanish_es.md | 94 ++++++++++++++ .../2024-09-04-mdeberta_base_v3_4_en.md | 94 ++++++++++++++ .../2024-09-04-mdeberta_base_v3_5_en.md | 94 ++++++++++++++ 
[diffstat: new model and pipeline documentation posts under docs/_posts/ahmedlone127/, dated 2024-09-04 and 2024-09-05, 66-120 lines added per file; file paths truncated by git in the original summary]
..._task__model_deberta__aug_method_rsa_en.md | 94 ++++++++++++++ ...del_deberta__aug_method_rsa_pipeline_en.md | 70 ++++++++++ .../2024-09-05-tech_roberta_pipeline_vi.md | 70 ++++++++++ .../2024-09-05-tech_roberta_vi.md | 94 ++++++++++++++ ...-05-test_airbus_year_report_pipeline_en.md | 70 ++++++++++ ...2024-09-05-test_demo_qa_with_roberta_en.md | 86 +++++++++++++ .../2024-09-05-test_directory_en.md | 94 ++++++++++++++ .../2024-09-05-test_directory_pipeline_en.md | 70 ++++++++++ .../_posts/ahmedlone127/2024-09-05-test_en.md | 120 ++++++++++++++++++ .../2024-09-05-test_pipeline_en.md | 69 ++++++++++ .../2024-09-05-testing_model_en.md | 94 ++++++++++++++ .../2024-09-05-testing_model_pipeline_en.md | 70 ++++++++++ .../2024-09-05-timeset_ifm_pipeline_en.md | 69 ++++++++++ .../2024-09-05-tiny_distill_2601_2_en.md | 94 ++++++++++++++ ...4-09-05-tiny_distill_2601_2_pipeline_en.md | 70 ++++++++++ .../2024-09-05-tokenclassificationmodel_en.md | 94 ++++++++++++++ ...05-tokenclassificationmodel_pipeline_en.md | 70 ++++++++++ .../2024-09-05-topic_weather_en.md | 94 ++++++++++++++ ...comment_classification_using_roberta_en.md | 94 ++++++++++++++ ...lassification_using_roberta_pipeline_en.md | 70 ++++++++++ ...2024-09-05-trained_baseline_pipeline_en.md | 70 ++++++++++ .../2024-09-05-trained_croatian_en.md | 94 ++++++++++++++ .../2024-09-05-trained_slovak_pipeline_en.md | 70 ++++++++++ ...anslation_for_recipes_english_french_en.md | 94 ++++++++++++++ ..._for_recipes_english_french_pipeline_en.md | 70 ++++++++++ ...24-09-05-translations_english_german_en.md | 94 ++++++++++++++ ...translations_english_german_pipeline_en.md | 70 ++++++++++ ...ugstugi_bengaliai_asr_whisper_medium_en.md | 84 ++++++++++++ ...engaliai_asr_whisper_medium_pipeline_en.md | 69 ++++++++++ .../2024-09-05-turkish_acc_80_en.md | 94 ++++++++++++++ .../2024-09-05-turkish_acc_80_pipeline_en.md | 70 ++++++++++ ...t_capitalization_correction_pipeline_tr.md | 70 ++++++++++ ..._base_bert_capitalization_correction_tr.md | 94 ++++++++++++++ .../2024-09-05-turkishtranslator_en.md | 94 ++++++++++++++ ...024-09-05-turkishtranslator_pipeline_en.md | 70 ++++++++++ ...berta_base_sentiment_finetuned_memes_en.md | 94 ++++++++++++++ ...e_sentiment_finetuned_memes_pipeline_en.md | 70 ++++++++++ ...itter_roberta_base_2019_90m_pipeline_en.md | 70 ++++++++++ ...oberta_base_2021_124m_irony_pipeline_en.md | 70 ++++++++++ ...4-09-05-twitter_roberta_base_jun2021_en.md | 94 ++++++++++++++ ...witter_roberta_base_jun2021_pipeline_en.md | 70 ++++++++++ ...witter_roberta_base_jun2022_15m_incr_en.md | 94 ++++++++++++++ ...berta_base_jun2022_15m_incr_pipeline_en.md | 70 ++++++++++ ...-09-05-ukraine_waray_philippines_pov_uk.md | 94 ++++++++++++++ ...-05-umberto_fine_tuned_irony_sarcasm_en.md | 94 ++++++++++++++ ...to_fine_tuned_irony_sarcasm_pipeline_en.md | 70 ++++++++++ .../2024-09-05-unibert_distilbert_2_en.md | 94 ++++++++++++++ ...-09-05-unibert_distilbert_2_pipeline_en.md | 70 ++++++++++ .../2024-09-05-unspsc_product_category_en.md | 94 ++++++++++++++ ...-05-unspsc_product_category_pipeline_en.md | 70 ++++++++++ .../2024-09-05-usclm_robrta_base_mk1_en.md | 94 ++++++++++++++ ...09-05-usclm_robrta_base_mk1_pipeline_en.md | 70 ++++++++++ ...-09-05-vidula_multilanguage_finetune_en.md | 94 ++++++++++++++ ...dula_multilanguage_finetune_pipeline_en.md | 70 ++++++++++ ...09-05-vietnews_roberta_base_pipeline_en.md | 70 ++++++++++ .../2024-09-05-vit_l_14_336_en.md | 120 ++++++++++++++++++ .../2024-09-05-vit_l_14_336_pipeline_en.md | 69 ++++++++++ 
...nhala_audio_tonga_tonga_islands_text_en.md | 84 ++++++++++++ ...io_tonga_tonga_islands_text_pipeline_en.md | 69 ++++++++++ ...024-09-05-whisper_small_arabic_arbml_en.md | 84 ++++++++++++ ...-whisper_small_arabic_arbml_pipeline_en.md | 69 ++++++++++ ...-05-whisper_small_bengali_anuragshas_bn.md | 84 ++++++++++++ ...er_small_bengali_anuragshas_pipeline_bn.md | 69 ++++++++++ ...isper_small_egyptian_asr_v2_pipeline_en.md | 69 ++++++++++ ...isper_small_hindi_abhiramk6_pipeline_hi.md | 69 ++++++++++ .../2024-09-05-whisper_small_hre4_2_en.md | 84 ++++++++++++ ...-09-05-whisper_small_hre4_2_pipeline_en.md | 69 ++++++++++ ...2024-09-05-whisper_test_quant_smokxy_en.md | 84 ++++++++++++ ...5-whisper_test_quant_smokxy_pipeline_en.md | 69 ++++++++++ .../2024-09-05-wmdp_classifier_en.md | 94 ++++++++++++++ .../2024-09-05-wordwizard_masked_lm_en.md | 94 ++++++++++++++ ...-09-05-wordwizard_masked_lm_pipeline_en.md | 70 ++++++++++ .../2024-09-05-xlm_emo_t_maryamfp_en.md | 94 ++++++++++++++ ...24-09-05-xlm_emo_t_maryamfp_pipeline_en.md | 70 ++++++++++ .../2024-09-05-xlm_nli_m_korsemeval_en.md | 94 ++++++++++++++ ...-09-05-xlm_nli_m_korsemeval_pipeline_en.md | 70 ++++++++++ .../2024-09-05-xlm_pretrain_en.md | 94 ++++++++++++++ ...2024-09-05-xlm_roberta_base_1024_256_en.md | 94 ++++++++++++++ ..._roberta_base_balance_mixed_aug_swap_en.md | 94 ++++++++++++++ ...base_balance_mixed_aug_swap_pipeline_en.md | 70 ++++++++++ ...4-09-05-xlm_roberta_base_csfd_slovak_en.md | 94 ++++++++++++++ ...lm_roberta_base_csfd_slovak_pipeline_en.md | 70 ++++++++++ ...base_final_mixed_aug_replace_tfidf_1_en.md | 94 ++++++++++++++ ...l_mixed_aug_replace_tfidf_1_pipeline_en.md | 70 ++++++++++ ...m_roberta_base_final_vietnam_train_1_en.md | 94 ++++++++++++++ ..._base_final_vietnam_train_1_pipeline_en.md | 70 ++++++++++ ...etuned_augument_visquad2_15_3_2023_3_en.md | 86 +++++++++++++ ...5-xlm_roberta_base_finetuned_covidqa_en.md | 86 +++++++++++++ ...erta_base_finetuned_covidqa_pipeline_en.md | 69 ++++++++++ ...5-xlm_roberta_base_finetuned_english_en.md | 94 ++++++++++++++ ...erta_base_finetuned_english_pipeline_en.md | 70 ++++++++++ ...berautextification2024_9010_task2_v1_en.md | 94 ++++++++++++++ ...ification2024_9010_task2_v1_pipeline_en.md | 70 ++++++++++ ...lm_roberta_base_finetuned_kintweetse_en.md | 94 ++++++++++++++ ...a_base_finetuned_kintweetse_pipeline_en.md | 70 ++++++++++ ...yarwanda_finetuned_kinyarwanda_sent3_en.md | 94 ++++++++++++++ ...finetuned_kinyarwanda_sent3_pipeline_en.md | 70 ++++++++++ ...netuned_kinyarwanda_tweets_finetuned_en.md | 94 ++++++++++++++ ...inyarwanda_tweets_finetuned_pipeline_en.md | 70 ++++++++++ ..._base_finetuned_marc_english_anditya_en.md | 94 ++++++++++++++ ...etuned_marc_english_anditya_pipeline_en.md | 70 ++++++++++ ...se_finetuned_marc_english_danwilbury_en.md | 94 ++++++++++++++ ...ta_base_finetuned_marc_english_dummy_en.md | 94 ++++++++++++++ ...inetuned_marc_english_dummy_pipeline_en.md | 70 ++++++++++ ...roberta_base_finetuned_nace_pipeline_en.md | 70 ++++++++++ ...-05-xlm_roberta_base_finetuned_shona_en.md | 94 ++++++++++++++ ...oberta_base_finetuned_shona_pipeline_en.md | 70 ++++++++++ ...lm_roberta_base_finetuned_sinquad_v1_en.md | 86 +++++++++++++ ...05-xlm_roberta_base_finetuned_somali_en.md | 94 ++++++++++++++ ...berta_base_finetuned_somali_pipeline_en.md | 70 ++++++++++ ...5-xlm_roberta_base_finetuned_squad_1_en.md | 86 +++++++++++++ ...l_then_wrime_all_first_epoch3_test01_en.md | 94 ++++++++++++++ ...ime_all_first_epoch3_test01_pipeline_en.md | 70 ++++++++++ 
...24-09-05-xlm_roberta_base_indonesian_en.md | 94 ++++++++++++++ ...xlm_roberta_base_indonesian_pipeline_en.md | 70 ++++++++++ ...ase_lr0_001_seed42_amh_hau_eng_train_en.md | 94 ++++++++++++++ ...01_seed42_amh_hau_eng_train_pipeline_en.md | 70 ++++++++++ ...c_original_kinyarwanda_amh_eng_train_en.md | 94 ++++++++++++++ ...l_kinyarwanda_amh_eng_train_pipeline_en.md | 70 ++++++++++ ...c_original_kinyarwanda_hau_eng_train_en.md | 94 ++++++++++++++ ...l_kinyarwanda_hau_eng_train_pipeline_en.md | 70 ++++++++++ ...oberta_base_mixed_replace_vietnamese_en.md | 94 ++++++++++++++ ...se_mixed_replace_vietnamese_pipeline_en.md | 70 ++++++++++ ...finetuned_squad2_covidqa_v2_all_data_en.md | 86 +++++++++++++ ..._squad2_covidqa_v2_all_data_pipeline_en.md | 69 ++++++++++ ...base_squad2_finetuned_squad_vnktrmnb_en.md | 86 +++++++++++++ ...4-09-05-xlm_roberta_base_squad_dutch_en.md | 86 +++++++++++++ ...lm_roberta_base_squad_dutch_pipeline_en.md | 69 ++++++++++ ...se_squad_finetuned_on_runaways_dutch_en.md | 86 +++++++++++++ ..._squad_finetuned_on_runaways_english_en.md | 86 +++++++++++++ ...netuned_on_runaways_english_pipeline_en.md | 69 ++++++++++ ...e_squad_finetuned_on_runaways_french_en.md | 86 +++++++++++++ ...inetuned_on_runaways_french_pipeline_en.md | 69 ++++++++++ ..._french_60000_tweet_sentiment_french_en.md | 94 ++++++++++++++ ...entiment_arabic_trimmed_arabic_15000_en.md | 94 ++++++++++++++ ...arabic_trimmed_arabic_15000_pipeline_en.md | 70 ++++++++++ ...timent_english_trimmed_english_30000_en.md | 94 ++++++++++++++ ...glish_trimmed_english_30000_pipeline_en.md | 70 ++++++++++ ..._roberta_base_tweet_sentiment_french_en.md | 94 ++++++++++++++ ...timent_italian_trimmed_italian_10000_en.md | 94 ++++++++++++++ ...alian_trimmed_italian_10000_pipeline_en.md | 70 ++++++++++ ...timent_spanish_trimmed_spanish_30000_en.md | 94 ++++++++++++++ ...anish_trimmed_spanish_30000_pipeline_en.md | 70 ++++++++++ ..._roberta_base_verdict_classification_en.md | 94 ++++++++++++++ ...base_verdict_classification_pipeline_en.md | 70 ++++++++++ ...24-09-05-xlm_roberta_base_vietnamese_en.md | 86 +++++++++++++ ...xlm_roberta_base_vietnamese_pipeline_en.md | 69 ++++++++++ ...5-xlm_roberta_base_yelp_mlm_pipeline_en.md | 70 ++++++++++ ...clickbait_detection_nepal_bhasa_data_en.md | 94 ++++++++++++++ ..._detection_nepal_bhasa_data_pipeline_en.md | 70 ++++++++++ ...oberta_clickbait_spoiling_2_pipeline_en.md | 69 ++++++++++ ...024-09-05-xlm_roberta_emotion_unmolb_en.md | 94 ++++++++++++++ ...-xlm_roberta_emotion_unmolb_pipeline_en.md | 70 ++++++++++ ...europarl_language_detection_pipeline_xx.md | 70 ++++++++++ ..._roberta_europarl_language_detection_xx.md | 94 ++++++++++++++ ...etuned_augument_visquad2_24_3_2023_1_en.md | 86 +++++++++++++ ...gument_visquad2_24_3_2023_1_pipeline_en.md | 69 ++++++++++ ...ndosquadv2_1693993829_8_2e_05_0_01_5_en.md | 86 +++++++++++++ ...2_1693993829_8_2e_05_0_01_5_pipeline_en.md | 69 ++++++++++ ...rta_qa_XLMr_ENIS_QA_IsQ_EnA_pipeline_en.md | 69 ++++++++++ ...fine_tune_24465520_26265902_pipeline_en.md | 69 ++++++++++ ...nlp_more_fine_tune_24465520_26265908_en.md | 106 ++++++++++++++++ ...fine_tune_24465520_26265909_pipeline_en.md | 69 ++++++++++ ...nlp_more_fine_tune_24465520_26265911_en.md | 106 ++++++++++++++++ ...fine_tune_24465520_26265911_pipeline_en.md | 69 ++++++++++ ...oberta_base_squad2_24465519_pipeline_en.md | 69 ++++++++++ ...autonlp_roberta_base_squad2_24465521_en.md | 106 ++++++++++++++++ ...oberta_base_squad2_24465521_pipeline_en.md | 69 ++++++++++ 
...hai_xlm_roberta_base_squad2_pipeline_th.md | 69 ++++++++++ ...erta_qa_thai_xlm_roberta_base_squad2_th.md | 107 ++++++++++++++++ .../2024-09-05-xlm_roberta_qa_xlm_all_en.md | 106 ++++++++++++++++ ...9-05-xlm_roberta_qa_xlm_all_pipeline_en.md | 69 ++++++++++ ...lm_roberta_qa_xlm_roberta_base_chaii_en.md | 106 ++++++++++++++++ ...xlm_roberta_base_vietnamese_pipeline_vn.md | 69 ++++++++++ ...berta_qa_xlm_roberta_base_vietnamese_vn.md | 106 ++++++++++++++++ ...lm_roberta_qa_xlm_roberta_squad_v1.1_en.md | 106 ++++++++++++++++ ...a_qa_xlm_roberta_squad_v1.1_pipeline_en.md | 69 ++++++++++ ...as_squad_spanish_spanish_saattrupdan_es.md | 86 +++++++++++++ ...spanish_spanish_saattrupdan_pipeline_es.md | 69 ++++++++++ ...lm_roberta_qa_xlmr_enis_qa_icelandic_is.md | 86 +++++++++++++ ...a_qa_xlmr_enis_qa_icelandic_pipeline_is.md | 69 ++++++++++ ..._v_base_trimmed_english_xnli_english_en.md | 94 ++++++++++++++ ...rimmed_english_xnli_english_pipeline_en.md | 70 ++++++++++ ...05-xlmr_chatgptdetect_noisy_pipeline_en.md | 70 ++++++++++ ...ish_german_all_shuffled_764_test1000_en.md | 94 ++++++++++++++ ..._german_train_shuffled_1986_test2000_en.md | 94 ++++++++++++++ ...rain_shuffled_1986_test2000_pipeline_en.md | 70 ++++++++++ ...english_train_shuffled_1986_test2000_en.md | 94 ++++++++++++++ ...rain_shuffled_1986_test2000_pipeline_en.md | 70 ++++++++++ .../2024-09-05-xlmr_finetuned_qamr_en.md | 86 +++++++++++++ ...4-09-05-xlmr_finetuned_qamr_pipeline_en.md | 69 ++++++++++ .../2024-09-05-xlmr_finetuned_squad1_en.md | 86 +++++++++++++ ...09-05-xlmr_finetuned_squad1_pipeline_en.md | 69 ++++++++++ .../2024-09-05-xlmr_qa_register_en.md | 94 ++++++++++++++ ...2024-09-05-xlmr_qa_register_pipeline_en.md | 70 ++++++++++ ...ese_english_all_shuffled_42_test1000_en.md | 94 ++++++++++++++ ...sh_all_shuffled_42_test1000_pipeline_en.md | 70 ++++++++++ .../2024-09-05-xlmr_tatoeba_en.md | 94 ++++++++++++++ .../2024-09-05-xlmr_tatoeba_pipeline_en.md | 70 ++++++++++ .../2024-09-05-xlmrbase_finetuned_squad_en.md | 86 +++++++++++++ ...05-xlmrbase_finetuned_squad_pipeline_en.md | 69 ++++++++++ ...lmroberta_finetuned_squadv2_pipeline_en.md | 69 ++++++++++ ...oberta_finetuned_tydiqa_tel_pipeline_en.md | 69 ++++++++++ ..._areaspopscience_mxtweets_multilabel_en.md | 94 ++++++++++++++ ...science_mxtweets_multilabel_pipeline_en.md | 70 ++++++++++ ..._science_subject_text_classification_en.md | 94 ++++++++++++++ .../2024-09-05-xnli_xlm_r_only_thai_en.md | 94 ++++++++++++++ ...-09-05-xnli_xlm_r_only_thai_pipeline_en.md | 70 ++++++++++ ...ity_microsoft_deberta_v3_base_seed_3_en.md | 94 ++++++++++++++ ...soft_deberta_v3_base_seed_3_pipeline_en.md | 70 ++++++++++ .../2024-09-05-yiddish_roberta_oscar_en.md | 94 ++++++++++++++ ...09-05-yiddish_roberta_oscar_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-05-zh2en40_en.md | 94 ++++++++++++++ .../2024-09-05-zh2en40_pipeline_en.md | 70 ++++++++++ ...ase_finetuned_yahoo_800_200_pipeline_en.md | 70 ++++++++++ ...2024-09-06-100_sdb_taxxl_average_768_en.md | 94 ++++++++++++++ ...6-100_sdb_taxxl_average_768_pipeline_en.md | 70 ++++++++++ ...002_baseline_xlmr_clickbait_spoiling_en.md | 86 +++++++++++++ ...ine_xlmr_clickbait_spoiling_pipeline_en.md | 69 ++++++++++ .../2024-09-06-4_shot_twitter_en.md | 86 +++++++++++++ .../2024-09-06-4_shot_twitter_pipeline_en.md | 69 ++++++++++ ...024-09-06-500_sdb_taxxl_truncate_768_en.md | 94 ++++++++++++++ .../2024-09-06-5w1h_recognizer_pipeline_es.md | 70 ++++++++++ ...6-7_shot_sta_slovak_batch10_pipeline_en.md | 69 ++++++++++ 
...09-06-acarisbert_distilbert_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-06-accu_2_en.md | 94 ++++++++++++++ ...acrossapps_ndd_mantisbt_test_content_en.md | 94 ++++++++++++++ ...s_ndd_mantisbt_test_content_pipeline_en.md | 70 ++++++++++ .../2024-09-06-address_emnet_pipeline_en.md | 69 ++++++++++ ...r_mini_finetuned_kintweetsd_pipeline_en.md | 70 ++++++++++ ...ews_microsoft_deberta_v3_base_seed_3_en.md | 94 ++++++++++++++ ...soft_deberta_v3_base_seed_3_pipeline_en.md | 70 ++++++++++ ...ws_microsoft_deberta_v3_large_seed_3_en.md | 94 ++++++++++++++ ...oft_deberta_v3_large_seed_3_pipeline_en.md | 70 ++++++++++ .../2024-09-06-ai_human_detai_kk.md | 94 ++++++++++++++ ...-albert_base_chinese_sayula_popoluca_zh.md | 94 ++++++++++++++ .../2024-09-06-albert_base_v2_luciayn_en.md | 94 ++++++++++++++ .../2024-09-06-albert_bbc_news_en.md | 94 ++++++++++++++ .../2024-09-06-albert_large_v2_fold_3_en.md | 94 ++++++++++++++ ...9-06-albert_large_v2_fold_3_pipeline_en.md | 70 ++++++++++ ...iny_chinese_sayula_popoluca_pipeline_zh.md | 70 ++++++++++ ...-albert_tiny_chinese_sayula_popoluca_zh.md | 94 ++++++++++++++ ...all_mpnet_all_permutations_64_05_mnr_en.md | 86 +++++++++++++ ..._all_permutations_64_05_mnr_pipeline_en.md | 69 ++++++++++ ...base_v2_2epoch_30000_mar2_closs_prsn_en.md | 86 +++++++++++++ ...epoch_30000_mar2_closs_prsn_pipeline_en.md | 69 ++++++++++ ...et_eval_emotion__classifier_pipeline_en.md | 70 ++++++++++ ...-all_mpnet_base_v2_bioasq_matryoshka_en.md | 86 +++++++++++++ ...t_base_v2_bioasq_matryoshka_pipeline_en.md | 69 ++++++++++ ...se_v2_fine_tuned_epochs_8_binhcode25_en.md | 86 +++++++++++++ ...024-09-06-all_mpnet_base_v2_ict_ep30_en.md | 86 +++++++++++++ ...-all_mpnet_base_v2_ict_ep30_pipeline_en.md | 69 ++++++++++ ...09-06-ancient_greek_to_1453_ner_bert_en.md | 94 ++++++++++++++ ...ient_greek_to_1453_ner_bert_pipeline_en.md | 70 ++++++++++ ...-assistantapp_whisper_quran_pipeline_ar.md | 69 ++++++++++ .../2024-09-06-atte_0_pipeline_en.md | 70 ++++++++++ ...graha_audio_tonga_tonga_islands_text_en.md | 84 ++++++++++++ ...io_tonga_tonga_islands_text_pipeline_en.md | 69 ++++++++++ ...6-autotrain_okr_iptal_v5_48523117787_en.md | 94 ++++++++++++++ ...in_okr_iptal_v5_48523117787_pipeline_en.md | 70 ++++++++++ .../2024-09-06-banglapunctuationmodel_en.md | 94 ++++++++++++++ ...9-06-banglapunctuationmodel_pipeline_en.md | 70 ++++++++++ .../2024-09-06-bat32_lr10_5_epo10_en.md | 84 ++++++++++++ ...24-09-06-bat32_lr10_5_epo10_pipeline_en.md | 69 ++++++++++ ...ased_finetuned_conll2003_english_ner_en.md | 94 ++++++++++++++ ...tuned_conll2003_english_ner_pipeline_en.md | 70 ++++++++++ ...09-06-bert_base_cased_finetuned_mrpc_en.md | 92 ++++++++++++++ ...t_base_cased_finetuned_mrpc_pipeline_en.md | 70 ++++++++++ ...ert_base_german_dbmdz_cased_pipeline_de.md | 70 ++++++++++ ...09-06-bert_base_german_dbmdz_uncased_de.md | 92 ++++++++++++++ .../2024-09-06-bert_base_magicslabnu_en.md | 94 ++++++++++++++ ...ual_cased_finetuned_amharic_pipeline_xx.md | 70 ++++++++++ ...multilingual_cased_finetuned_amharic_xx.md | 94 ++++++++++++++ ...tilingual_cased_google_bert_pipeline_xx.md | 70 ++++++++++ ..._base_multilingual_cased_google_bert_xx.md | 94 ++++++++++++++ .../2024-09-06-bert_base_ner_nc1_en.md | 94 ++++++++++++++ ...024-09-06-bert_base_ner_nc1_pipeline_en.md | 70 ++++++++++ ...base_turkish_cased_ner_finetuned_ner_en.md | 94 ++++++++++++++ ...ish_cased_ner_finetuned_ner_pipeline_en.md | 70 ++++++++++ ...bert_base_turkish_ner_cased_pipeline_tr.md | 70 ++++++++++ 
...bert_base_uncased_contracts_pipeline_en.md | 70 ++++++++++ ...9-06-bert_checkpoint_980000_pipeline_en.md | 70 ++++++++++ .../2024-09-06-bert_fda_nutrition_ner_en.md | 94 ++++++++++++++ ...024-09-06-bert_fine_tuned_ner_resume_en.md | 94 ++++++++++++++ ...-bert_fine_tuned_ner_resume_pipeline_en.md | 70 ++++++++++ ...09-06-bert_finetuned1_squad_pipeline_en.md | 69 ++++++++++ ...9-06-bert_finetuned_ner_ashaduzzaman_en.md | 94 ++++++++++++++ ..._finetuned_ner_ashaduzzaman_pipeline_en.md | 70 ++++++++++ ...-bert_finetuned_ner_bangla_arbitropy_en.md | 94 ++++++++++++++ ...etuned_ner_bangla_arbitropy_pipeline_en.md | 70 ++++++++++ .../2024-09-06-bert_gps_research_001_en.md | 86 +++++++++++++ .../2024-09-06-bert_large_cased_en.md | 93 ++++++++++++++ ...-bert_large_cased_whole_word_masking_en.md | 92 ++++++++++++++ ...24-09-06-bert_large_uncased_squad_v2_en.md | 86 +++++++++++++ ...bert_large_uncased_squad_v2_pipeline_en.md | 69 ++++++++++ ...2024-09-06-bert_ner_kriyans_pipeline_en.md | 70 ++++++++++ ...phishing_classifier_student_jeahyung_en.md | 94 ++++++++++++++ ...sifier_base_vietnamese_upos_pipeline_vi.md | 70 ++++++++++ ...oken_classifier_base_vietnamese_upos_vi.md | 100 +++++++++++++++ ...r_berturk_keyword_extractor_pipeline_tr.md | 70 ++++++++++ ...classifier_berturk_keyword_extractor_tr.md | 98 ++++++++++++++ ...-06-bert_token_classifier_ner_german_de.md | 99 +++++++++++++++ ...rtimbau_large_ner_selective_pipeline_pt.md | 70 ++++++++++ ...-09-06-bertimbau_large_ner_selective_pt.md | 94 ++++++++++++++ ...4-09-06-bertin_roberta_large_spanish_es.md | 94 ++++++++++++++ ...ertin_roberta_large_spanish_pipeline_es.md | 70 ++++++++++ ...09-06-best_model_yelp_polarity_64_13_en.md | 94 ++++++++++++++ ...t_model_yelp_polarity_64_13_pipeline_en.md | 70 ++++++++++ ...bge_reranker_base_course_skill_tuned_en.md | 94 ++++++++++++++ ...ker_base_course_skill_tuned_pipeline_en.md | 70 ++++++++++ .../2024-09-06-bias_classifier_roberta_en.md | 94 ++++++++++++++ ...linicalbert_full_finetuned_ner_pablo_en.md | 94 ++++++++++++++ ...rt_full_finetuned_ner_pablo_pipeline_en.md | 70 ++++++++++ ...olq_microsoft_deberta_v3_base_seed_1_en.md | 94 ++++++++++++++ .../2024-09-06-bowdpr_marco_en.md | 94 ++++++++++++++ .../2024-09-06-bowdpr_marco_pipeline_en.md | 70 ++++++++++ ...09-06-bsc_bio_ehr_spanish_ehealth_kd_es.md | 94 ++++++++++++++ ..._bio_ehr_spanish_ehealth_kd_pipeline_es.md | 70 ++++++++++ ...sc_bio_ehr_spanish_symptemist_75_ner_en.md | 94 ++++++++++++++ ...r_spanish_symptemist_75_ner_pipeline_en.md | 70 ++++++++++ ...sc_bio_ehr_spanish_symptemist_85_ner_en.md | 94 ++++++++++++++ .../2024-09-06-burmese_anon_model_en.md | 94 ++++++++++++++ ...24-09-06-burmese_anon_model_pipeline_en.md | 70 ++++++++++ ...-burmese_awesome_model_ahmedssoliman_en.md | 94 ++++++++++++++ ...awesome_model_ahmedssoliman_pipeline_en.md | 70 ++++++++++ ...urmese_awesome_model_gamino_pipeline_en.md | 70 ++++++++++ ...-09-06-burmese_awesome_punccap_model_en.md | 94 ++++++++++++++ ...some_qa_model_ahmad01010101_pipeline_en.md | 69 ++++++++++ ...urmese_awesome_qa_model_balalalalala_en.md | 86 +++++++++++++ ...esome_qa_model_balalalalala_pipeline_en.md | 69 ++++++++++ ...esome_qa_model_chernoffface_pipeline_en.md | 69 ++++++++++ ...-burmese_awesome_qa_model_diodiodada_en.md | 86 +++++++++++++ ...awesome_qa_model_diodiodada_pipeline_en.md | 69 ++++++++++ ..._awesome_qa_model_fede_ezeq_pipeline_en.md | 69 ++++++++++ ...6-burmese_awesome_qa_model_funailife_en.md | 86 +++++++++++++ ...6-burmese_awesome_qa_model_jennydqmm_en.md | 86 +++++++++++++ 
...06-burmese_awesome_qa_model_kasmirak_en.md | 86 +++++++++++++ ...e_awesome_qa_model_kasmirak_pipeline_en.md | 69 ++++++++++ ...6-burmese_awesome_qa_model_lizhealey_en.md | 86 +++++++++++++ ...9-06-burmese_awesome_qa_model_madsci_en.md | 86 +++++++++++++ ...ese_awesome_qa_model_madsci_pipeline_en.md | 69 ++++++++++ ...wesome_qa_model_munnafaisal_pipeline_en.md | 69 ++++++++++ ...-burmese_awesome_qa_model_nandyala12_en.md | 86 +++++++++++++ ...awesome_qa_model_nandyala12_pipeline_en.md | 69 ++++++++++ ...6-burmese_awesome_qa_model_navanjana_en.md | 86 +++++++++++++ ..._awesome_qa_model_navanjana_pipeline_en.md | 69 ++++++++++ ...9-06-burmese_awesome_qa_model_pechka_en.md | 86 +++++++++++++ ...9-06-burmese_awesome_qa_model_rentao_en.md | 86 +++++++++++++ ...wesome_qa_model_robinsh2023_pipeline_en.md | 69 ++++++++++ ...urmese_awesome_qa_model_simranjeet97_en.md | 86 +++++++++++++ ...esome_qa_model_simranjeet97_pipeline_en.md | 69 ++++++++++ ...06-burmese_awesome_qa_model_speng123_en.md | 86 +++++++++++++ ...06-burmese_awesome_qa_model_stevhliu_en.md | 86 +++++++++++++ ...e_awesome_qa_model_stevhliu_pipeline_en.md | 69 ++++++++++ ..._awesome_qa_model_walter133_pipeline_en.md | 69 ++++++++++ ...awesome_qa_model_wwhheerree_pipeline_en.md | 69 ++++++++++ ...urmese_awesome_qa_model_yangyangsong_en.md | 86 +++++++++++++ ...esome_qa_model_yangyangsong_pipeline_en.md | 69 ++++++++++ ...-09-06-burmese_awesome_setfit_model1_en.md | 86 +++++++++++++ ...rmese_awesome_setfit_model1_pipeline_en.md | 69 ++++++++++ ..._awesome_token_classification_v2_1_2_en.md | 94 ++++++++++++++ ...token_classification_v2_1_2_pipeline_en.md | 70 ++++++++++ ...urmese_awesome_wnut_all_saprotection_en.md | 94 ++++++++++++++ ...esome_wnut_all_saprotection_pipeline_en.md | 70 ++++++++++ ...ese_awesome_wnut_jpr_gonzalezrostani_en.md | 94 ++++++++++++++ ...me_wnut_jpr_gonzalezrostani_pipeline_en.md | 70 ++++++++++ ...burmese_awesome_wnut_jquali_pipeline_en.md | 70 ++++++++++ ...mese_awesome_wnut_model_2_majyoarisu_en.md | 94 ++++++++++++++ ...ome_wnut_model_2_majyoarisu_pipeline_en.md | 70 ++++++++++ ...esome_wnut_model_adalee1001_pipeline_en.md | 70 ++++++++++ ...rmese_awesome_wnut_model_casestudent_en.md | 94 ++++++++++++++ ...some_wnut_model_casestudent_pipeline_en.md | 70 ++++++++++ ...e_awesome_wnut_model_dlim12_pipeline_en.md | 70 ++++++++++ ...burmese_awesome_wnut_model_hoaan2003_en.md | 94 ++++++++++++++ ...wesome_wnut_model_hoaan2003_pipeline_en.md | 70 ++++++++++ ...burmese_awesome_wnut_model_jarvisx17_en.md | 94 ++++++++++++++ ...wesome_wnut_model_jarvisx17_pipeline_en.md | 70 ++++++++++ ...rmese_awesome_wnut_model_jsalafranca_en.md | 94 ++++++++++++++ ...some_wnut_model_jsalafranca_pipeline_en.md | 70 ++++++++++ ...-06-burmese_awesome_wnut_model_mhdkj_en.md | 94 ++++++++++++++ ...06-burmese_awesome_wnut_model_povhal_en.md | 94 ++++++++++++++ ...e_awesome_wnut_model_povhal_pipeline_en.md | 70 ++++++++++ ...urmese_awesome_wnut_model_roeizucker_en.md | 94 ++++++++++++++ ...esome_wnut_model_roeizucker_pipeline_en.md | 70 ++++++++++ ...e_awesome_wnut_model_ttnksm_pipeline_en.md | 70 ++++++++++ ...esome_wnut_model_yannik_646_pipeline_en.md | 70 ++++++++++ ...-06-burmese_awesome_wnut_model_ydc13_en.md | 94 ++++++++++++++ ...se_awesome_wnut_model_ydc13_pipeline_en.md | 70 ++++++++++ ...urmese_awesome_wnut_model_yunjinchoi_en.md | 94 ++++++++++++++ ...esome_wnut_model_yunjinchoi_pipeline_en.md | 70 ++++++++++ ...06-burmese_awesome_wnut_model_zanche_en.md | 94 ++++++++++++++ ...e_awesome_wnut_model_zanche_pipeline_en.md | 70 ++++++++++ 
...09-06-burmese_dist_nepal_bhasa_model_en.md | 94 ++++++++++++++ ...mese_dist_nepal_bhasa_model_pipeline_en.md | 70 ++++++++++ ...pus_maltese_english_vietnamese_model_en.md | 94 ++++++++++++++ .../2024-09-06-burmese_model_onsba_en.md | 86 +++++++++++++ ...4-09-06-burmese_model_onsba_pipeline_en.md | 69 ++++++++++ ...6-burmese_ner_model_mido545_pipeline_en.md | 70 ++++++++++ ...-burmese_nmt_model_ad_iiitd_pipeline_en.md | 70 ++++++++++ ...burmese_ws_extraction_model_26th_mar_en.md | 94 ++++++++++++++ ...06-camembert_base_test_model_sophios_en.md | 94 ++++++++++++++ ...ert_base_test_model_sophios_pipeline_en.md | 70 ++++++++++ .../2024-09-06-candle_cvss_complexity_en.md | 94 ++++++++++++++ ...9-06-candle_cvss_complexity_pipeline_en.md | 70 ++++++++++ ...24-09-06-candle_cvss_confidentiality_en.md | 94 ++++++++++++++ ...candle_cvss_confidentiality_pipeline_en.md | 70 ++++++++++ ...-06-candle_cvss_interaction_pipeline_en.md | 70 ++++++++++ .../2024-09-06-canvers_audio_caption_v1_en.md | 84 ++++++++++++ ...06-canvers_audio_caption_v1_pipeline_en.md | 69 ++++++++++ ...09-06-chatutterance_english_pipeline_en.md | 70 ++++++++++ .../2024-09-06-chuvash_validator_en.md | 86 +++++++++++++ ...claim_extraction_classifier_pipeline_en.md | 70 ++++++++++ ...6-clinicalbert_craft_ner_nepal_bhasa_en.md | 94 ++++++++++++++ ...lbert_craft_ner_nepal_bhasa_pipeline_en.md | 70 ++++++++++ ...-06-clinicalnerpt_procedure_pipeline_pt.md | 70 ++++++++++ .../2024-09-06-clinicalnerpt_procedure_pt.md | 94 ++++++++++++++ ...shion_attribute_model_try_1_pipeline_en.md | 69 ++++++++++ ...m_trimmed_with_g_and_spaces_pipeline_en.md | 70 ++++++++++ .../2024-09-06-codice_fiscale_en.md | 94 ++++++++++++++ ...6-complaints_classifier_jpsteinhafel_en.md | 94 ++++++++++++++ .../2024-09-06-congretimbau_en.md | 94 ++++++++++++++ .../2024-09-06-congretimbau_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-06-content_en.md | 86 +++++++++++++ ...nvberturk_keyword_extractor_pipeline_tr.md | 70 ++++++++++ ...-09-06-convberturk_keyword_extractor_tr.md | 94 ++++++++++++++ .../ahmedlone127/2024-09-06-cpegen_vpv_en.md | 94 ++++++++++++++ ...024-09-06-cpu_conditional_classifier_en.md | 92 ++++++++++++++ ...-cpu_conditional_classifier_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-06-cros_1_en.md | 94 ++++++++++++++ .../2024-09-06-cros_1_pipeline_en.md | 70 ++++++++++ .../2024-09-06-cryptobertrefined_en.md | 94 ++++++++++++++ ...4-09-06-cs431_vietnamese_coqe_csi_v2_en.md | 94 ++++++++++++++ ...s431_vietnamese_coqe_csi_v2_pipeline_en.md | 70 ++++++++++ .../2024-09-06-ct_cos_xlmr_20230920_2_en.md | 86 +++++++++++++ ...9-06-ct_cos_xlmr_20230920_2_pipeline_en.md | 69 ++++++++++ .../2024-09-06-ct_kld_xlmr_20230814_en.md | 86 +++++++++++++ ...024-09-06-danish_distilbert_pipeline_en.md | 70 ++++++++++ .../2024-09-06-darkbert_finetuned_ner_en.md | 94 ++++++++++++++ ...9-06-darkbert_finetuned_ner_pipeline_en.md | 70 ++++++++++ ...kde4_english_german_qlora_dataequity_en.md | 94 ++++++++++++++ ...ish_german_qlora_dataequity_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-06-dbbuc_30p_en.md | 94 ++++++++++++++ .../2024-09-06-dbbuc_30p_pipeline_en.md | 70 ++++++++++ ...9-06-deberta_amazon_reviews_v1_shuli_en.md | 94 ++++++++++++++ ...-deberta_large_hallucination_eval_v2_en.md | 94 ++++++++++++++ ...large_hallucination_eval_v2_pipeline_en.md | 70 ++++++++++ .../2024-09-06-deberta_v3_base_base_en.md | 94 ++++++++++++++ ...-09-06-deberta_v3_base_base_pipeline_en.md | 70 ++++++++++ ...6-deberta_v3_base_glue_cola_pipeline_en.md | 70 ++++++++++ 
...9-06-deberta_v3_base_qqp_garipovroma_en.md | 94 ++++++++++++++ ...rta_v3_base_qqp_garipovroma_pipeline_en.md | 70 ++++++++++ ...024-09-06-deberta_v3_base_rocstories_en.md | 94 ++++++++++++++ ...rta_v3_base_rocstories_test_pipeline_en.md | 70 ++++++++++ ...eberta_v3_base_tasksource_paraphrase_en.md | 94 ++++++++++++++ ..._base_tasksource_paraphrase_pipeline_en.md | 70 ++++++++++ ...v3_bass_complex_questions_classifier_en.md | 94 ++++++++++++++ ...6-deberta_v3_large__sst2__train_16_3_en.md | 94 ++++++++++++++ ..._v3_large__sst2__train_16_3_pipeline_en.md | 70 ++++++++++ ...6-deberta_v3_large__sst2__train_16_7_en.md | 94 ++++++++++++++ ..._v3_large__sst2__train_16_7_pipeline_en.md | 70 ++++++++++ ...06-deberta_v3_large__sst2__train_8_4_en.md | 94 ++++++++++++++ .../2024-09-06-deberta_v3_large_emotion_en.md | 94 ++++++++++++++ ...06-deberta_v3_large_emotion_pipeline_en.md | 70 ++++++++++ ...eberta_v3_large_mono_3_epoch_lr_5e_6_en.md | 94 ++++++++++++++ ..._large_mono_3_epoch_lr_5e_6_pipeline_en.md | 70 ++++++++++ ...024-09-06-deberta_v3_large_offensive_en.md | 94 ++++++++++++++ ...-deberta_v3_large_offensive_pipeline_en.md | 70 ++++++++++ ...in_passage_old_facts_rater_half_gpt4_en.md | 94 ++++++++++++++ ...e_old_facts_rater_half_gpt4_pipeline_en.md | 70 ++++++++++ ...hasa_fact_related_passage_rater_gpt4_en.md | 94 ++++++++++++++ ...hasa_fact_related_passage_rater_half_en.md | 94 ++++++++++++++ ..._related_passage_rater_half_pipeline_en.md | 70 ++++++++++ ...berta_v3_large_survey_rater_sample_1_en.md | 94 ++++++++++++++ ...large_survey_rater_sample_1_pipeline_en.md | 70 ++++++++++ ...lated_passage_consistency_rater_half_en.md | 94 ++++++++++++++ ...sage_consistency_rater_half_pipeline_en.md | 70 ++++++++++ ...eberta_v3_small_finetuned_mnli_rdp99_en.md | 94 ++++++++++++++ ...berta_xsmall_hatespeech_reward_model_en.md | 94 ++++++++++++++ .../2024-09-06-declutr_s10_arabic_en.md | 94 ++++++++++++++ ...24-09-06-declutr_s10_arabic_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-06-deep_4_en.md | 94 ++++++++++++++ ...-06-deeva_modcat_seqclass_deberta_v1_en.md | 94 ++++++++++++++ ..._modcat_seqclass_deberta_v1_pipeline_en.md | 70 ++++++++++ .../2024-09-06-detectors_legit_user_en.md | 94 ++++++++++++++ ...-09-06-detectors_legit_user_pipeline_en.md | 70 ++++++++++ .../2024-09-06-disease_diagnosis_en.md | 94 ++++++++++++++ ...024-09-06-disease_diagnosis_pipeline_en.md | 70 ++++++++++ ...tilbert_base_cased_finetuned_chunk_2_en.md | 94 ++++++++++++++ ...ase_cased_finetuned_chunk_2_pipeline_en.md | 70 ++++++++++ ...ive_extraction_retrained_fedora_copr_en.md | 86 +++++++++++++ ...ction_retrained_fedora_copr_pipeline_en.md | 69 ++++++++++ ...istilbert_base_german_cased_pipeline_de.md | 70 ++++++++++ ...ert_base_multilingual_cased_pipeline_xx.md | 70 ++++++++++ ...6-distilbert_base_multilingual_cased_xx.md | 94 ++++++++++++++ ...-06-distilbert_base_ner_058_pipeline_en.md | 70 ++++++++++ ...bert_base_uncased_ai4privacy_english_en.md | 94 ++++++++++++++ ..._uncased_ai4privacy_english_pipeline_en.md | 70 ++++++++++ ...ase_uncased_distilled_squad_qa_model_en.md | 86 +++++++++++++ ...ed_distilled_squad_qa_model_pipeline_en.md | 69 ++++++++++ ...lbert_base_uncased_emotion_xyang2023_en.md | 94 ++++++++++++++ ...e_uncased_emotion_xyang2023_pipeline_en.md | 70 ++++++++++ ...e_uncased_finetuned_clinc_buruzaemon_en.md | 94 ++++++++++++++ ...tilbert_base_uncased_finetuned_dourc_en.md | 86 +++++++++++++ ...ase_uncased_finetuned_dourc_pipeline_en.md | 69 ++++++++++ 
...ase_uncased_finetuned_emotion_sjhong_en.md | 94 ++++++++++++++ ...ed_finetuned_emotion_sjhong_pipeline_en.md | 70 ++++++++++ ...ncased_finetuned_emotion_skillripper_en.md | 94 ++++++++++++++ ...ert_base_uncased_finetuned_goemotion_en.md | 94 ++++++++++++++ ...ilbert_base_uncased_finetuned_hotels_en.md | 94 ++++++++++++++ ...se_uncased_finetuned_hotels_pipeline_en.md | 70 ++++++++++ ...inetuned_imdb_accelerate_blitherboom_en.md | 94 ++++++++++++++ ...imdb_accelerate_blitherboom_pipeline_en.md | 70 ++++++++++ ...d_finetuned_imdb_accelerate_haotiany_en.md | 94 ++++++++++++++ ...ed_finetuned_imdb_accelerate_ipvikas_en.md | 94 ++++++++++++++ ...ned_imdb_accelerate_ipvikas_pipeline_en.md | 70 ++++++++++ ...d_finetuned_imdb_accelerate_muhbdeir_en.md | 94 ++++++++++++++ ...elerate_nlp_course_chapter7_section2_en.md | 94 ++++++++++++++ ...lp_course_chapter7_section2_pipeline_en.md | 70 ++++++++++ ...tuned_imdb_accelerate_valentinguigon_en.md | 94 ++++++++++++++ ...b_accelerate_valentinguigon_pipeline_en.md | 70 ++++++++++ ...base_uncased_finetuned_imdb_baobao88_en.md | 94 ++++++++++++++ ...se_uncased_finetuned_imdb_beccacohen_en.md | 94 ++++++++++++++ ...inetuned_imdb_hamzasidhu786_pipeline_en.md | 70 ++++++++++ ...ed_imdb_hf_tutorial_using_accelerate_en.md | 94 ++++++++++++++ ...rt_base_uncased_finetuned_imdb_lb100_en.md | 94 ++++++++++++++ ...sed_finetuned_imdb_mireya25_pipeline_en.md | 70 ++++++++++ ...cased_finetuned_imdb_rikrim_pipeline_en.md | 70 ++++++++++ ...sed_finetuned_imdb_ronenh24_pipeline_en.md | 70 ++++++++++ ...se_uncased_finetuned_ner_arundhati87_en.md | 94 ++++++++++++++ ...d_finetuned_ner_arundhati87_pipeline_en.md | 70 ++++++++++ ..._base_uncased_finetuned_ner_cerastes_en.md | 94 ++++++++++++++ ...rt_base_uncased_finetuned_ner_iamdev_en.md | 94 ++++++++++++++ ...ncased_finetuned_ner_iamdev_pipeline_en.md | 70 ++++++++++ ...base_uncased_finetuned_ner_steven668_en.md | 94 ++++++++++++++ ...sed_finetuned_ner_steven668_pipeline_en.md | 70 ++++++++++ ..._sentiment_amazon_finetuned_emotions_en.md | 94 ++++++++++++++ ...base_uncased_finetuned_squad_blaze07_en.md | 86 +++++++++++++ ...ased_finetuned_squad_d5716d28_dkimds_en.md | 92 ++++++++++++++ ...tuned_squad_d5716d28_dkimds_pipeline_en.md | 69 ++++++++++ ...sed_finetuned_squad_d5716d28_maseiya_en.md | 92 ++++++++++++++ ...inetuned_squad_d5716d28_miesnerjacob_en.md | 92 ++++++++++++++ ...ed_finetuned_squad_d5716d28_scmis011_en.md | 86 +++++++++++++ ...ned_squad_d5716d28_scmis011_pipeline_en.md | 69 ++++++++++ ..._finetuned_squad_d5716d28_sebastians_en.md | 92 ++++++++++++++ ...d_squad_d5716d28_sebastians_pipeline_en.md | 69 ++++++++++ ...squad_d5716d28_shadowtwin41_pipeline_en.md | 69 ++++++++++ ...ase_uncased_finetuned_squad_katxtong_en.md | 86 +++++++++++++ ...ed_finetuned_squad_katxtong_pipeline_en.md | 69 ++++++++++ ...se_uncased_finetuned_squad_shyamkant_en.md | 86 +++++++++++++ ...d_finetuned_squad_shyamkant_pipeline_en.md | 69 ++++++++++ ...sed_finetuned_squad_toorgil_pipeline_en.md | 69 ++++++++++ ...n_chamorro_cree_entry_classification_it.md | 94 ++++++++++++++ ...o_cree_entry_classification_pipeline_it.md | 70 ++++++++++ ...istilbert_base_uncased_on_mini_finer_en.md | 94 ++++++++++++++ ..._base_uncased_on_mini_finer_pipeline_en.md | 70 ++++++++++ ..._uncased_question_answering_pipeline_en.md | 69 ++++++++++ ...06-distilbert_base_uncased_squad2_p5_en.md | 86 +++++++++++++ ...bert_base_uncased_squad2_p5_pipeline_en.md | 69 ++++++++++ ...ed_imdb_nlp_course_chapter7_section2_en.md | 94 ++++++++++++++ 
.../2024-09-06-distilbert_conll2003_en.md | 94 ++++++++++++++ ...2024-09-06-distilbert_emotion_ahm123_en.md | 94 ++++++++++++++ ...6-distilbert_emotion_ahm123_pipeline_en.md | 70 ++++++++++ ...-06-distilbert_extractive_qa_project_en.md | 86 +++++++++++++ ...6-distilbert_finetune_own_data_model_en.md | 94 ++++++++++++++ ...ert_finetune_own_data_model_pipeline_en.md | 70 ++++++++++ ...ert_finetuned_ai4privacy_v2_pipeline_en.md | 70 ++++++++++ ...rt_finetuned_ner_amanpatkar_pipeline_en.md | 70 ++++++++++ ...-06-distilbert_finetuned_ner_mido545_en.md | 94 ++++++++++++++ ...lbert_finetuned_ner_mido545_pipeline_en.md | 70 ++++++++++ ...2024-09-06-distilbert_finetuned_oppo_en.md | 94 ++++++++++++++ ...6-distilbert_finetuned_oppo_pipeline_en.md | 70 ++++++++++ ...tilbert_finetuned_squad_ashaduzzaman_en.md | 86 +++++++++++++ ...inetuned_squad_ashaduzzaman_pipeline_en.md | 69 ++++++++++ ...ert_finetuned_squad_droo303_pipeline_en.md | 69 ++++++++++ ...lbert_finetuned_squadv2_nmc_29092004_en.md | 86 +++++++++++++ ...ert_finetuned_squadv2_vubacktracking_en.md | 86 +++++++++++++ ...uned_squadv2_vubacktracking_pipeline_en.md | 69 ++++++++++ .../2024-09-06-distilbert_maccrobat_en.md | 94 ++++++++++++++ ...-09-06-distilbert_maccrobat_pipeline_en.md | 70 ++++++++++ .../2024-09-06-distilbert_ner_initial_en.md | 94 ++++++++++++++ ...9-06-distilbert_ner_initial_pipeline_en.md | 70 ++++++++++ ...bert_qa_english_german_spanish_model_xx.md | 86 +++++++++++++ ...man_vietnamese_chinese_spanish_model_xx.md | 86 +++++++++++++ ...6-distilbert_qa_robustqa_baseline_01_en.md | 98 ++++++++++++++ ...6-distilbert_qa_robustqa_baseline_02_en.md | 98 ++++++++++++++ ...sh_vietnamese_chinese_model_pipeline_xx.md | 69 ++++++++++ ...lbert_sentiment_analysis_socialmedia_en.md | 94 ++++++++++++++ ...timent_analysis_socialmedia_pipeline_en.md | 70 ++++++++++ ...ert_tokenizer_256k_mlm_750k_pipeline_en.md | 70 ++++++++++ ...09-06-distilbertbaselinehsthreeepoch_en.md | 86 +++++++++++++ ...tilbertbaselinehsthreeepoch_pipeline_en.md | 69 ++++++++++ ...wscategoryclassification_fullmodel_3_en.md | 94 ++++++++++++++ ...-09-06-distilroberta_base_distilbert_en.md | 94 ++++++++++++++ ...stilroberta_base_distilbert_pipeline_en.md | 70 ++++++++++ .../2024-09-06-dummy_disregard_en.md | 94 ++++++++++++++ .../2024-09-06-dummy_disregard_pipeline_en.md | 70 ++++++++++ .../2024-09-06-dummy_jdonnelly0804_en.md | 94 ++++++++++++++ .../2024-09-06-dummy_model_2_ysharma_en.md | 94 ++++++++++++++ ...09-06-dummy_model_2_ysharma_pipeline_en.md | 70 ++++++++++ .../2024-09-06-dummy_model_alex_deng_en.md | 94 ++++++++++++++ ...09-06-dummy_model_alex_deng_pipeline_en.md | 70 ++++++++++ .../2024-09-06-dummy_model_alexey_wk_en.md | 94 ++++++++++++++ .../2024-09-06-dummy_model_aokikenichi_en.md | 94 ++++++++++++++ ...-06-dummy_model_aokikenichi_pipeline_en.md | 70 ++++++++++ .../2024-09-06-dummy_model_bingwork_en.md | 94 ++++++++++++++ ...-09-06-dummy_model_bingwork_pipeline_en.md | 70 ++++++++++ .../2024-09-06-dummy_model_chuckym_en.md | 94 ++++++++++++++ .../2024-09-06-dummy_model_ericchchiu_en.md | 94 ++++++++++++++ .../2024-09-06-dummy_model_exilesaber_en.md | 94 ++++++++++++++ .../2024-09-06-dummy_model_fab7_en.md | 94 ++++++++++++++ .../2024-09-06-dummy_model_gautamguru_en.md | 94 ++++++++++++++ ...9-06-dummy_model_gautamguru_pipeline_en.md | 70 ++++++++++ .../2024-09-06-dummy_model_jaweed123_en.md | 94 ++++++++++++++ ...09-06-dummy_model_jaweed123_pipeline_en.md | 70 ++++++++++ ...09-06-dummy_model_jp1773hsu_pipeline_en.md | 70 ++++++++++ 
.../2024-09-06-dummy_model_kwasi_en.md | 94 ++++++++++++++ .../2024-09-06-dummy_model_leisa_en.md | 94 ++++++++++++++ ...024-09-06-dummy_model_leisa_pipeline_en.md | 70 ++++++++++ .../2024-09-06-dummy_model_longbabin_en.md | 94 ++++++++++++++ ...09-06-dummy_model_longbabin_pipeline_en.md | 70 ++++++++++ .../2024-09-06-dummy_model_lourvalli_en.md | 94 ++++++++++++++ .../2024-09-06-dummy_model_manil5267_en.md | 94 ++++++++++++++ ...09-06-dummy_model_manil5267_pipeline_en.md | 70 ++++++++++ ...4-09-06-dummy_model_muger01_pipeline_en.md | 70 ++++++++++ .../2024-09-06-dummy_model_nayoung1_en.md | 94 ++++++++++++++ ...-09-06-dummy_model_nayoung1_pipeline_en.md | 70 ++++++++++ .../2024-09-06-dummy_model_nugget00_en.md | 94 ++++++++++++++ .../2024-09-06-dummy_model_renly_en.md | 94 ++++++++++++++ ...024-09-06-dummy_model_renly_pipeline_en.md | 70 ++++++++++ ...09-06-dummy_model_rizwanakt_pipeline_en.md | 70 ++++++++++ ...06-dummy_model_sapphirejade_pipeline_en.md | 70 ++++++++++ ...09-06-dummy_model_srushnaik_pipeline_en.md | 70 ++++++++++ .../2024-09-06-dummy_model_suzuki0829_en.md | 94 ++++++++++++++ ...9-06-dummy_model_suzuki0829_pipeline_en.md | 70 ++++++++++ .../2024-09-06-dummy_model_tzoght_en.md | 94 ++++++++++++++ ...24-09-06-dummy_model_tzoght_pipeline_en.md | 70 ++++++++++ .../2024-09-06-dummy_model_wjh70301_en.md | 94 ++++++++++++++ ...-09-06-dummy_model_wjh70301_pipeline_en.md | 70 ++++++++++ ...4-09-06-dummy_model_youarebelongtome_en.md | 94 ++++++++++++++ ...ummy_model_youarebelongtome_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-06-eng_lug_en.md | 94 ++++++++++++++ .../2024-09-06-eng_lug_pipeline_en.md | 70 ++++++++++ ...lish_coptic_norm_group_greekified_bt_en.md | 94 ++++++++++++++ ...024-09-06-english_german_translation_en.md | 94 ++++++++++++++ .../2024-09-06-english_japanese_xlm_5_en.md | 94 ++++++++++++++ ...9-06-english_japanese_xlm_5_pipeline_en.md | 70 ++++++++++ ...-english_multinerd_ner_more_training_en.md | 94 ++++++++++++++ ...multinerd_ner_more_training_pipeline_en.md | 70 ++++++++++ ...i_nlp_tp4_virtual_assistant_pipeline_fr.md | 66 ++++++++++ ..._virtual_assistant_pipeline_pipeline_fr.md | 70 ++++++++++ .../ahmedlone127/2024-09-06-expe_4_en.md | 94 ++++++++++++++ .../2024-09-06-expe_4_pipeline_en.md | 70 ++++++++++ ...024-09-06-extract_question_from_text_en.md | 86 +++++++++++++ .../2024-09-06-extractive_qa_model_en.md | 86 +++++++++++++ .../ahmedlone127/2024-09-06-fabert_fa.md | 94 ++++++++++++++ .../2024-09-06-fabert_pipeline_fa.md | 70 ++++++++++ .../ahmedlone127/2024-09-06-facets_128b_en.md | 86 +++++++++++++ .../2024-09-06-facets_128b_pipeline_en.md | 69 ++++++++++ .../2024-09-06-facets_ep3_1122_en.md | 86 +++++++++++++ .../2024-09-06-facets_ep3_1122_pipeline_en.md | 69 ++++++++++ .../2024-09-06-fairlex_fscs_minilm_de.md | 94 ++++++++++++++ ...4-09-06-fairlex_fscs_minilm_pipeline_de.md | 70 ++++++++++ .../2024-09-06-faq_qa_model_manvitha_en.md | 86 +++++++++++++ ...09-06-faq_qa_model_manvitha_pipeline_en.md | 69 ++++++++++ ...2024-09-06-few_shot_learner_pipeline_en.md | 69 ++++++++++ ...9-06-fiction_gbert_large_droc_np_ner_de.md | 94 ++++++++++++++ ...ion_gbert_large_droc_np_ner_pipeline_de.md | 70 ++++++++++ .../_posts/ahmedlone127/2024-09-06-film_en.md | 94 ++++++++++++++ .../2024-09-06-film_pipeline_en.md | 70 ++++++++++ .../2024-09-06-finance_bearish_bullish_en.md | 94 ++++++++++++++ .../2024-09-06-fine_tune_bert_en.md | 86 +++++++++++++ ...6-fine_tuned_distilbert_base_uncased_en.md | 100 +++++++++++++++ 
...ine_tuned_distilbert_medical_chatbot_en.md | 86 +++++++++++++ ..._distilbert_medical_chatbot_pipeline_en.md | 69 ++++++++++ ...ed_helsinki_nlp_english_marathi_marh_en.md | 94 ++++++++++++++ ...ki_nlp_marathi_marh_english_pipeline_en.md | 70 ++++++++++ ...ed_hindi_tonga_tonga_islands_english_en.md | 94 ++++++++++++++ ...tonga_tonga_islands_english_pipeline_en.md | 70 ++++++++++ ...uguese_tonga_tonga_islands_catalan_2_en.md | 94 ++++++++++++++ ...nga_tonga_islands_catalan_2_pipeline_en.md | 70 ++++++++++ ...usmt_english_hindi_gujarati_pipeline_en.md | 70 ++++++++++ .../2024-09-06-finetuned_qa_model_en.md | 86 +++++++++++++ ...uned_whisper_small_tanglish_pipeline_en.md | 69 ++++++++++ ..._sentiment_model_3000_samples_albert_en.md | 94 ++++++++++++++ ...t_model_3000_samples_albert_pipeline_en.md | 70 ++++++++++ ...6-finetuning_sentiment_model_deberta_en.md | 94 ++++++++++++++ ...2024-09-06-finguard_distilbert_37500_en.md | 94 ++++++++++++++ ...6-finguard_distilbert_37500_pipeline_en.md | 70 ++++++++++ .../2024-09-06-finnish_rauma_fi.md | 94 ++++++++++++++ .../2024-09-06-finnish_rauma_pipeline_fi.md | 70 ++++++++++ .../ahmedlone127/2024-09-06-first_model_en.md | 92 ++++++++++++++ .../2024-09-06-first_model_pipeline_en.md | 70 ++++++++++ .../2024-09-06-gal_ner_xlmr_5_en.md | 94 ++++++++++++++ .../2024-09-06-gal_ner_xlmr_5_pipeline_en.md | 70 ++++++++++ .../2024-09-06-gdpr_privacy_policy_ner_en.md | 94 ++++++++++++++ ...-06-gdpr_privacy_policy_ner_pipeline_en.md | 70 ++++++++++ .../2024-09-06-hate_bert_hasoc_marathi_mr.md | 94 ++++++++++++++ ...-06-hate_bert_hasoc_marathi_pipeline_mr.md | 70 ++++++++++ ...9-06-hate_hate_random3_seed0_bernice_en.md | 94 ++++++++++++++ ..._hate_random3_seed0_bernice_pipeline_en.md | 70 ++++++++++ ...-09-06-hf_distilbert_imdb_mlm_cosine_en.md | 92 ++++++++++++++ ..._distilbert_imdb_mlm_cosine_pipeline_en.md | 70 ++++++++++ .../2024-09-06-huner_ncbi_disease_dslim_en.md | 94 ++++++++++++++ ...06-huner_ncbi_disease_dslim_pipeline_en.md | 70 ++++++++++ .../2024-09-06-hw10_qa_model_en.md | 86 +++++++++++++ .../2024-09-06-hw10_qa_model_pipeline_en.md | 69 ++++++++++ ...sp_roberta_train_conclusion_pipeline_en.md | 70 ++++++++++ .../2024-09-06-imdb_gpt2_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-06-imdb_v1_en.md | 94 ++++++++++++++ .../2024-09-06-imdb_v1_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-06-inde_1_en.md | 94 ++++++++++++++ .../ahmedlone127/2024-09-06-inde_3_en.md | 94 ++++++++++++++ .../2024-09-06-inde_3_pipeline_en.md | 70 ++++++++++ ...09-06-indo_aryan_xlm_r_base_pipeline_gu.md | 70 ++++++++++ ...6-intentclassification_v3_kssumanth6_en.md | 94 ++++++++++++++ .../2024-09-06-intropln_setfit_model_en.md | 86 +++++++++++++ ...09-06-intropln_setfit_model_pipeline_en.md | 69 ++++++++++ .../2024-09-06-investopedia_qna_en.md | 86 +++++++++++++ .../ahmedlone127/2024-09-06-kannada_ner_kn.md | 94 ++++++++++++++ .../2024-09-06-kannada_ner_pipeline_kn.md | 70 ++++++++++ .../2024-09-06-lab1_finetuning_bill1886_en.md | 94 ++++++++++++++ ...06-lab1_finetuning_bill1886_pipeline_en.md | 70 ++++++++++ ...24-09-06-lab1_finetuning_chenxin0903_en.md | 94 ++++++++++++++ ...lab1_finetuning_chenxin0903_pipeline_en.md | 70 ++++++++++ .../2024-09-06-lab1_finetuning_muyuanma_en.md | 94 ++++++++++++++ ...06-lab1_finetuning_muyuanma_pipeline_en.md | 70 ++++++++++ ...-latte_mc_bert_base_thai_ws_pipeline_th.md | 70 ++++++++++ ...024-09-06-latte_mc_bert_base_thai_ws_th.md | 94 ++++++++++++++ .../2024-09-06-lenate_model_8_en.md | 94 ++++++++++++++ 
.../2024-09-06-lenate_model_8_pipeline_en.md | 70 ++++++++++ ...llm_firewall_distilbert_base_uncased_en.md | 94 ++++++++++++++ ...all_distilbert_base_uncased_pipeline_en.md | 70 ++++++++++ ...cased_meetingbank_microsoft_pipeline_xx.md | 70 ++++++++++ ...ilingual_cased_meetingbank_microsoft_xx.md | 94 ++++++++++++++ ...09-06-malay_sentiment_deberta_xsmall_ms.md | 94 ++++++++++++++ ...ay_sentiment_deberta_xsmall_pipeline_ms.md | 70 ++++++++++ .../2024-09-06-malaysian_whisper_base_ms.md | 84 ++++++++++++ ...9-06-malaysian_whisper_base_pipeline_ms.md | 69 ++++++++++ .../2024-09-06-marathi_albert_v2_mr.md | 94 ++++++++++++++ ...024-09-06-marathi_albert_v2_pipeline_mr.md | 70 ++++++++++ .../2024-09-06-marathi_marh_val_g_mr.md | 84 ++++++++++++ ...tonga_islands_english_charliealex123_en.md | 94 ++++++++++++++ ..._french_accelerate_longma98_pipeline_en.md | 70 ++++++++++ ...tonga_islands_french_indah1_pipeline_en.md | 70 ++++++++++ ...ish_tonga_tonga_islands_french_jhhan_en.md | 94 ++++++++++++++ ..._tonga_islands_vietnamese_accelerate_en.md | 94 ++++++++++++++ ...lands_vietnamese_accelerate_pipeline_en.md | 70 ++++++++++ ...09-06-marian_finetuned_maltese_en2vi_en.md | 94 ++++++++++++++ ...ian_finetuned_maltese_en2vi_pipeline_en.md | 70 ++++++++++ ...y_fine_tuned_english_polish_pipeline_en.md | 70 ++++++++++ .../2024-09-06-marian_nmt_enid_en.md | 94 ++++++++++++++ .../2024-09-06-marian_nmt_enid_pipeline_en.md | 70 ++++++++++ ...english_chinese_10_deskdown_pipeline_en.md | 70 ++++++++++ .../2024-09-06-masked_lm_shakespeare_en.md | 94 ++++++++++++++ .../2024-09-06-mdeberta_base_v3_2_en.md | 94 ++++++++++++++ ...024-09-06-mdeberta_nli_bilingual_2_0_en.md | 94 ++++++++++++++ ...-mdeberta_nli_bilingual_2_0_pipeline_en.md | 70 ++++++++++ ...ta_v3_base_assin_similarity_pipeline_pt.md | 70 ++++++++++ ...06-mdeberta_v3_base_assin_similarity_pt.md | 94 ++++++++++++++ ...-06-mdeberta_v3_base_hatebr_pipeline_pt.md | 70 ++++++++++ ...2024-09-06-mdeberta_v3_base_qnli_100_en.md | 94 ++++++++++++++ ...6-mdeberta_v3_base_qnli_100_pipeline_en.md | 70 ++++++++++ .../2024-09-06-mdeberta_v3_base_qnli_1_en.md | 94 ++++++++++++++ ...-06-mdeberta_v3_base_qnli_1_pipeline_en.md | 70 ++++++++++ .../2024-09-06-mdeberta_v3_base_qqp_100_en.md | 94 ++++++++++++++ ...06-mdeberta_v3_base_qqp_100_pipeline_en.md | 70 ++++++++++ .../2024-09-06-med_bert_pipeline_en.md | 70 ++++++++++ ...4-09-06-medroberta_dutch_experiencer_nl.md | 94 ++++++++++++++ ...edroberta_dutch_experiencer_pipeline_nl.md | 70 ++++++++++ .../2024-09-06-megabatchmarginloss_10_en.md | 86 +++++++++++++ ...9-06-megabatchmarginloss_10_pipeline_en.md | 69 ++++++++++ ...24-09-06-memo_bert_sanskrit_saskta_3_en.md | 94 ++++++++++++++ ...memo_bert_sanskrit_saskta_3_pipeline_en.md | 70 ++++++++++ .../2024-09-06-memo_bert_wsd_en.md | 94 ++++++++++++++ .../2024-09-06-memo_bert_wsd_pipeline_en.md | 70 ++++++++++ ...-microsoft_deberta_v3_large_cls_cree_en.md | 94 ++++++++++++++ ...t_deberta_v3_large_cls_cree_pipeline_en.md | 70 ++++++++++ .../2024-09-06-mixed_distil_bert_en.md | 94 ++++++++++++++ ...024-09-06-mixed_distil_bert_pipeline_en.md | 70 ++++++++++ ...nli_microsoft_deberta_v3_base_seed_2_en.md | 94 ++++++++++++++ ...li_microsoft_deberta_v3_large_seed_1_en.md | 94 ++++++++++++++ ...oft_deberta_v3_large_seed_1_pipeline_en.md | 70 ++++++++++ ...2024-09-06-mnli_roberta_large_seed_3_en.md | 94 ++++++++++++++ ...6-mnli_roberta_large_seed_3_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-06-model_test_en.md | 94 ++++++++++++++ .../2024-09-06-model_zip_pipeline_en.md | 70 ++++++++++ 
.../ahmedlone127/2024-09-06-mpnet_80k_en.md | 86 +++++++++++++ .../ahmedlone127/2024-09-06-mpnet_base_en.md | 94 ++++++++++++++ ...bedloss_msee_evaluator_salestax_docs_en.md | 86 +++++++++++++ ...see_evaluator_salestax_docs_pipeline_en.md | 69 ++++++++++ ...pnet_base_mimics_query_facet_encoder_en.md | 86 +++++++++++++ ..._mimics_query_facet_encoder_pipeline_en.md | 69 ++++++++++ ...ultilabel_sector_classifier_pipeline_en.md | 70 ++++++++++ ...pc_microsoft_deberta_v3_large_seed_2_en.md | 94 ++++++++++++++ ...oft_deberta_v3_large_seed_2_pipeline_en.md | 70 ++++++++++ ...ingual_hate_speech_robacofi_pipeline_xx.md | 70 ++++++++++ ...m_roberta_for_ner_bcokdilli_pipeline_xx.md | 70 ++++++++++ ...ingual_xlm_roberta_for_ner_bcokdilli_xx.md | 94 ++++++++++++++ .../2024-09-06-naija_xlm_twitter_base_en.md | 94 ++++++++++++++ ...9-06-naija_xlm_twitter_base_pipeline_en.md | 70 ++++++++++ ...rn_sotho_v2_classification_finetuned_en.md | 94 ++++++++++++++ ...v2_classification_finetuned_pipeline_en.md | 70 ++++++++++ .../2024-09-06-nepal_bhasa_bert_v2_en.md | 94 ++++++++++++++ ...4-09-06-nepal_bhasa_bert_v2_pipeline_en.md | 70 ++++++++++ .../2024-09-06-nepal_bhasa_dummy_model_en.md | 98 ++++++++++++++ ...l_bhasa_phishing_email_detection_ful_en.md | 94 ++++++++++++++ .../ahmedlone127/2024-09-06-ner_ecu_uda_en.md | 94 ++++++++++++++ .../2024-09-06-ner_ecu_uda_pipeline_en.md | 70 ++++++++++ .../2024-09-06-ner_model_cwchang_en.md | 94 ++++++++++++++ .../2024-09-06-ner_model_ep1_en.md | 94 ++++++++++++++ .../2024-09-06-ner_model_maccrobat_en.md | 94 ++++++++++++++ ...-09-06-ner_ner_random0_seed1_bernice_en.md | 94 ++++++++++++++ .../2024-09-06-ner_oee_techme_en.md | 94 ++++++++++++++ .../2024-09-06-ner_oee_techme_pipeline_en.md | 70 ++++++++++ ...024-09-06-nerd_nerd_temporal_bernice_en.md | 94 ++++++++++++++ ...-nerd_nerd_temporal_bernice_pipeline_en.md | 70 ++++++++++ .../2024-09-06-news_title_roberta_en.md | 94 ++++++++++++++ ...24-09-06-news_title_roberta_pipeline_en.md | 70 ++++++++++ ...-06-nlp_course_chapter4_test_model_1_en.md | 94 ++++++++++++++ ...ourse_chapter4_test_model_1_pipeline_en.md | 70 ++++++++++ .../2024-09-06-nlp_task_modified_en.md | 86 +++++++++++++ ...024-09-06-nlp_task_modified_pipeline_en.md | 69 ++++++++++ ...06-norwegian_bokml_whisper_base_beta_no.md | 84 ++++++++++++ ...ian_bokml_whisper_base_beta_pipeline_no.md | 69 ++++++++++ ...6-norwegian_bokml_whisper_small_beta_no.md | 84 ++++++++++++ ...an_bokml_whisper_small_beta_pipeline_no.md | 69 ++++++++++ ...l_whisper_small_verbatim_nbailabbeta_no.md | 84 ++++++++++++ ..._small_verbatim_nbailabbeta_pipeline_no.md | 69 ++++++++++ .../2024-09-06-nps_mpnet_lds_en.md | 86 +++++++++++++ .../2024-09-06-nusabert_base_en.md | 94 ++++++++++++++ .../2024-09-06-nusabert_base_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-06-ojobert_en.md | 94 ++++++++++++++ ...cher_v4_gpt35turbo_and_gpt4_pipeline_en.md | 70 ++++++++++ ...nga_islands_chinese_galgame_pipeline_en.md | 70 ++++++++++ ...tuned_english_tonga_tonga_islands_ac_en.md | 94 ++++++++++++++ ...nga_tonga_islands_thai_pong_pipeline_en.md | 70 ++++++++++ ...d_english_tonga_tonga_islands_german_en.md | 94 ++++++++++++++ ...tonga_tonga_islands_romanian_lamkhoi_en.md | 94 ++++++++++++++ ...ga_islands_romanian_lamkhoi_pipeline_en.md | 70 ++++++++++ ...nga_tonga_islands_romanian_souta0919_en.md | 94 ++++++++++++++ ..._islands_romanian_souta0919_pipeline_en.md | 70 ++++++++++ ...nga_islands_swahili_chituyi_pipeline_en.md | 70 ++++++++++ 
..._russian_tonga_tonga_islands_english_en.md | 94 ++++++++++++++ ...sh_finetuned_npomo_english_10_epochs_en.md | 94 ++++++++++++++ ...ned_npomo_english_10_epochs_pipeline_en.md | 70 ++++++++++ .../2024-09-06-panx_xlmr_base_en.md | 94 ++++++++++++++ .../2024-09-06-panx_xlmr_base_pipeline_en.md | 70 ++++++++++ .../2024-09-06-pap2patentsberta_en.md | 86 +++++++++++++ ...2024-09-06-pap2patentsberta_pipeline_en.md | 69 ++++++++++ .../2024-09-06-paws_x_xlm_r_only_german_en.md | 94 ++++++++++++++ ...06-paws_x_xlm_r_only_german_pipeline_en.md | 70 ++++++++++ ...e_conditional_utilitarian_deberta_01_en.md | 94 ++++++++++++++ ...-06-pharo_classreferences_classifier_en.md | 86 +++++++++++++ ..._classreferences_classifier_pipeline_en.md | 69 ++++++++++ .../2024-09-06-phayathaibert_th.md | 94 ++++++++++++++ .../ahmedlone127/2024-09-06-philberta_en.md | 94 ++++++++++++++ .../2024-09-06-philberta_pipeline_en.md | 70 ++++++++++ .../2024-09-06-pictalk_distil_en.md | 94 ++++++++++++++ .../2024-09-06-pictalk_distil_pipeline_en.md | 70 ++++++++++ .../2024-09-06-pii_roberta_large_en.md | 94 ++++++++++++++ ...024-09-06-pii_roberta_large_pipeline_en.md | 70 ++++++++++ .../2024-09-06-polyfaq_cross_pipeline_en.md | 70 ++++++++++ ...-portuguese_xlm_r_falsetrue_0_2_best_en.md | 94 ++++++++++++++ ...se_xlm_r_falsetrue_0_2_best_pipeline_en.md | 70 ++++++++++ ...predict_perception_xlmr_blame_victim_en.md | 94 ++++++++++++++ ...erception_xlmr_blame_victim_pipeline_en.md | 70 ++++++++++ .../2024-09-06-punct_model_v2_en.md | 94 ++++++++++++++ .../2024-09-06-punct_model_v2_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-06-punctuate_en.md | 94 ++++++++++++++ .../2024-09-06-punctuate_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-06-q2d_128b_en.md | 86 +++++++++++++ .../ahmedlone127/2024-09-06-q2d_gpt_22_en.md | 86 +++++++++++++ .../2024-09-06-q2d_gpt_22_pipeline_en.md | 69 ++++++++++ .../ahmedlone127/2024-09-06-q2e_ep3_42_en.md | 86 +++++++++++++ .../2024-09-06-q2e_ep3_42_pipeline_en.md | 69 ++++++++++ .../2024-09-06-q_only_ep3_22_en.md | 86 +++++++++++++ .../2024-09-06-q_only_ep3_22_pipeline_en.md | 69 ++++++++++ .../ahmedlone127/2024-09-06-qa_distell0_en.md | 86 +++++++++++++ .../2024-09-06-qa_model2_sumittagadiya_en.md | 86 +++++++++++++ ...-06-qa_model2_sumittagadiya_pipeline_en.md | 69 ++++++++++ .../2024-09-06-qa_model3_pipeline_en.md | 69 ++++++++++ .../2024-09-06-qa_model_hansollll_en.md | 86 +++++++++++++ ...24-09-06-qa_model_hansollll_pipeline_en.md | 69 ++++++++++ .../2024-09-06-qa_model_sif10_en.md | 86 +++++++++++++ ...6-qa_real_data_test_xlm_roberta_base_en.md | 86 +++++++++++++ .../2024-09-06-qa_redaction_nov1_18_en.md | 86 +++++++++++++ .../2024-09-06-qa_redaction_nov1_19_a1_en.md | 86 +++++++++++++ ...-06-qa_redaction_nov1_19_a1_pipeline_en.md | 69 ++++++++++ ...6-qa_synth_21_sept_with_finetune_1_0_en.md | 86 +++++++++++++ ...h_21_sept_with_finetune_1_0_pipeline_en.md | 69 ++++++++++ ...h_25_sept_with_finetune_1_1_on19sept_en.md | 86 +++++++++++++ ..._with_finetune_1_1_on19sept_pipeline_en.md | 69 ++++++++++ ...unanswerable_23_aug_xlm_roberta_base_en.md | 86 +++++++++++++ ...rain_real_data_test_xlm_roberta_base_en.md | 86 +++++++++++++ ..._data_test_xlm_roberta_base_pipeline_en.md | 69 ++++++++++ ...uestion_answer_thirdeyedata_pipeline_en.md | 69 ++++++++++ ...question_answering_tutorial_practice_en.md | 86 +++++++++++++ ...answering_tutorial_practice_pipeline_en.md | 69 ++++++++++ ...2024-09-06-radbert_roberta_4m_zzxslp_en.md | 94 ++++++++++++++ 
...6-radbert_roberta_4m_zzxslp_pipeline_en.md | 70 ++++++++++ .../2024-09-06-randomly_pruned_30_model_en.md | 94 ++++++++++++++ ...06-randomly_pruned_30_model_pipeline_en.md | 70 ++++++++++ ...024-09-06-refpydst_1p_icdst_split_v1_en.md | 86 +++++++++++++ ...-refpydst_1p_icdst_split_v1_pipeline_en.md | 69 ++++++++++ .../2024-09-06-results_soniquentin_en.md | 94 ++++++++++++++ ...4-09-06-results_soniquentin_pipeline_en.md | 70 ++++++++++ .../2024-09-06-reward_deberta_v3_en.md | 94 ++++++++++++++ ...024-09-06-reward_deberta_v3_pipeline_en.md | 70 ++++++++++ ...06-rg_fake_signatures_southern_sotho_en.md | 94 ++++++++++++++ ...e_signatures_southern_sotho_pipeline_en.md | 70 ++++++++++ .../2024-09-06-roberta_babe_ft_pipeline_en.md | 70 ++++++++++ .../2024-09-06-roberta_base_french_fr.md | 94 ++++++++++++++ ...4-09-06-roberta_base_french_pipeline_fr.md | 70 ++++++++++ ...-roberta_base_multinerd_jayant_yadav_en.md | 94 ++++++++++++++ ...base_multinerd_jayant_yadav_pipeline_en.md | 70 ++++++++++ ...a_classifier_large_finetuned_clinc_1_en.md | 104 +++++++++++++++ ...ta_large_bne_capitel_sayula_popoluca_es.md | 94 ++++++++++++++ ...bne_capitel_sayula_popoluca_pipeline_es.md | 70 ++++++++++ .../2024-09-06-roberta_large_boolq_en.md | 94 ++++++++++++++ ...4-09-06-roberta_large_boolq_pipeline_en.md | 70 ++++++++++ ...06-roberta_large_finnish_v2_pipeline_fi.md | 70 ++++++++++ ...9-06-roberta_ner_graphcodebert_MT4TS_en.md | 106 ++++++++++++++++ ...ta_ner_roberta_large_tweetner_random_en.md | 112 ++++++++++++++++ .../2024-09-06-roberta_small_en.md | 94 ++++++++++++++ .../2024-09-06-roberta_small_pipeline_en.md | 70 ++++++++++ .../2024-09-06-rohit_setfit_model_en.md | 86 +++++++++++++ ...24-09-06-rohit_setfit_model_pipeline_en.md | 69 ++++++++++ ...oes_microsoft_deberta_v3_base_seed_2_en.md | 94 ++++++++++++++ .../2024-09-06-rpa_synth_08nov_en.md | 86 +++++++++++++ .../2024-09-06-rpa_synth_08nov_pipeline_en.md | 69 ++++++++++ .../ahmedlone127/2024-09-06-sb_temfac_en.md | 92 ++++++++++++++ .../2024-09-06-sb_temfac_pipeline_en.md | 70 ++++++++++ ...nscience_massive_all_1_1_gamma_jason_en.md | 94 ++++++++++++++ .../ahmedlone127/2024-09-06-secroberta_en.md | 94 ++++++++++++++ .../2024-09-06-secroberta_pipeline_en.md | 70 ++++++++++ ...024-09-06-securebert_aptner_pipeline_en.md | 70 ++++++++++ .../2024-09-06-semantic_test_en.md | 86 +++++++++++++ .../2024-09-06-semantic_test_pipeline_en.md | 69 ++++++++++ .../2024-09-06-sembr2023_bert_small_en.md | 94 ++++++++++++++ ..._afro_xlmr_mini_finetuned_kintweetsc_en.md | 94 ++++++++++++++ ...r_mini_finetuned_kintweetsc_pipeline_en.md | 71 +++++++++++ .../2024-09-06-sent_analysis_en.md | 94 ++++++++++++++ .../2024-09-06-sent_analysis_pipeline_en.md | 70 ++++++++++ ...sent_bert_base_english_chinese_cased_en.md | 94 ++++++++++++++ ..._base_english_chinese_cased_pipeline_en.md | 71 +++++++++++ ...historic_multilingual_cased_pipeline_xx.md | 71 +++++++++++ ...gual_cased_finetuned_yoruba_pipeline_xx.md | 71 +++++++++++ ..._multilingual_cased_finetuned_yoruba_xx.md | 94 ++++++++++++++ ...rt_base_romanian_uncased_v1_pipeline_ro.md | 71 +++++++++++ .../2024-09-06-sent_bert_for_patents_en.md | 94 ++++++++++++++ ...09-06-sent_bert_for_patents_pipeline_en.md | 71 +++++++++++ ...t_large_uncased_semeval2014_pipeline_en.md | 71 +++++++++++ ..._bert_medieval_multilingual_pipeline_xx.md | 71 +++++++++++ ...9-06-sent_bert_medieval_multilingual_xx.md | 94 ++++++++++++++ ..._biomedvlp_cxr_bert_general_pipeline_en.md | 71 +++++++++++ .../2024-09-06-sent_checkpoint_12600_en.md | 94 
++++++++++++++ .../2024-09-06-sent_czert_b_base_cased_cs.md | 94 ++++++++++++++ ...-06-sent_czert_b_base_cased_pipeline_cs.md | 71 +++++++++++ ...t_entitycs_39_mlm_xlmr_base_pipeline_xx.md | 71 +++++++++++ ...09-06-sent_entitycs_39_mlm_xlmr_base_xx.md | 94 ++++++++++++++ .../2024-09-06-sent_estroberta_et.md | 94 ++++++++++++++ .../2024-09-06-sent_estroberta_pipeline_et.md | 71 +++++++++++ .../ahmedlone127/2024-09-06-sent_fbert_en.md | 94 ++++++++++++++ .../2024-09-06-sent_fbert_pipeline_en.md | 71 +++++++++++ ...9-06-sent_gbert_large_finetuned_cust_en.md | 94 ++++++++++++++ ..._gbert_large_finetuned_cust_pipeline_en.md | 71 +++++++++++ .../2024-09-06-sent_hing_mbert_hi.md | 94 ++++++++++++++ ...ent_indicbertv2_mlm_sam_tlm_pipeline_xx.md | 71 +++++++++++ ...4-09-06-sent_indicbertv2_mlm_sam_tlm_xx.md | 94 ++++++++++++++ .../2024-09-06-sent_koobert_pipeline_xx.md | 71 +++++++++++ ...9-06-sent_legal_bert_base_cased_ptbr_pt.md | 94 ++++++++++++++ ...neural_cherche_sparse_embed_pipeline_en.md | 71 +++++++++++ .../2024-09-06-sent_odia_bert_pipeline_or.md | 71 +++++++++++ .../2024-09-06-sent_ofa_multi_400_en.md | 94 ++++++++++++++ ...24-09-06-sent_ofa_multi_400_pipeline_en.md | 71 +++++++++++ .../2024-09-06-sent_ope_bert_v2_1_en.md | 94 ++++++++++++++ ...24-09-06-sent_ope_bert_v2_1_pipeline_en.md | 71 +++++++++++ .../2024-09-06-sent_panx_xlmr_base_en.md | 94 ++++++++++++++ ...4-09-06-sent_panx_xlmr_base_pipeline_en.md | 71 +++++++++++ .../2024-09-06-sent_psych_search_en.md | 94 ++++++++++++++ ...024-09-06-sent_psych_search_pipeline_en.md | 71 +++++++++++ ...06-sent_roberta_base_exp_32_pipeline_xx.md | 71 +++++++++++ .../2024-09-06-sent_roberta_base_exp_32_xx.md | 94 ++++++++++++++ ...6-sent_splade_sparse_vector_pinecone_en.md | 94 ++++++++++++++ ...nt_xlm_roberta_base_finetuned_dholuo_en.md | 94 ++++++++++++++ ...berta_base_finetuned_dholuo_pipeline_en.md | 71 +++++++++++ ..._xlm_roberta_base_finetuned_digikala_en.md | 94 ++++++++++++++ ...rta_base_finetuned_digikala_pipeline_en.md | 71 +++++++++++ ...netuned_on_runaways_english_pipeline_en.md | 71 +++++++++++ ...ent_xlm_roberta_base_finetuned_shona_en.md | 94 ++++++++++++++ ...oberta_base_finetuned_shona_pipeline_en.md | 71 +++++++++++ ...oberta_base_finetuned_xhosa_pipeline_en.md | 71 +++++++++++ ...nt_xlm_roberta_base_finetuned_yoruba_en.md | 94 ++++++++++++++ .../2024-09-06-sentiment_analyser_joyo1_en.md | 94 ++++++++++++++ ...06-sentiment_analyser_joyo1_pipeline_en.md | 70 ++++++++++ ...timent_analysis_sayula_popoluca_neg1_en.md | 94 ++++++++++++++ ...alysis_sayula_popoluca_neg1_pipeline_en.md | 70 ++++++++++ ...sentiment_ohb3_xlm_roberta_hungarian_hu.md | 94 ++++++++++++++ ..._ohb3_xlm_roberta_hungarian_pipeline_hu.md | 70 ++++++++++ ...ent_sentiment_small_temporal_bernice_en.md | 94 ++++++++++++++ ...ment_small_temporal_bernice_pipeline_en.md | 70 ++++++++++ ...etuned_financial_text_classification_en.md | 86 +++++++++++++ ...nancial_text_classification_pipeline_en.md | 69 ++++++++++ ...06-setfit_mbti_multiclass_w266_nov29_en.md | 86 +++++++++++++ ..._mbti_multiclass_w266_nov29_pipeline_en.md | 69 ++++++++++ ...b11_misinformation_on_global_support_en.md | 86 +++++++++++++ ...formation_on_global_support_pipeline_en.md | 69 ++++++++++ .../2024-09-06-setfit_model_test_2_en.md | 86 +++++++++++++ ...nli_microsoft_deberta_v3_base_seed_2_en.md | 94 ++++++++++++++ ...soft_deberta_v3_base_seed_2_pipeline_en.md | 70 ++++++++++ ...li_microsoft_deberta_v3_large_seed_1_en.md | 94 ++++++++++++++ ...oft_deberta_v3_large_seed_1_pipeline_en.md | 70 
++++++++++ ...06-song_artist_classifier_v7_alberta_en.md | 94 ++++++++++++++ ...rtist_classifier_v7_alberta_pipeline_en.md | 70 ++++++++++ ...otho_all_mpnet_finetuned_arabic_1500_en.md | 86 +++++++++++++ .../2024-09-06-spanish_english_en.md | 94 ++++++++++++++ .../2024-09-06-spanish_english_pipeline_en.md | 70 ++++++++++ ...-09-06-splade_sparse_vector_pinecone_en.md | 94 ++++++++++++++ ...lade_sparse_vector_pinecone_pipeline_en.md | 70 ++++++++++ ...soft_deberta_v3_base_seed_2_pipeline_en.md | 70 ++++++++++ ...soft_deberta_v3_base_seed_3_pipeline_en.md | 70 ++++++++++ ..._radiology_reports_and_i2b2_pipeline_en.md | 70 ++++++++++ ...6-sungbeom_whisper_small_korean_set9_ko.md | 84 ++++++++++++ ...m_whisper_small_korean_set9_pipeline_ko.md | 69 ++++++++++ ...-09-06-t2t_gun_nlth_from_base_warmup_en.md | 94 ++++++++++++++ ...odel_deberta__aug_method_ri_pipeline_en.md | 70 ++++++++++ ..._task__model_deberta__aug_method_eda_en.md | 94 ++++++++++++++ ...del_deberta__aug_method_eda_pipeline_en.md | 70 ++++++++++ ...ssification_distilbert_imdb_pipeline_en.md | 70 ++++++++++ .../2024-09-06-tb_xlm_r_fpt_en.md | 94 ++++++++++++++ ...06-tcfd_recommendation_classifier_v1_en.md | 94 ++++++++++++++ ..._combined_data_date_10_07_2024_14_33_he.md | 84 ++++++++++++ ..._data_date_10_07_2024_14_33_pipeline_he.md | 69 ++++++++++ .../2024-09-06-telugu_bertu_ner_en.md | 94 ++++++++++++++ ...024-09-06-test_model_tambeo_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-06-test_ner_en.md | 92 ++++++++++++++ .../2024-09-06-test_ner_pipeline_en.md | 70 ++++++++++ .../2024-09-06-test_nhoxxkienn_en.md | 86 +++++++++++++ .../2024-09-06-test_qa_model_1_en.md | 86 +++++++++++++ .../ahmedlone127/2024-09-06-test_test_en.md | 86 +++++++++++++ ...-06-test_with_web_interface_pipeline_en.md | 70 ++++++++++ ...4-09-06-text_classification_yaqinlah_en.md | 94 ++++++++++++++ ...ext_classification_yaqinlah_pipeline_en.md | 70 ++++++++++ .../2024-09-06-text_complexity_roberta_en.md | 94 ++++++++++++++ .../2024-09-06-tiny_bert_0102_5000_en.md | 94 ++++++++++++++ .../2024-09-06-tiny_bert_0102_last_iter_en.md | 94 ++++++++++++++ ...andom_mpnetforsequenceclassification_en.md | 94 ++++++++++++++ .../2024-09-06-tntoeng_by_ibrahim_v1_en.md | 94 ++++++++++++++ ...09-06-tntoeng_by_ibrahim_v1_pipeline_en.md | 70 ++++++++++ ...topic_random0_seed2_bernice_pipeline_en.md | 70 ++++++++++ ...torch_distilbert_policies_comparison_en.md | 94 ++++++++++++++ .../2024-09-06-toxicity_model_es.md | 94 ++++++++++++++ .../2024-09-06-toxicity_model_pipeline_es.md | 70 ++++++++++ ...024-09-06-translation_english_lug_v3_en.md | 94 ++++++++++++++ ...-translation_english_lug_v3_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-06-tuf_fewshot_en.md | 86 +++++++++++++ .../2024-09-06-tuf_fewshot_pipeline_en.md | 69 ++++++++++ .../2024-09-06-useless_model_try_1_en.md | 94 ++++++++++++++ .../2024-09-06-village_model_pipeline_en.md | 69 ++++++++++ .../2024-09-06-whisper_age_estimator_hi.md | 84 ++++++++++++ ...09-06-whisper_age_estimator_pipeline_hi.md | 69 ++++++++++ ...per_base_fine_tuned_russian_pipeline_ru.md | 69 ++++++++++ ...9-06-whisper_base_fine_tuned_russian_ru.md | 84 ++++++++++++ ...2024-09-06-whisper_base_wolof_cifope_en.md | 84 ++++++++++++ ...6-whisper_base_wolof_cifope_pipeline_en.md | 69 ++++++++++ ..._phoneme_transcription_g2p_example_3_en.md | 84 ++++++++++++ ...transcription_g2p_example_3_pipeline_en.md | 69 ++++++++++ .../2024-09-06-whisper_italian_small_it.md | 84 ++++++++++++ ...09-06-whisper_italian_small_pipeline_it.md | 69 ++++++++++ 
...24-09-06-whisper_medium_arabic_arbml_en.md | 84 ++++++++++++ .../2024-09-06-whisper_medium_eg_ar.md | 84 ++++++++++++ ...6-whisper_medium_english_santhosh643_en.md | 84 ++++++++++++ ...9-06-whisper_medium_medical_pipeline_en.md | 69 ++++++++++ ...9-06-whisper_medium_portuguese_3000h_en.md | 84 ++++++++++++ .../2024-09-06-whisper_samll_hassanya_en.md | 84 ++++++++++++ ...9-06-whisper_samll_hassanya_pipeline_en.md | 69 ++++++++++ ...-09-06-whisper_small_arabic_gnsepili_ar.md | 84 ++++++++++++ ...isper_small_arabic_gnsepili_pipeline_ar.md | 69 ++++++++++ .../2024-09-06-whisper_small_bb_vahn98_en.md | 84 ++++++++++++ ...-06-whisper_small_bb_vahn98_pipeline_en.md | 69 ++++++++++ .../2024-09-06-whisper_small_belarusian_be.md | 84 ++++++++++++ ...06-whisper_small_belarusian_pipeline_be.md | 69 ++++++++++ ...06-whisper_small_breton_arzhurkoadek_br.md | 84 ++++++++++++ ...r_small_breton_arzhurkoadek_pipeline_br.md | 69 ++++++++++ ...-09-06-whisper_small_chinese_desktop_en.md | 84 ++++++++++++ ...isper_small_chinese_desktop_pipeline_en.md | 69 ++++++++++ ...small_chinese_twi_kimbochen_pipeline_zh.md | 69 ++++++++++ ...-whisper_small_chinese_twi_kimbochen_zh.md | 84 ++++++++++++ ...4-09-06-whisper_small_chuvash_arabic_ar.md | 84 ++++++++++++ ...hisper_small_chuvash_arabic_pipeline_ar.md | 69 ++++++++++ ...024-09-06-whisper_small_common_voice_fa.md | 84 ++++++++++++ ...-whisper_small_common_voice_pipeline_fa.md | 69 ++++++++++ ...2024-09-06-whisper_small_cv11_german_de.md | 84 ++++++++++++ ...6-whisper_small_cv11_german_pipeline_de.md | 69 ++++++++++ ...24-09-06-whisper_small_darija_mryasu_en.md | 84 ++++++++++++ ...whisper_small_darija_mryasu_pipeline_en.md | 69 ++++++++++ ...09-06-whisper_small_english_accented_en.md | 84 ++++++++++++ ...er_small_english_hindi_venkatesh4342_en.md | 84 ++++++++++++ ...english_hindi_venkatesh4342_pipeline_en.md | 69 ++++++++++ ...4-09-06-whisper_small_hausa_mkalbani_en.md | 84 ++++++++++++ ...hisper_small_hausa_mkalbani_pipeline_en.md | 69 ++++++++++ ...isper_small_indonesian_cv17_pipeline_en.md | 69 ++++++++++ ...-whisper_small_indonesian_tonimurfid_id.md | 84 ++++++++++++ ...small_indonesian_tonimurfid_pipeline_id.md | 69 ++++++++++ .../2024-09-06-whisper_small_khmer_v2_km.md | 84 ++++++++++++ ...9-06-whisper_small_khmer_v2_pipeline_km.md | 69 ++++++++++ ...-06-whisper_small_korean_haseong8012_ko.md | 84 ++++++++++++ ...-09-06-whisper_small_korean_sungbeom_ko.md | 84 ++++++++++++ ...isper_small_korean_sungbeom_pipeline_ko.md | 69 ++++++++++ ...24-09-06-whisper_small_korean_zeroth_en.md | 84 ++++++++++++ ...whisper_small_korean_zeroth_pipeline_en.md | 69 ++++++++++ ...06-whisper_small_latvian_raivisdejus_lv.md | 84 ++++++++++++ ...r_small_latvian_raivisdejus_pipeline_lv.md | 69 ++++++++++ ...024-09-06-whisper_small_malayalam_v5_ml.md | 84 ++++++++++++ ...-whisper_small_malayalam_v5_pipeline_ml.md | 69 ++++++++++ ...sper_small_mongolian_erkhem_gantulga_mn.md | 84 ++++++++++++ ...l_mongolian_erkhem_gantulga_pipeline_mn.md | 69 ++++++++++ ...9-06-whisper_small_sinhala_fine_tune_en.md | 84 ++++++++++++ ...per_small_sinhala_fine_tune_pipeline_en.md | 69 ++++++++++ ...09-06-whisper_small_spanish_clu_ling_en.md | 84 ++++++++++++ ...sper_small_spanish_clu_ling_pipeline_en.md | 69 ++++++++++ ...ish_nemo_unified_2024_07_02_15_19_06_en.md | 84 ++++++++++++ ...mall_swahili_asr_chuvash_14_pipeline_sw.md | 69 ++++++++++ ...whisper_small_swahili_asr_chuvash_14_sw.md | 84 ++++++++++++ ...per_small_tamil_parambharat_pipeline_ta.md | 69 ++++++++++ 
...9-06-whisper_small_tamil_parambharat_ta.md | 84 ++++++++++++ ...4-09-06-whisper_small_xhosa_pipeline_xh.md | 69 ++++++++++ .../2024-09-06-whisper_small_xhosa_xh.md | 84 ++++++++++++ ...4-09-06-whisper_telugu_base_pipeline_te.md | 69 ++++++++++ .../2024-09-06-whisper_telugu_base_te.md | 84 ++++++++++++ ..._test_quant_quantized_samagradatagov_en.md | 84 ++++++++++++ ...nt_quantized_samagradatagov_pipeline_en.md | 69 ++++++++++ ...2024-09-06-whisper_tiny_arabic_quran_en.md | 84 ++++++++++++ ...6-whisper_tiny_arabic_quran_pipeline_en.md | 69 ++++++++++ ...4-09-06-whisper_tiny_english_atcosim_en.md | 84 ++++++++++++ ...hisper_tiny_english_atcosim_pipeline_en.md | 69 ++++++++++ ...2024-09-06-whisper_tiny_french_cased_fr.md | 84 ++++++++++++ ...6-whisper_tiny_french_cased_pipeline_fr.md | 69 ++++++++++ ...isper_tiny_german_primeline_pipeline_de.md | 69 ++++++++++ ...4-09-06-whisper_tiny_korean_pipeline_ko.md | 69 ++++++++++ .../2024-09-06-whisper_tiny_latvian_lv.md | 84 ++++++++++++ .../2024-09-06-whisper_tiny_nepali_ne.md | 84 ++++++++++++ ...4-09-06-whisper_tiny_nepali_pipeline_ne.md | 69 ++++++++++ ...-whisper_tiny_persian_farsi_pipeline_fa.md | 69 ++++++++++ ...9-06-whisper_tiny_serbian_yodas_v0_2_en.md | 84 ++++++++++++ .../2024-09-06-wikidata_simplequestions_en.md | 94 ++++++++++++++ ...06-wikidata_simplequestions_pipeline_en.md | 70 ++++++++++ ...-06-xlm_roberta_base_claim_detection_en.md | 94 ++++++++++++++ ...oberta_base_claim_detection_pipeline_en.md | 70 ++++++++++ ...etuned_augument_visquad2_20_3_2023_1_en.md | 86 +++++++++++++ ...gument_visquad2_20_3_2023_1_pipeline_en.md | 69 ++++++++++ ..._base_finetuned_burmese_dear_watson2_en.md | 94 ++++++++++++++ ...etuned_burmese_dear_watson2_pipeline_en.md | 70 ++++++++++ ...6-xlm_roberta_base_finetuned_ecoicop_en.md | 94 ++++++++++++++ ...erta_base_finetuned_ecoicop_pipeline_en.md | 70 ++++++++++ ...finetuned_emotion_37_labels_pipeline_en.md | 70 ++++++++++ ...rta_base_finetuned_marc_english_hslu_en.md | 94 ++++++++++++++ ...finetuned_marc_english_hslu_pipeline_en.md | 70 ++++++++++ ...epal_bhasa_data_visquad2_13_3_2023_3_en.md | 86 +++++++++++++ ...a_data_visquad2_13_3_2023_3_pipeline_en.md | 69 ++++++++++ ...berta_base_finetuned_panx_all_100yen_en.md | 94 ++++++++++++++ ...e_finetuned_panx_all_100yen_pipeline_en.md | 70 ++++++++++ ...berta_base_finetuned_panx_all_54data_en.md | 94 ++++++++++++++ ...e_finetuned_panx_all_54data_pipeline_en.md | 70 ++++++++++ ...a_base_finetuned_panx_all_buruzaemon_en.md | 94 ++++++++++++++ ...netuned_panx_all_buruzaemon_pipeline_en.md | 70 ++++++++++ ...berta_base_finetuned_panx_all_gcmsrc_en.md | 94 ++++++++++++++ ...e_finetuned_panx_all_gcmsrc_pipeline_en.md | 70 ++++++++++ ...se_finetuned_panx_english_ankit15nov_en.md | 94 ++++++++++++++ ...ned_panx_english_ankit15nov_pipeline_en.md | 70 ++++++++++ ...se_finetuned_panx_english_cataluna84_en.md | 94 ++++++++++++++ ...ned_panx_english_cataluna84_pipeline_en.md | 70 ++++++++++ ...a_base_finetuned_panx_english_leosol_en.md | 94 ++++++++++++++ ...ase_finetuned_panx_french_ankit15nov_en.md | 94 ++++++++++++++ ...ta_base_finetuned_panx_french_sorabe_en.md | 94 ++++++++++++++ ...inetuned_panx_french_sorabe_pipeline_en.md | 70 ++++++++++ ..._base_finetuned_panx_french_sungwoo1_en.md | 94 ++++++++++++++ ...etuned_panx_french_sungwoo1_pipeline_en.md | 70 ++++++++++ ...netuned_panx_french_transformersbook_en.md | 94 ++++++++++++++ ...e_finetuned_panx_french_youngbreadho_en.md | 94 ++++++++++++++ ...ed_panx_french_youngbreadho_pipeline_en.md | 70 ++++++++++ 
...base_finetuned_panx_german_adalberti_en.md | 94 ++++++++++++++ ...tuned_panx_german_adalberti_pipeline_en.md | 70 ++++++++++ ...finetuned_panx_german_ahmad_alismail_en.md | 94 ++++++++++++++ ...a_base_finetuned_panx_german_francos_en.md | 94 ++++++++++++++ ...netuned_panx_german_francos_pipeline_en.md | 70 ++++++++++ ..._finetuned_panx_german_french_gcmsrc_en.md | 94 ++++++++++++++ ...uned_panx_german_french_reinoudbosch_en.md | 94 ++++++++++++++ ..._german_french_reinoudbosch_pipeline_en.md | 70 ++++++++++ ...se_finetuned_panx_german_french_rupe_en.md | 94 ++++++++++++++ ...ned_panx_german_french_rupe_pipeline_en.md | 70 ++++++++++ ..._finetuned_panx_german_french_songys_en.md | 94 ++++++++++++++ ...d_panx_german_french_songys_pipeline_en.md | 70 ++++++++++ ...tuned_panx_german_french_wooseok0303_en.md | 94 ++++++++++++++ ...x_german_french_wooseok0303_pipeline_en.md | 70 ++++++++++ ..._finetuned_panx_german_french_zebans_en.md | 94 ++++++++++++++ ..._base_finetuned_panx_german_ghks4861_en.md | 94 ++++++++++++++ ...se_finetuned_panx_german_jpaulhunter_en.md | 94 ++++++++++++++ ...ned_panx_german_jpaulhunter_pipeline_en.md | 70 ++++++++++ ...a_base_finetuned_panx_german_jslowik_en.md | 94 ++++++++++++++ ...netuned_panx_german_jslowik_pipeline_en.md | 70 ++++++++++ ...ta_base_finetuned_panx_german_kuma_s_en.md | 94 ++++++++++++++ ...inetuned_panx_german_kuma_s_pipeline_en.md | 70 ++++++++++ ...ase_finetuned_panx_german_mmenendezg_en.md | 94 ++++++++++++++ ...uned_panx_german_mmenendezg_pipeline_en.md | 70 ++++++++++ ...inetuned_panx_german_nadezda_at_htec_en.md | 94 ++++++++++++++ ...panx_german_nadezda_at_htec_pipeline_en.md | 70 ++++++++++ ...ta_base_finetuned_panx_german_nerdai_en.md | 94 ++++++++++++++ ...inetuned_panx_german_nerdai_pipeline_en.md | 70 ++++++++++ ..._finetuned_panx_german_roshanrai1304_en.md | 94 ++++++++++++++ ...d_panx_german_roshanrai1304_pipeline_en.md | 70 ++++++++++ ...inetuned_panx_italian_amitjain171980_en.md | 94 ++++++++++++++ ...panx_italian_amitjain171980_pipeline_en.md | 70 ++++++++++ ...base_finetuned_panx_italian_vonewman_en.md | 94 ++++++++++++++ ...tuned_panx_italian_vonewman_pipeline_en.md | 70 ++++++++++ ...9-06-xlm_roberta_base_finetuned_rugo_en.md | 94 ++++++++++++++ ...a_base_finetuned_squad_au_jluckyboyj_en.md | 86 +++++++++++++ ...netuned_squad_au_jluckyboyj_pipeline_en.md | 69 ++++++++++ ...xlm_roberta_base_finetuned_visquad_2_en.md | 86 +++++++++++++ ...ta_base_finetuned_visquad_2_pipeline_en.md | 69 ++++++++++ ..._ft_udpos213_top9lang_southern_sotho_en.md | 94 ++++++++++++++ ...213_top9lang_southern_sotho_pipeline_en.md | 70 ++++++++++ ...meval21_toxic_with_data_augmentation_en.md | 94 ++++++++++++++ ...xlm_roberta_base_hungarian_ner_huner_hu.md | 94 ++++++++++++++ ...epal_bhasa_vietnam_aug_insert_bert_1_en.md | 94 ++++++++++++++ ...a_vietnam_aug_insert_bert_1_pipeline_en.md | 70 ++++++++++ ...lm_roberta_base_panx_dataset_russian_en.md | 94 ++++++++++++++ ...a_base_panx_dataset_russian_pipeline_en.md | 70 ++++++++++ ...ssian_sentiment_rusentiment_pipeline_ru.md | 70 ++++++++++ ..._spanish_60000_xnli_spanish_pipeline_en.md | 70 ++++++++++ ...ase_tweet_sentiment_spanish_pipeline_en.md | 70 ++++++++++ ...-09-06-xlm_roberta_base_uncased_pina_en.md | 94 ++++++++++++++ ...9-06-xlm_roberta_base_wolof_pipeline_en.md | 70 ++++++++++ ...erta_base_xnli_french_trimmed_french_en.md | 94 ++++++++++++++ ...ndosquadv2_1694025792_8_2e_06_0_01_5_en.md | 86 +++++++++++++ ...2_1694025792_8_2e_06_0_01_5_pipeline_en.md | 69 ++++++++++ 
...dosquadv2_1694026018_16_2e_05_0_01_3_en.md | 86 +++++++++++++ ..._1694026018_16_2e_05_0_01_3_pipeline_en.md | 69 ++++++++++ ...ndosquadv2_1694026058_8_2e_05_0_01_3_en.md | 86 +++++++++++++ ...2_1694026058_8_2e_05_0_01_3_pipeline_en.md | 69 ++++++++++ ...m_roberta_low_resource_langid_large2_en.md | 94 ++++++++++++++ ..._low_resource_langid_large2_pipeline_en.md | 70 ++++++++++ ...berta_base_finetuned_tydiqa_pipeline_sw.md | 69 ++++++++++ ...a_qa_afriberta_base_finetuned_tydiqa_sw.md | 106 ++++++++++++++++ ...nlp_more_fine_tune_24465520_26265900_en.md | 106 ++++++++++++++++ ...fine_tune_24465520_26265900_pipeline_en.md | 69 ++++++++++ ...nlp_more_fine_tune_24465520_26265904_en.md | 106 ++++++++++++++++ ...09-06-xlm_roberta_qa_distill_xlm_mrc_en.md | 106 ++++++++++++++++ ..._roberta_qa_distill_xlm_mrc_pipeline_en.md | 69 ++++++++++ .../2024-09-06-xlm_roberta_thai_2_th.md | 86 +++++++++++++ ...sh_german_all_shuffled_1985_test1000_en.md | 94 ++++++++++++++ ..._all_shuffled_1985_test1000_pipeline_en.md | 70 ++++++++++ ...s_qa_icelandic_finetune_hindi_course_en.md | 86 +++++++++++++ ...andic_finetune_hindi_course_pipeline_en.md | 69 ++++++++++ .../2024-09-06-xlmr_idkmrc_webis_en.md | 86 +++++++++++++ ...024-09-06-xlmr_idkmrc_webis_pipeline_en.md | 69 ++++++++++ ...mroberta_finetuned_recipeqa_modified_en.md | 86 +++++++++++++ ...finetuned_recipeqa_modified_pipeline_en.md | 69 ++++++++++ ...berta_ner_base_finetuned_ner_swahili_sw.md | 115 +++++++++++++++++ ...ahili_finetuned_ner_swahili_pipeline_sw.md | 70 ++++++++++ ...etuned_swahili_finetuned_ner_swahili_sw.md | 115 +++++++++++++++++ ...base_uncased_mit_restaurant_pipeline_en.md | 70 ++++++++++ ...j_mills_base_finetuned_panx_pipeline_xx.md | 70 ++++++++++ ...rta_ner_cj_mills_base_finetuned_panx_xx.md | 112 ++++++++++++++++ ...berta_ner_haesun_base_finetuned_panx_en.md | 113 +++++++++++++++++ ..._ner_jamesmarcel_base_finetuned_panx_de.md | 113 +++++++++++++++++ ...ffi_base_finetuned_panx_all_pipeline_xx.md | 70 ++++++++++ ..._ner_jgriffi_base_finetuned_panx_all_xx.md | 112 ++++++++++++++++ ..._ner_naomiyjchen_base_finetuned_panx_de.md | 113 +++++++++++++++++ ...iyjchen_base_finetuned_panx_pipeline_de.md | 70 ++++++++++ ...er_olpa_base_finetuned_panx_pipeline_de.md | 70 ++++++++++ ...a_ner_rishav_hub_base_finetuned_panx_de.md | 113 +++++++++++++++++ ...hav_hub_base_finetuned_panx_pipeline_de.md | 70 ++++++++++ ...matpagi_base_finetuned_panx_pipeline_de.md | 70 ++++++++++ ...178_base_finetuned_panx_all_pipeline_xx.md | 70 ++++++++++ ..._ner_skr3178_base_finetuned_panx_all_xx.md | 112 ++++++++++++++++ ...ta_ner_xugenpeng_base_finetuned_panx_de.md | 112 ++++++++++++++++ ...genpeng_base_finetuned_panx_pipeline_de.md | 70 ++++++++++ .../2024-09-06-xml_roberta_model_qa_en.md | 86 +++++++++++++ ...-09-06-xml_roberta_model_qa_pipeline_en.md | 69 ++++++++++ .../2024-09-06-yappychappysimple_en.md | 94 ++++++++++++++ ...024-09-06-yappychappysimple_pipeline_en.md | 70 ++++++++++ ...-09-06-zabantu_sot_ven_170m_pipeline_ve.md | 70 ++++++++++ .../2024-09-06-zabantu_sot_ven_170m_ve.md | 94 ++++++++++++++ ...09-07-2020_q2_full_tweets_combined90_en.md | 94 ++++++++++++++ ...0_q2_full_tweets_combined90_pipeline_en.md | 70 ++++++++++ ..._with_balanced_with_add_one_sentence_en.md | 94 ++++++++++++++ .../ahmedlone127/2024-09-07-accu_3_en.md | 94 ++++++++++++++ .../2024-09-07-accu_3_pipeline_en.md | 70 ++++++++++ ...crossapps_ndd_petclinic_test_content_en.md | 94 ++++++++++++++ ..._ndd_petclinic_test_content_pipeline_en.md | 70 ++++++++++ 
...09-07-action_policy_plans_classifier_en.md | 92 ++++++++++++++ .../2024-09-07-agric_eng_lug_en.md | 94 ++++++++++++++ ...-ahisto_ner_model_s_mu_nlpc_pipeline_en.md | 70 ++++++++++ .../2024-09-07-albert_kor_base_tweak_ko.md | 94 ++++++++++++++ ...09-07-albert_kor_base_tweak_pipeline_ko.md | 70 ++++++++++ .../2024-09-07-albert_minebgsd01_en.md | 94 ++++++++++++++ ...024-09-07-albert_minebgsd01_pipeline_en.md | 70 ++++++++++ .../2024-09-07-albert_test_model_2_en.md | 94 ++++++++++++++ ...4-09-07-albert_test_model_2_pipeline_en.md | 70 ++++++++++ ...-09-07-albert_tiny_chinese_david_ner_en.md | 94 ++++++++++++++ ..._turkish_turkish_spam_email_pipeline_tr.md | 70 ++++++++++ ...07-albert_turkish_turkish_spam_email_tr.md | 94 ++++++++++++++ .../2024-09-07-albert_tweet_en.md | 94 ++++++++++++++ .../2024-09-07-albert_tweet_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-07-aliens_en.md | 94 ++++++++++++++ ..._base_newtriplets_v2_lr_2e_7_m_5_e_3_en.md | 86 +++++++++++++ ...024-09-07-all_mpnet_base_v2_20240102_en.md | 94 ++++++++++++++ ...pnet_base_v2_ledgar_full_contrastive_en.md | 86 +++++++++++++ ...v2_lr_5e_7_margin_1_epoch_1_pipeline_en.md | 69 ++++++++++ ...pnet_lr5e_8_margin_1_ep_3_bosnian_64_en.md | 86 +++++++++++++ ..._8_margin_1_ep_3_bosnian_64_pipeline_en.md | 69 ++++++++++ ...tapt_pretrained_norwegian_condencing_en.md | 94 ++++++++++++++ .../2024-09-07-arabic_bert_model_ar.md | 94 ++++++++++++++ ...024-09-07-arabic_bert_model_pipeline_ar.md | 70 ++++++++++ .../ahmedlone127/2024-09-07-arbertv2_ar.md | 94 ++++++++++++++ .../2024-09-07-arbertv2_pipeline_ar.md | 70 ++++++++++ ...-07-argureviews_sentiment_roberta_v1_en.md | 94 ++++++++++++++ ...eviews_sentiment_roberta_v1_pipeline_en.md | 70 ++++++++++ ...07-arywiki_20230101_roberta_mlm_bots_ar.md | 94 ++++++++++++++ ...i_20230101_roberta_mlm_bots_pipeline_ar.md | 70 ++++++++++ .../2024-09-07-augmented_distillbert_en.md | 94 ++++++++++++++ .../2024-09-07-autotrain_qna_1170143354_en.md | 86 +++++++++++++ ...07-bce_reranker_base_v1_maidalun1020_en.md | 94 ++++++++++++++ ...ranker_base_v1_maidalun1020_pipeline_en.md | 70 ++++++++++ .../2024-09-07-bcms_bertic_ner_hr.md | 94 ++++++++++++++ .../2024-09-07-bcms_bertic_ner_pipeline_hr.md | 70 ++++++++++ .../2024-09-07-benjys_first_model_en.md | 94 ++++++++++++++ ...4-09-07-berel_finetuned_dss_maskedlm_en.md | 94 ++++++++++++++ ...erel_finetuned_dss_maskedlm_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-07-bert_b02_en.md | 94 ++++++++++++++ .../2024-09-07-bert_b02_pipeline_en.md | 70 ++++++++++ ...rt_base_cased_ner_conll2003_pipeline_en.md | 70 ++++++++++ ..._cased_finetuned_mbert_finetuned_ner_en.md | 94 ++++++++++++++ ...gual_uncased_finetuned_urdu_pipeline_xx.md | 70 ++++++++++ ..._multilingual_uncased_finetuned_urdu_xx.md | 94 ++++++++++++++ .../2024-09-07-bert_base_ner_pii_fn_en.md | 94 ++++++++++++++ ...-09-07-bert_base_turkish_uncased_ner_tr.md | 94 ++++++++++++++ ...-09-07-bert_finetuned_ner4_nathali99_en.md | 94 ++++++++++++++ ...rt_finetuned_ner4_nathali99_pipeline_en.md | 70 ++++++++++ .../2024-09-07-bert_finetuned_ner_en.md | 94 ++++++++++++++ ...24-09-07-bert_finetuned_ner_pipeline_en.md | 70 ++++++++++ ...9-07-bert_fromscratch_galician_large_en.md | 94 ++++++++++++++ ..._fromscratch_galician_large_pipeline_en.md | 70 ++++++++++ ...024-09-07-bert_indo_base_uncased_ner_en.md | 94 ++++++++++++++ ...-bert_indo_base_uncased_ner_pipeline_en.md | 70 ++++++++++ .../2024-09-07-bert_l6_h768_uncased_en.md | 94 ++++++++++++++ ...-09-07-bert_l6_h768_uncased_pipeline_en.md | 70 
++++++++++ ..._masking_finetuned_squad_google_bert_en.md | 86 +++++++++++++ ...finetuned_squad_google_bert_pipeline_en.md | 69 ++++++++++ .../2024-09-07-bert_large_uncased_lmd_en.md | 94 ++++++++++++++ ...9-07-bert_large_uncased_lmd_pipeline_en.md | 70 ++++++++++ .../2024-09-07-bert_portuguese_ner_en.md | 94 ++++++++++++++ ...4-09-07-bert_portuguese_ner_pipeline_en.md | 70 ++++++++++ ...i_uncased_finetuned_xquadv1_pipeline_xx.md | 69 ++++++++++ ...bert_multi_uncased_finetuned_xquadv1_xx.md | 86 +++++++++++++ ...-07-bert_qa_model_jahanzeb1_pipeline_en.md | 69 ++++++++++ ...-09-07-bert_static_malware_detection_en.md | 94 ++++++++++++++ ...rkish_fine_tuning_question_answering_en.md | 86 +++++++++++++ ...e_tuning_question_answering_pipeline_en.md | 69 ++++++++++ ...7-bertspan4ner_base_chinese_pipeline_zh.md | 70 ++++++++++ ...2024-09-07-bertspan4ner_base_chinese_zh.md | 94 ++++++++++++++ .../2024-09-07-berturk_social_5m_en.md | 94 ++++++++++++++ ...024-09-07-berturk_social_5m_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-07-biodivbert_en.md | 92 ++++++++++++++ ...a_finetuned_valid_testing_0_00005_16_en.md | 94 ++++++++++++++ ...ta_finetuned_valid_testing_0_0001_16_en.md | 94 ++++++++++++++ ...ned_valid_testing_0_0001_16_pipeline_en.md | 70 ++++++++++ ...neg_finetuned_webnlg2020_correctness_en.md | 86 +++++++++++++ ...l_mpnet_base_v2_finetuned_webnlg2017_en.md | 86 +++++++++++++ ...ase_v2_finetuned_webnlg2017_pipeline_en.md | 69 ++++++++++ ...09-07-bmg_translation_lug_english_v1_en.md | 94 ++++++++++++++ ..._translation_lug_english_v1_pipeline_en.md | 70 ++++++++++ ...-07-bpe_selfies_pubchem_shard00_120k_en.md | 94 ++++++++++++++ ...elfies_pubchem_shard00_120k_pipeline_en.md | 70 ++++++++++ ...elfies_pubchem_shard00_150k_pipeline_en.md | 70 ++++++++++ ...24-09-07-brwac_v1_2__checkpoint_last_en.md | 94 ++++++++++++++ ...o_ehr_spanish_cantemist_ner_pipeline_en.md | 70 ++++++++++ ...bsc_bio_ehr_spanish_carmen_distemist_es.md | 94 ++++++++++++++ ...hr_spanish_carmen_distemist_pipeline_es.md | 70 ++++++++++ ...-bsc_bio_ehr_spanish_carmen_meddocan_es.md | 94 ++++++++++++++ ...train_drugtemist_dev_85_ner_pipeline_en.md | 70 ++++++++++ ...bsc_bio_ehr_spanish_livingner_humano_es.md | 94 ++++++++++++++ ..._ehr_spanish_symptemist_ner_pipeline_en.md | 70 ++++++++++ ...-09-07-burmese_awesome_model_akash24_en.md | 94 ++++++++++++++ ...9-07-burmese_awesome_model_mitra_uta_en.md | 94 ++++++++++++++ ...ese_awesome_model_mitra_uta_pipeline_en.md | 70 ++++++++++ ...mese_awesome_qa_model_abhinavreddy17_en.md | 86 +++++++++++++ ...ome_qa_model_abhinavreddy17_pipeline_en.md | 69 ++++++++++ ...awesome_qa_model_ayushij074_pipeline_en.md | 69 ++++++++++ ...ese_awesome_qa_model_b43646_pipeline_en.md | 69 ++++++++++ ...-07-burmese_awesome_qa_model_bbrenes_en.md | 86 +++++++++++++ ...se_awesome_qa_model_bbrenes_pipeline_en.md | 69 ++++++++++ ...some_qa_model_bilalkhan2024_pipeline_en.md | 69 ++++++++++ ...-burmese_awesome_qa_model_dedemilano_en.md | 86 +++++++++++++ ...07-burmese_awesome_qa_model_freongas_en.md | 86 +++++++++++++ ...e_awesome_qa_model_freongas_pipeline_en.md | 69 ++++++++++ ...se_awesome_qa_model_gaogao8_pipeline_en.md | 69 ++++++++++ ...mese_awesome_qa_model_jackyfung00358_en.md | 86 +++++++++++++ ...07-burmese_awesome_qa_model_jamjacob_en.md | 86 +++++++++++++ ...e_awesome_qa_model_jamjacob_pipeline_en.md | 69 ++++++++++ ...9-07-burmese_awesome_qa_model_jyl480_en.md | 86 +++++++++++++ ...wesome_qa_model_kalyanmaram_pipeline_en.md | 69 ++++++++++ ...-burmese_awesome_qa_model_markchiing_en.md | 
86 +++++++++++++ ...07-burmese_awesome_qa_model_mattdyor_en.md | 86 +++++++++++++ ...e_awesome_qa_model_mattdyor_pipeline_en.md | 69 ++++++++++ ...9-07-burmese_awesome_qa_model_myajun_en.md | 86 +++++++++++++ ...se_awesome_qa_model_pavi156_pipeline_en.md | 69 ++++++++++ ...7-burmese_awesome_qa_model_rahulcdeo_en.md | 86 +++++++++++++ ..._awesome_qa_model_rahulcdeo_pipeline_en.md | 69 ++++++++++ ...urmese_awesome_qa_model_ravinderbrai_en.md | 86 +++++++++++++ ...e_awesome_qa_model_reza2002_pipeline_en.md | 69 ++++++++++ ...e_awesome_qa_model_shrutina_pipeline_en.md | 69 ++++++++++ ...ese_awesome_qa_model_venkatarajendra_en.md | 86 +++++++++++++ ...some_qa_model_vikas12061995_pipeline_en.md | 69 ++++++++++ ...rmese_awesome_qa_model_wandaabudiono_en.md | 86 +++++++++++++ ...rmese_awesome_setfit_model_ivanzidov_en.md | 86 +++++++++++++ ...e_awesome_text_classification_v2_1_0_en.md | 94 ++++++++++++++ ..._text_classification_v2_1_0_pipeline_en.md | 70 ++++++++++ ..._awesome_token_classification_v2_1_3_en.md | 94 ++++++++++++++ ...token_classification_v2_1_3_pipeline_en.md | 70 ++++++++++ ...urmese_awesome_wnut_all_jhs_pipeline_en.md | 70 ++++++++++ ...09-07-burmese_awesome_wnut_all_place_en.md | 94 ++++++++++++++ ...mese_awesome_wnut_model_anirudhramoo_en.md | 94 ++++++++++++++ ...-burmese_awesome_wnut_model_carlonos_en.md | 94 ++++++++++++++ ...ome_wnut_model_halikuralde2_pipeline_en.md | 70 ++++++++++ ...awesome_wnut_model_pavement_pipeline_en.md | 70 ++++++++++ ...e_wnut_model_priyanshug0405_pipeline_en.md | 70 ++++++++++ ...se_awesome_wnut_model_robertiulian10_en.md | 94 ++++++++++++++ ...urmese_awesome_wnut_model_sreeharipv_en.md | 94 ++++++++++++++ ...nut_model_stephen_osullivan_pipeline_en.md | 70 ++++++++++ ...rmese_awesome_wnut_model_wstcpyt1988_en.md | 94 ++++++++++++++ .../2024-09-07-burmese_awesome_wnut_neg_en.md | 94 ++++++++++++++ ...07-burmese_distilbert_model_qaicodes_en.md | 94 ++++++++++++++ .../2024-09-07-burmese_first_qa_model_en.md | 86 +++++++++++++ ...-09-07-burmese_qa_model_martacaldero_en.md | 86 +++++++++++++ ...rmese_qa_model_martacaldero_pipeline_en.md | 69 ++++++++++ .../2024-09-07-burmese_spanish_model_en.md | 86 +++++++++++++ ...2024-09-07-burmese_test_setfit_model_en.md | 86 +++++++++++++ ...7-burmese_test_setfit_model_pipeline_en.md | 69 ++++++++++ .../2024-09-07-byline_detection_en.md | 94 ++++++++++++++ ...2024-09-07-byline_detection_pipeline_en.md | 70 ++++++++++ ...inetuned_on_runaways_french_pipeline_en.md | 70 ++++++++++ .../2024-09-07-cat_ner_xlmr_2_pipeline_en.md | 70 ++++++++++ ...024-09-07-cat_sayula_popoluca_iwcg_3_en.md | 94 ++++++++++++++ ...-07-chemberta_pubchem1m_shard00_140k_en.md | 94 ++++++++++++++ .../2024-09-07-chinese_medical_ner_en.md | 94 ++++++++++++++ ...4-09-07-chinese_medical_ner_pipeline_en.md | 70 ++++++++++ ...ec_1_1_supertypes_czert_b_base_cased_en.md | 94 ++++++++++++++ ...pertypes_czert_b_base_cased_pipeline_en.md | 70 ++++++++++ ...024-09-07-codebert_small_v2_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-07-coha2000s_en.md | 94 ++++++++++++++ .../2024-09-07-cold_fusion_itr10_seed3_en.md | 94 ++++++++++++++ ...-07-cold_fusion_itr10_seed3_pipeline_en.md | 70 ++++++++++ .../2024-09-07-constructive_model_es.md | 94 ++++++++++++++ ...24-09-07-constructive_model_pipeline_es.md | 70 ++++++++++ .../2024-09-07-contrabert_c_en.md | 94 ++++++++++++++ .../2024-09-07-contrabert_c_pipeline_en.md | 70 ++++++++++ ...bert_base_turkish_cased_ner_pipeline_tr.md | 70 ++++++++++ .../2024-09-07-cot_ep3_1122_en.md | 86 +++++++++++++ 
...-09-07-covid_qa_distillbert_pipeline_en.md | 69 ++++++++++ ..._finetuned_webnlg2020_metric_average_en.md | 86 +++++++++++++ ...d_webnlg2020_metric_average_pipeline_en.md | 69 ++++++++++ ..._v2_finetuned_webnlg2020_correctness_en.md | 86 +++++++++++++ ...encoder_mmarco_mminilmv2_l12_h384_v1_en.md | 94 ++++++++++++++ ...7-cuad_distil_governing_law_08_28_v1_en.md | 86 +++++++++++++ .../2024-09-07-cybert_cyner_en.md | 100 +++++++++++++++ ...ta_large_twitter_pop_binary_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-07-demomodel_en.md | 94 ++++++++++++++ .../2024-09-07-demomodel_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-07-deproberta_en.md | 94 ++++++++++++++ .../2024-09-07-deproberta_pipeline_en.md | 70 ++++++++++ .../2024-09-07-disaster_cutom_ner_v1_en.md | 94 ++++++++++++++ ...09-07-disaster_cutom_ner_v1_pipeline_en.md | 70 ++++++++++ ...ain_token_classification_nepal_bhasa_en.md | 94 ++++++++++++++ ..._distilled_squad_distilbert_pipeline_en.md | 69 ++++++++++ ...ilbert_base_cased_distilled_squad_v2_en.md | 86 +++++++++++++ ...se_cased_distilled_squad_v2_pipeline_en.md | 69 ++++++++++ ...rt_base_cased_finetuned_pfe_projectt_en.md | 86 +++++++++++++ ...ish_uncased_finetuned_rock_argentino_en.md | 94 ++++++++++++++ ...ed_finetuned_rock_argentino_pipeline_en.md | 70 ++++++++++ ..._uncased_finetuned_text_intelligence_en.md | 94 ++++++++++++++ ..._uncased_detected_jailbreak_pipeline_en.md | 70 ++++++++++ ...se_uncased_distilled_clinc_schnatz65_en.md | 94 ++++++++++++++ ...stilbert_base_uncased_finetuned_cefr_en.md | 94 ++++++++++++++ ...base_uncased_finetuned_cefr_pipeline_en.md | 70 ++++++++++ ...ase_uncased_finetuned_clinc_jeremygf_en.md | 94 ++++++++++++++ ...t_base_uncased_finetuned_con_dataset_en.md | 94 ++++++++++++++ ...bert_base_uncased_finetuned_dol_ecab_en.md | 94 ++++++++++++++ ..._uncased_finetuned_dol_ecab_pipeline_en.md | 70 ++++++++++ ...ed_finetuned_imdb_accelerate_abh1na5_en.md | 94 ++++++++++++++ ...ned_imdb_accelerate_abh1na5_pipeline_en.md | 70 ++++++++++ ...sed_finetuned_imdb_accelerate_zmeeks_en.md | 94 ++++++++++++++ ...rt_base_uncased_finetuned_imdb_coign_en.md | 94 ++++++++++++++ ...ncased_finetuned_imdb_coign_pipeline_en.md | 70 ++++++++++ ...ased_finetuned_imdb_jinq047_pipeline_en.md | 70 ++++++++++ ...ase_uncased_finetuned_imdb_pkyriakis_en.md | 94 ++++++++++++++ ...ed_finetuned_imdb_pkyriakis_pipeline_en.md | 70 ++++++++++ ...ase_uncased_finetuned_imdb_raincheck_en.md | 94 ++++++++++++++ ...t_base_uncased_finetuned_imdb_rohbro_en.md | 94 ++++++++++++++ ...cased_finetuned_imdb_rohbro_pipeline_en.md | 70 ++++++++++ ...cased_finetuned_ner_dev4952_pipeline_en.md | 70 ++++++++++ ...sed_finetuned_ner_harsh1304_pipeline_en.md | 70 ++++++++++ ..._uncased_finetuned_ner_hemg_pipeline_en.md | 70 ++++++++++ ...finetuned_ner_priyabrata018_pipeline_en.md | 70 ++++++++++ ...t_base_uncased_finetuned_ner_raulgdp_en.md | 94 ++++++++++++++ ...uned_ner_renardkorzeniowski_pipeline_en.md | 70 ++++++++++ ...se_uncased_finetuned_ner_shashank612_en.md | 94 ++++++++++++++ ...d_finetuned_ner_shashank612_pipeline_en.md | 70 ++++++++++ ...se_uncased_finetuned_ner_wangyue6761_en.md | 94 ++++++++++++++ ...tilbert_base_uncased_finetuned_nersd_en.md | 94 ++++++++++++++ ...ase_uncased_finetuned_nersd_pipeline_en.md | 70 ++++++++++ ...ased_finetuned_squad_bat007_pipeline_en.md | 69 ++++++++++ ..._finetuned_squad_begoniabcs_pipeline_en.md | 69 ++++++++++ ...e_uncased_finetuned_squad_bighands23_en.md | 86 +++++++++++++ ..._finetuned_squad_bighands23_pipeline_en.md | 69 ++++++++++ 
...ed_squad_d5716d28_dchung117_pipeline_en.md | 69 ++++++++++ ...ed_finetuned_squad_d5716d28_dharma20_en.md | 86 +++++++++++++ ...ned_squad_d5716d28_dharma20_pipeline_en.md | 69 ++++++++++ ..._finetuned_squad_d5716d28_gautam1989_en.md | 92 ++++++++++++++ ...d_squad_d5716d28_gautam1989_pipeline_en.md | 69 ++++++++++ ..._finetuned_squad_d5716d28_juancopi81_en.md | 92 ++++++++++++++ ...d_finetuned_squad_d5716d28_jwlovetea_en.md | 92 ++++++++++++++ ...finetuned_squad_d5716d28_osanseviero_en.md | 92 ++++++++++++++ ...base_uncased_finetuned_squad_decre99_en.md | 86 +++++++++++++ ...sed_finetuned_squad_decre99_pipeline_en.md | 69 ++++++++++ ...base_uncased_finetuned_squad_devsick_en.md | 86 +++++++++++++ ...sed_finetuned_squad_devsick_pipeline_en.md | 69 ++++++++++ ..._uncased_finetuned_squad_ep8_batch16_en.md | 86 +++++++++++++ ...se_uncased_finetuned_squad_gyeol0225_en.md | 86 +++++++++++++ ...sed_finetuned_squad_ikeofai_pipeline_en.md | 69 ++++++++++ ...se_uncased_finetuned_squad_injustice_en.md | 86 +++++++++++++ ...d_finetuned_squad_injustice_pipeline_en.md | 69 ++++++++++ ...ase_uncased_finetuned_squad_sasa3396_en.md | 86 +++++++++++++ ...ased_finetuned_squad_soikit_pipeline_en.md | 69 ++++++++++ ..._base_uncased_finetuned_squad_vijaym_en.md | 86 +++++++++++++ ..._uncased_finetuned_squad_yweslakarep_en.md | 86 +++++++++++++ ...e_uncased_finetuned_wikiann_pipeline_en.md | 70 ++++++++++ ...e_uncased_pii_200_burkelive_pipeline_en.md | 70 ++++++++++ ...uncased_squad2_lora_merged_jeukhwang_en.md | 86 +++++++++++++ ...7-distilbert_base_uncased_squad2_p10_en.md | 86 +++++++++++++ ...ert_base_uncased_squad2_p15_pipeline_en.md | 69 ++++++++++ ...e_uncased_squad2_pruned_p30_pipeline_en.md | 69 ++++++++++ ..._finetuned_ner_veronica1608_pipeline_en.md | 70 ++++++++++ ...-distilbert_finetuned_squad_hhjingbo_en.md | 86 +++++++++++++ ...rt_finetuned_squad_hhjingbo_pipeline_en.md | 69 ++++++++++ ...7-distilbert_finetuned_squadv2_eenda_en.md | 86 +++++++++++++ ...7-distilbert_finetuned_squadv2_lusic_en.md | 86 +++++++++++++ ...ert_finetuned_squadv2_thangduong0509_en.md | 86 +++++++++++++ ...uned_squadv2_thangduong0509_pipeline_en.md | 69 ++++++++++ ...07-distilbert_imdb_deborahm_pipeline_en.md | 70 ++++++++++ ...-07-distilbert_kazakh_ner_2_pipeline_en.md | 70 ++++++++++ .../2024-09-07-distilbert_squad_dofla_en.md | 86 +++++++++++++ .../2024-09-07-distilbert_trained_en.md | 94 ++++++++++++++ ...24-09-07-distilbert_trained_pipeline_en.md | 70 ++++++++++ ...24-09-07-distilbert_twitter_pipeline_en.md | 70 ++++++++++ .../2024-09-07-distillber_squadv2_en.md | 86 +++++++++++++ ...tilroberta_base_climate_d_s_pipeline_en.md | 70 ++++++++++ ...lombian_sign_language_python_bimodal_en.md | 94 ++++++++++++++ ...ign_language_python_bimodal_pipeline_en.md | 70 ++++++++++ ...-07-distilroberta_base_ft_mensrights_en.md | 94 ++++++++++++++ ...lroberta_base_ft_mensrights_pipeline_en.md | 70 ++++++++++ ...roberta_base_ft_trueunpopularopinion_en.md | 94 ++++++++++++++ ...4-09-07-distilroberta_eli5_mlm_model_en.md | 94 ++++++++++++++ ...istilroberta_eli5_mlm_model_pipeline_en.md | 70 ++++++++++ .../2024-09-07-dummy_model_alejoa_en.md | 94 ++++++++++++++ .../2024-09-07-dummy_model_anrilombard_en.md | 94 ++++++++++++++ ...2024-09-07-dummy_model_appletreeleaf_en.md | 94 ++++++++++++++ ...7-dummy_model_appletreeleaf_pipeline_en.md | 70 ++++++++++ ...-dummy_model_bhaskar_gautam_pipeline_en.md | 70 ++++++++++ .../2024-09-07-dummy_model_elliotsmith_en.md | 94 ++++++++++++++ ...-07-dummy_model_elliotsmith_pipeline_en.md | 70 ++++++++++ 
.../2024-09-07-dummy_model_hanzhuo_en.md | 94 ++++++++++++++ .../2024-09-07-dummy_model_jfforero_en.md | 94 ++++++++++++++ .../2024-09-07-dummy_model_linyi_en.md | 94 ++++++++++++++ .../2024-09-07-dummy_model_marasaki_en.md | 94 ++++++++++++++ ...-09-07-dummy_model_marasaki_pipeline_en.md | 70 ++++++++++ .../2024-09-07-dummy_model_melody20_en.md | 94 ++++++++++++++ ...-09-07-dummy_model_melody20_pipeline_en.md | 70 ++++++++++ .../2024-09-07-dummy_model_pipeline_zh.md | 70 ++++++++++ ...24-09-07-dummy_model_rkn222_pipeline_en.md | 70 ++++++++++ .../2024-09-07-dummy_model_sajid73_en.md | 94 ++++++++++++++ ...4-09-07-dummy_model_sajid73_pipeline_en.md | 70 ++++++++++ .../2024-09-07-dummy_model_shadowtwin41_fr.md | 94 ++++++++++++++ ...07-dummy_model_shadowtwin41_pipeline_fr.md | 70 ++++++++++ ...ummy_model_test_osanseviero_pipeline_en.md | 70 ++++++++++ ...2024-09-07-dummy_model_varunpatrikar_en.md | 94 ++++++++++++++ ...7-dummy_model_varunpatrikar_pipeline_en.md | 70 ++++++++++ .../2024-09-07-dummy_model_vonewman_en.md | 94 ++++++++++++++ ...-09-07-dummy_model_vonewman_pipeline_en.md | 70 ++++++++++ ...024-09-07-dummy_model_yuuhanishigata_en.md | 94 ++++++++++++++ ...-dummy_model_yuuhanishigata_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-07-dummy_model_zh.md | 94 ++++++++++++++ .../2024-09-07-dzoqa_malayalam_en.md | 86 +++++++++++++ ...24-09-07-electra_qa_base_best_squad2_en.md | 98 ++++++++++++++ ...electra_qa_base_best_squad2_pipeline_en.md | 69 ++++++++++ .../2024-09-07-elvis_roberta_pipeline_en.md | 70 ++++++++++ ...2024-09-07-email_question_extraction_en.md | 94 ++++++++++++++ ...7-email_question_extraction_pipeline_en.md | 70 ++++++++++ .../2024-09-07-emotion_test_1000_en.md | 94 ++++++++++++++ ...-09-07-english_multinerd_ner_roberta_en.md | 94 ++++++++++++++ ...glish_multinerd_ner_roberta_pipeline_en.md | 70 ++++++++++ .../2024-09-07-envroberta_base_en.md | 94 ++++++++++++++ .../ahmedlone127/2024-09-07-erikrepo_en.md | 94 ++++++++++++++ .../2024-09-07-esmlmt62_2500_en.md | 94 ++++++++++++++ ...-07-esperberto_small_sayula_popoluca_eo.md | 94 ++++++++++++++ .../2024-09-07-eth_setfit_model_en.md | 86 +++++++++++++ ...2024-09-07-eth_setfit_model_pipeline_en.md | 69 ++++++++++ ...07-ewondo_xlm_roberta_base_pipeline_nan.md | 70 ++++++++++ .../ahmedlone127/2024-09-07-expe_0_en.md | 94 ++++++++++++++ .../2024-09-07-expe_0_pipeline_en.md | 70 ++++++++++ .../2024-09-07-extractive_qa_squad_en.md | 86 +++++++++++++ .../2024-09-07-fairlex_ecthr_minilm_en.md | 94 ++++++++++++++ ...-09-07-fairlex_ecthr_minilm_pipeline_en.md | 70 ++++++++++ ...fewshot_qa_002_20230622_001_pipeline_en.md | 69 ++++++++++ .../2024-09-07-film95960roberta_base_en.md | 94 ++++++++++++++ ...09-07-film95960roberta_base_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-07-fine_tune_en.md | 86 +++++++++++++ .../2024-09-07-fine_tune_pipeline_en.md | 69 ++++++++++ .../2024-09-07-fine_tuned_distilbert_en.md | 86 +++++++++++++ .../2024-09-07-fine_tuned_tradisi_bali_en.md | 86 +++++++++++++ ...b_english_tonga_tonga_islands_korean_en.md | 94 ++++++++++++++ ..._tonga_tonga_islands_korean_pipeline_en.md | 70 ++++++++++ ...one_epoch_multi_qa_mpnet_base_dot_v1_en.md | 86 +++++++++++++ ..._multi_qa_mpnet_base_dot_v1_pipeline_en.md | 69 ++++++++++ ...lassfication_roberta_model_slickdata_en.md | 94 ++++++++++++++ ...ion_roberta_model_slickdata_pipeline_en.md | 70 ++++++++++ .../2024-09-07-finetuning_mixed_en.md | 86 +++++++++++++ .../2024-09-07-finsentencebert_en.md | 86 +++++++++++++ 
...4-09-07-fresh_model_uncased_pipeline_en.md | 70 ++++++++++ ...24-09-07-from_classifier_v1_pipeline_en.md | 69 ++++++++++ .../2024-09-07-gal_sayula_popoluca_iw_3_en.md | 94 ++++++++++++++ ...07-gal_sayula_popoluca_iw_3_pipeline_en.md | 70 ++++++++++ ...erative_qas_pariwisata_bali_pipeline_en.md | 69 ++++++++++ .../2024-09-07-gr_roberta_base_en.md | 94 ++++++++++++++ .../2024-09-07-gr_roberta_base_pipeline_en.md | 70 ++++++++++ .../2024-09-07-greeklegalroberta_v2_en.md | 94 ++++++++++++++ ...-09-07-greeklegalroberta_v2_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-07-hafez_bert_fa.md | 94 ++++++++++++++ ...-07-hasoc19_xlm_roberta_base_profane_en.md | 94 ++++++++++++++ ...19_xlm_roberta_base_profane_pipeline_en.md | 70 ++++++++++ ...9-07-hate_hate_random2_seed2_bernice_en.md | 94 ++++++++++++++ ..._hate_random2_seed2_bernice_pipeline_en.md | 70 ++++++++++ ...eech_detection_mpnet_basev2_pipeline_en.md | 70 ++++++++++ ...helsinki_danish_swedish_v12_pipeline_en.md | 70 ++++++++++ ...024-09-07-helsinki_danish_swedish_v2_en.md | 94 ++++++++++++++ .../2024-09-07-hiner_romanian_en.md | 94 ++++++++++++++ .../2024-09-07-hiner_romanian_pipeline_en.md | 70 ++++++++++ .../2024-09-07-humour_detection_xlmr_en.md | 94 ++++++++++++++ ...09-07-humour_detection_xlmr_pipeline_en.md | 70 ++++++++++ .../2024-09-07-hupd_distilroberta_base_en.md | 94 ++++++++++++++ ...024-09-07-idiom_xlm_roberta_pipeline_en.md | 70 ++++++++++ .../2024-09-07-ife_sentence_model2_en.md | 86 +++++++++++++ ...4-09-07-ife_sentence_model2_pipeline_en.md | 69 ++++++++++ ...09-07-improved_xlm_attempt2_pipeline_en.md | 70 ++++++++++ ...-07-indicbertv2_mlm_sam_tlm_pipeline_xx.md | 70 ++++++++++ .../2024-09-07-indicbertv2_mlm_sam_tlm_xx.md | 94 ++++++++++++++ .../2024-09-07-intent_global_en.md | 94 ++++++++++++++ ...om5240_whisper_small_zhhk_1_pipeline_en.md | 69 ++++++++++ ...ig_ctx4_cwd3_english_french_pipeline_en.md | 70 ++++++++++ ...arian_small_ctx4_cwd1_english_french_en.md | 94 ++++++++++++++ ...g_base_ft_udpos213_top2lang_pipeline_en.md | 70 ++++++++++ ...24-09-07-lab1_finetuning_cheyannelam_en.md | 94 ++++++++++++++ .../2024-09-07-lab1_finetuning_daanjiri_en.md | 94 ++++++++++++++ ...07-lab1_finetuning_daanjiri_pipeline_en.md | 70 ++++++++++ .../2024-09-07-lab1_random_daanjiri_en.md | 94 ++++++++++++++ ...24-09-07-lab1_random_sfliao_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-07-lbt5_large_en.md | 86 +++++++++++++ .../2024-09-07-lbt5_large_pipeline_en.md | 69 ++++++++++ ...7-legal_longformer_base_4096_spanish_es.md | 94 ++++++++++++++ .../2024-09-07-leia_lm_large_en.md | 94 ++++++++++++++ .../2024-09-07-leia_lm_large_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-07-llama_model_en.md | 94 ++++++++++++++ ...lld_valbadia_ita_loresmt_l4_pipeline_it.md | 70 ++++++++++ ...4-09-07-lm_ner_skills_extractor_bert_en.md | 94 ++++++++++++++ ...m_ner_skills_extractor_bert_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-07-logprecis_en.md | 94 ++++++++++++++ ...lr1e5_bs8_distilbert_qa_pytorch_full_en.md | 86 +++++++++++++ ..._distilbert_qa_pytorch_full_pipeline_en.md | 69 ++++++++++ ...thien_tra_bai_tieng_anh_chuyen_nganh_en.md | 86 +++++++++++++ ..._bai_tieng_anh_chuyen_nganh_pipeline_en.md | 69 ++++++++++ .../2024-09-07-macedonian_roberta_base_mk.md | 94 ++++++++++++++ ...-07-macedonian_roberta_base_pipeline_mk.md | 70 ++++++++++ ...norwegian_swedish_finetuned_pipeline_sv.md | 70 ++++++++++ .../2024-09-07-malurl_roberta_10e_en.md | 94 ++++++++++++++ ..._arabic_parallel_10k_splitted_cosine_en.md | 94 
++++++++++++++ ...arallel_10k_splitted_cosine_pipeline_en.md | 70 ++++++++++ ...arian_finetuned_combined_dataset_1_1_en.md | 94 ++++++++++++++ ...etuned_combined_dataset_1_1_pipeline_en.md | 70 ++++++++++ ...nch_accelerate_chandrasutrisnotjhong_en.md | 94 ++++++++++++++ ...erate_chandrasutrisnotjhong_pipeline_en.md | 70 ++++++++++ ...onga_islands_french_accelerate_hjhj2_en.md | 94 ++++++++++++++ ...nds_french_accelerate_hjhj2_pipeline_en.md | 70 ++++++++++ ...onga_islands_french_accelerate_leisa_en.md | 94 ++++++++++++++ ...nds_french_accelerate_leisa_pipeline_en.md | 70 ++++++++++ ...ga_islands_french_accelerate_naninya_en.md | 94 ++++++++++++++ ...slands_french_accelerate_satyashetty_en.md | 94 ++++++++++++++ ...ench_accelerate_satyashetty_pipeline_en.md | 70 ++++++++++ ...tonga_tonga_islands_french_pickupppp_en.md | 94 ++++++++++++++ ...ga_islands_french_pickupppp_pipeline_en.md | 70 ++++++++++ ..._tonga_tonga_islands_french_yimhuang_en.md | 94 ++++++++++++++ ...glish_tonga_tonga_islands_hindi_comm_en.md | 94 ++++++++++++++ ...ga_tonga_islands_hindi_comm_pipeline_en.md | 70 ++++++++++ .../2024-09-07-marian_frwo_pipeline_en.md | 70 ++++++++++ ...bbc_nigerian_pidgin_english_pipeline_en.md | 70 ++++++++++ ...rianmt_ufal_english_spanish_pipeline_en.md | 70 ++++++++++ ...mbti_classification_roberta_base_aug_en.md | 94 ++++++++++++++ ...sification_roberta_base_aug_pipeline_en.md | 70 ++++++++++ ...4-09-07-medical_english_spanish_8_16_en.md | 94 ++++++++++++++ ...edical_english_spanish_8_16_pipeline_en.md | 70 ++++++++++ ...4-09-07-memo_bert_sanskrit_saskta_01_da.md | 94 ++++++++++++++ ...emo_bert_sanskrit_saskta_01_pipeline_da.md | 70 ++++++++++ .../2024-09-07-mformer_care_pipeline_en.md | 70 ++++++++++ .../2024-09-07-mika_safeaerobert_en.md | 94 ++++++++++++++ ...024-09-07-mika_safeaerobert_pipeline_en.md | 70 ++++++++++ ...9-07-minilmv2_l6_h384_r_fineweb_100k_en.md | 94 ++++++++++++++ .../2024-09-07-mlcovid19_classifier_en.md | 94 ++++++++++++++ ...-09-07-mlcovid19_classifier_pipeline_en.md | 70 ++++++++++ ...mminilmv2_l12_h384_v1_y2lan_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-07-model1_en.md | 94 ++++++++++++++ ...del3e_norwegian_wd_norwegian_perturb_en.md | 94 ++++++++++++++ ..._tonga_tonga_islands_french_pipeline_en.md | 70 ++++++++++ ...4-09-07-model_m5_english_chinese_twi_en.md | 94 ++++++++++++++ ...odel_m5_english_chinese_twi_pipeline_en.md | 70 ++++++++++ ...4-09-07-model_name_kayyyy27_pipeline_en.md | 70 ++++++++++ ...-07-movie_review_roberta_imalexianne_en.md | 94 ++++++++++++++ ...7-mpnet_base_natural_questions_mnsrl_en.md | 86 +++++++++++++ ...ase_natural_questions_mnsrl_pipeline_en.md | 69 ++++++++++ .../ahmedlone127/2024-09-07-msbert_he.md | 94 ++++++++++++++ .../2024-09-07-msbert_pipeline_he.md | 70 ++++++++++ ...2024-09-07-mtl_roberta_base_pipeline_en.md | 70 ++++++++++ ..._search_65_25_1epoch_full_p_pipeline_en.md | 69 ++++++++++ ...e_dot_v1_covidqa_search_75_25_2epoch_en.md | 86 +++++++++++++ ...covidqa_search_75_25_2epoch_pipeline_en.md | 69 ++++++++++ ...i_qa_mpnet_base_dot_v1_fine_tuned_hs_en.md | 86 +++++++++++++ ...t_base_dot_v1_fine_tuned_hs_pipeline_en.md | 69 ++++++++++ ...e5_base_classification_v0_4_pipeline_xx.md | 70 ++++++++++ ...ilingual_e5_base_classification_v0_4_xx.md | 94 ++++++++++++++ ..._xlm_roberta_for_ner_eserdy_pipeline_xx.md | 70 ++++++++++ ...tilingual_xlm_roberta_for_ner_eserdy_xx.md | 94 ++++++++++++++ ...roberta_imdb_padding60model_pipeline_en.md | 70 ++++++++++ ...07-named_entity_recognition_pipeline_en.md | 70 ++++++++++ 
...-nepal_bhasa_trained_danish_pipeline_en.md | 70 ++++++++++ ...24-09-07-nepal_bhasa_trained_serbian_en.md | 94 ++++++++++++++ ...nepal_bhasa_trained_serbian_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-07-ner_411_id.md | 94 ++++++++++++++ .../2024-09-07-ner_411_pipeline_id.md | 70 ++++++++++ ...024-09-07-ner_classifier_distil_bert_en.md | 94 ++++++++++++++ ...-ner_classifier_distil_bert_pipeline_en.md | 70 ++++++++++ .../2024-09-07-ner_cw_pipeline_en.md | 66 ++++++++++ .../2024-09-07-ner_finetuned_en.md | 94 ++++++++++++++ ...2024-09-07-ner_legal_german_pipeline_de.md | 70 ++++++++++ .../2024-09-07-ner_model_abderrahimal_en.md | 94 ++++++++++++++ ...9-07-ner_model_abderrahimal_pipeline_en.md | 70 ++++++++++ ...-ner_ner_random3_seed0_roberta_large_en.md | 94 ++++++++++++++ ...-ner_newsagency_bert_french_pipeline_fr.md | 70 ++++++++++ ...9-07-nerd_nerd_random0_seed1_bernice_en.md | 94 ++++++++++++++ ..._nerd_random0_seed1_bernice_pipeline_en.md | 70 ++++++++++ ...9-07-nerd_nerd_random3_seed0_bernice_en.md | 94 ++++++++++++++ ..._nerd_random3_seed0_bernice_pipeline_en.md | 70 ++++++++++ .../2024-09-07-netflix_reviews_en.md | 94 ++++++++++++++ .../2024-09-07-netflix_reviews_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-07-nofibot3_en.md | 86 +++++++++++++ ...2024-09-07-norwegian_bokml_bert_base_no.md | 94 ++++++++++++++ ...bokml_roberta_base_scandinavian_long_en.md | 94 ++++++++++++++ ...erta_base_scandinavian_long_pipeline_en.md | 70 ++++++++++ ...egian_bokml_whisper_base_nbailabbeta_no.md | 84 ++++++++++++ ...ml_whisper_base_nbailabbeta_pipeline_no.md | 69 ++++++++++ ...07-norwegian_bokml_whisper_tiny_beta_no.md | 84 ++++++++++++ ...ian_bokml_whisper_tiny_beta_pipeline_no.md | 69 ++++++++++ ...l6_h384_distilled_from_roberta_large_en.md | 94 ++++++++++++++ ...024-09-07-nuclear_medicine_daroberta_en.md | 94 ++++++++++++++ .../ahmedlone127/2024-09-07-nyaszzzz_en.md | 86 +++++++++++++ .../2024-09-07-ofa_multi_100_en.md | 94 ++++++++++++++ .../2024-09-07-ofa_multi_100_pipeline_en.md | 70 ++++++++++ .../2024-09-07-opticalbert_cased_squad2_en.md | 86 +++++++++++++ ...024-09-07-opus_base_ailem_adaptified_en.md | 94 ++++++++++++++ ...-opus_base_ailem_adaptified_pipeline_en.md | 70 ++++++++++ .../2024-09-07-opus_big_ailem_random_en.md | 94 ++++++++++++++ ..._src_tonga_tonga_islands_trg_testing_en.md | 94 ++++++++++++++ ..._english_tonga_tonga_islands_italian_en.md | 94 ++++++++++++++ ...nga_tonga_islands_romanian_anhtuanta_en.md | 94 ++++++++++++++ ..._islands_romanian_anhtuanta_pipeline_en.md | 70 ++++++++++ ...nds_romanian_louistichelman_pipeline_en.md | 70 ++++++++++ ..._tonga_tonga_islands_romanian_momo22_en.md | 94 ++++++++++++++ ...nga_islands_romanian_momo22_pipeline_en.md | 70 ++++++++++ ...tonga_islands_english_clean_marianmt_en.md | 94 ++++++++++++++ ...ga_islands_swahili_zumaridi_pipeline_en.md | 70 ++++++++++ ..._maltese_french_english_bds_pipeline_en.md | 70 ++++++++++ ...tonga_tonga_islands_english_pipeline_en.md | 70 ++++++++++ ...ga_tonga_islands_english_donghyunkim_en.md | 94 ++++++++++++++ ..._islands_english_second_felipetanios_en.md | 94 ++++++++++++++ ...ga_islands_english_second_tiagohatta_en.md | 94 ++++++++++++++ ...tonga_tonga_islands_english_pipeline_en.md | 70 ++++++++++ ...ned_npomo_english_15_epochs_pipeline_en.md | 70 ++++++++++ ...nga_tonga_islands_jpn_hani_pipeline_nan.md | 70 ++++++++++ ...07-opus_wmt_finetuned_enfr_wang_2022_en.md | 94 ++++++++++++++ ...mt_finetuned_enfr_wang_2022_pipeline_en.md | 70 ++++++++++ 
.../ahmedlone127/2024-09-07-othe_1_en.md | 94 ++++++++++++++ .../2024-09-07-othe_1_pipeline_en.md | 70 ++++++++++ ...09-07-phowhisper_tiny_vinai_pipeline_vi.md | 69 ++++++++++ .../2024-09-07-phowhisper_tiny_vinai_vi.md | 84 ++++++++++++ .../2024-09-07-pii_model_jayshah07_en.md | 94 ++++++++++++++ ...4-09-07-pii_model_jayshah07_pipeline_en.md | 70 ++++++++++ ...lroberta_base_mrpc_glue_luigitercero_en.md | 94 ++++++++++++++ ...base_mrpc_glue_luigitercero_pipeline_en.md | 70 ++++++++++ ...ilroberta_base_mrpc_miguel_moroyoqui_en.md | 94 ++++++++++++++ ...uguese_finegrained_one_shot_pipeline_en.md | 70 ++++++++++ ...redict_perception_xlmr_blame_concept_en.md | 94 ++++++++++++++ ...rception_xlmr_blame_concept_pipeline_en.md | 70 ++++++++++ .../2024-09-07-prof_ner_spanish_es.md | 94 ++++++++++++++ .../ahmedlone127/2024-09-07-project_us_en.md | 94 ++++++++++++++ .../2024-09-07-project_us_pipeline_en.md | 70 ++++++++++ .../2024-09-07-prompt_ls_portuguese_2_en.md | 94 ++++++++++++++ ...is_all_mpnet_base_v2_10shot_pipeline_en.md | 69 ++++++++++ ...024-09-07-pubchem10m_smiles_bpe_390k_en.md | 94 ++++++++++++++ ...-pubmedbert_full_finetuned_ner_pablo_en.md | 94 ++++++++++++++ ...rt_full_finetuned_ner_pablo_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-07-q2d_gpt_35_en.md | 86 +++++++++++++ .../2024-09-07-q2d_gpt_35_pipeline_en.md | 69 ++++++++++ .../2024-09-07-qa_ccc_model_pipeline_en.md | 69 ++++++++++ .../2024-09-07-qa_iiitdmj_testing_en.md | 86 +++++++++++++ .../2024-09-07-qa_model_fsghs_en.md | 86 +++++++++++++ .../2024-09-07-qa_model_fsghs_pipeline_en.md | 69 ++++++++++ .../2024-09-07-qa_model_martacaldero_en.md | 86 +++++++++++++ .../2024-09-07-qa_model_test_ukson_en.md | 86 +++++++++++++ ...4-09-07-qa_model_test_ukson_pipeline_en.md | 69 ++++++++++ ...c_data_with_real_data_finetuned_v2_0_en.md | 86 +++++++++++++ ...th_real_data_finetuned_v2_0_pipeline_en.md | 69 ++++++++++ .../2024-09-07-qamodel_distilbert_en.md | 86 +++++++++++++ ...24-09-07-qamodel_distilbert_pipeline_en.md | 69 ++++++++++ .../ahmedlone127/2024-09-07-queansmodel_en.md | 86 +++++++++++++ ...4-09-07-question_answering_hansollll_en.md | 86 +++++++++++++ ...uestion_answering_hansollll_pipeline_en.md | 69 ++++++++++ .../2024-09-07-r_fb_sms_lm_pipeline_en.md | 70 ++++++++++ .../2024-09-07-r_t_sms_lm_pipeline_en.md | 70 ++++++++++ .../2024-09-07-rap_phase2_11jan_15i_v2_en.md | 86 +++++++++++++ ...-07-rap_phase2_11jan_15i_v2_pipeline_en.md | 69 ++++++++++ ...-09-07-readability_spanish_sentences_es.md | 94 ++++++++++++++ ...adability_spanish_sentences_pipeline_es.md | 70 ++++++++++ ...-refpydst_5p_referredstates_split_v1_en.md | 86 +++++++++++++ .../ahmedlone127/2024-09-07-robasquerta_eu.md | 94 ++++++++++++++ ...07-roberta_2020_q1_filtered_pipeline_en.md | 70 ++++++++++ .../2024-09-07-roberta_base_10m_1_en.md | 94 ++++++++++++++ .../2024-09-07-roberta_base_10m_3_en.md | 94 ++++++++++++++ ...24-09-07-roberta_base_10m_3_pipeline_en.md | 70 ++++++++++ .../2024-09-07-roberta_base_1b_3_en.md | 94 ++++++++++++++ ...024-09-07-roberta_base_1b_3_pipeline_en.md | 70 ++++++++++ ..._base_catalan_plantl_gob_es_pipeline_ca.md | 70 ++++++++++ ...apfake_combined_train_test_15200_2_8_en.md | 94 ++++++++++++++ ...mbined_train_test_15200_2_8_pipeline_en.md | 70 ++++++++++ ...berta_base_education_domain_pipeline_en.md | 70 ++++++++++ .../2024-09-07-roberta_base_epoch_25_en.md | 94 ++++++++++++++ ...09-07-roberta_base_epoch_25_pipeline_en.md | 70 ++++++++++ ...09-07-roberta_base_epoch_29_pipeline_en.md | 70 ++++++++++ 
.../2024-09-07-roberta_base_epoch_43_en.md | 94 ++++++++++++++ ...-roberta_base_finetuned_cola_jinchen_en.md | 94 ++++++++++++++ ...base_finetuned_cola_jinchen_pipeline_en.md | 70 ++++++++++ ...-roberta_base_finetuned_neg_pipeline_en.md | 70 ++++++++++ ...uned_sarcasm_news_headline_detection_en.md | 94 ++++++++++++++ ...asm_news_headline_detection_pipeline_en.md | 70 ++++++++++ ...-roberta_base_ftd_on_glue_qqp_iter_1_en.md | 94 ++++++++++++++ ...base_ftd_on_glue_qqp_iter_1_pipeline_en.md | 70 ++++++++++ ...024-09-07-roberta_base_lego_emotions_en.md | 94 ++++++++++++++ ...-roberta_base_lego_emotions_pipeline_en.md | 70 ++++++++++ ...2024-09-07-roberta_base_lener_breton_en.md | 94 ++++++++++++++ ...7-roberta_base_lener_breton_pipeline_en.md | 70 ++++++++++ .../2024-09-07-roberta_base_ner_demo3_en.md | 94 ++++++++++++++ ...9-07-roberta_base_ner_demo3_pipeline_en.md | 70 ++++++++++ ...9-07-roberta_base_ner_demo_ganbold13_mn.md | 94 ++++++++++++++ ...9-07-roberta_base_ner_demo_turshilt2_mn.md | 94 ++++++++++++++ ...-roberta_base_qqp_two_stage_pipeline_en.md | 70 ++++++++++ ...024-09-07-roberta_base_rte_two_stage_en.md | 94 ++++++++++++++ ...-roberta_base_rte_two_stage_pipeline_en.md | 70 ++++++++++ .../2024-09-07-roberta_base_thai_en.md | 94 ++++++++++++++ ...09-07-roberta_base_vira_intents_live_en.md | 94 ++++++++++++++ ...erta_base_vira_intents_live_pipeline_en.md | 70 ++++++++++ ..._realsumm_by_examples_fold2_pipeline_en.md | 70 ++++++++++ ...erta_conll_learning_rate1e4_pipeline_en.md | 70 ++++++++++ ...-roberta_dpt_online_sexism_detection_en.md | 94 ++++++++++++++ ...dpt_online_sexism_detection_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-07-roberta_en.md | 94 ++++++++++++++ ...-07-roberta_full_finetuned_ner_pablo_en.md | 94 ++++++++++++++ ...ta_full_finetuned_ner_pablo_pipeline_en.md | 70 ++++++++++ ...7-roberta_ganda_cased_malay_ner_full_en.md | 94 ++++++++++++++ ...oberta_ganda_cased_malay_ner_v3_test_en.md | 94 ++++++++++++++ ...nda_cased_malay_ner_v3_test_pipeline_en.md | 70 ++++++++++ ...7-roberta_india_ner_trainer_pipeline_en.md | 70 ++++++++++ .../2024-09-07-roberta_large_biomedical_en.md | 94 ++++++++++++++ ...07-roberta_large_biomedical_pipeline_en.md | 70 ++++++++++ ...4-09-07-roberta_large_bne_livingner1_es.md | 94 ++++++++++++++ ...oberta_large_bne_livingner1_pipeline_es.md | 70 ++++++++++ .../2024-09-07-roberta_large_catalan_v2_ca.md | 94 ++++++++++++++ ...07-roberta_large_catalan_v2_pipeline_ca.md | 70 ++++++++++ ...-07-roberta_large_genia_ner_pipeline_en.md | 70 ++++++++++ ...oberta_large_mrpc_two_stage_pipeline_en.md | 70 ++++++++++ ...erta_large_ner_english_finetuned_ner_en.md | 94 ++++++++++++++ ...e_ner_english_finetuned_ner_pipeline_en.md | 70 ++++++++++ ...erta_large_schizophrenia_v3_pipeline_en.md | 70 ++++++++++ ...-09-07-roberta_large_wechsel_finnish_fi.md | 94 ++++++++++++++ ...berta_large_wechsel_finnish_pipeline_fi.md | 70 ++++++++++ ...2024-09-07-roberta_moral_emotion_eng_en.md | 94 ++++++++++++++ ...7-roberta_moral_emotion_eng_pipeline_en.md | 70 ++++++++++ ...ta_ncc_des_128_decayfrom200_pipeline_en.md | 70 ++++++++++ .../2024-09-07-roberta_pipeline_en.md | 70 ++++++++++ ...2024-09-07-roberta_poetry_anger_crpo_en.md | 94 ++++++++++++++ ...7-roberta_poetry_anger_crpo_pipeline_en.md | 70 ++++++++++ .../2024-09-07-roberta_psych_en.md | 94 ++++++++++++++ ...ase_spanish_squades_becasincentivos3_es.md | 92 ++++++++++++++ ...sh_squades_becasincentivos3_pipeline_es.md | 69 ++++++++++ .../2024-09-07-roberta_self_trained_en.md | 94 ++++++++++++++ 
...-09-07-roberta_self_trained_pipeline_en.md | 70 ++++++++++ ...panish_clinical_trials_misc_ents_ner_en.md | 94 ++++++++++++++ .../2024-09-07-roberta_squad_v2_en.md | 86 +++++++++++++ ...2024-09-07-roberta_squad_v2_pipeline_en.md | 69 ++++++++++ ...base_ft_udpos213_indonesian_pipeline_en.md | 70 ++++++++++ ...oken_classification_araeval24_aug800_en.md | 94 ++++++++++++++ ...sification_araeval24_aug800_pipeline_en.md | 70 ++++++++++ ...07-roberta_urdu_small_finetuned_urdu_en.md | 94 ++++++++++++++ ...a_urdu_small_finetuned_urdu_pipeline_en.md | 70 ++++++++++ .../2024-09-07-robertalexpt_base_pt.md | 94 ++++++++++++++ .../2024-09-07-robertatwitterbr_en.md | 94 ++++++++++++++ .../ahmedlone127/2024-09-07-ros_model_en.md | 94 ++++++++++++++ .../2024-09-07-ros_model_pipeline_en.md | 70 ++++++++++ .../2024-09-07-rseq2_full_data_pipeline_en.md | 70 ++++++++++ .../2024-09-07-run1_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-07-sbert_imdb_en.md | 94 ++++++++++++++ ...pino_model_xlm_roberta_base_pipeline_en.md | 70 ++++++++++ ...r_base_finetuned_kintweetsc_pipeline_en.md | 71 +++++++++++ ...r_base_finetuned_kintweetsd_pipeline_en.md | 71 +++++++++++ ...nt_albert_persian_farsi_zwnj_base_v2_fa.md | 94 ++++++++++++++ ...nt_alephbertgimmel_base_512_pipeline_he.md | 71 +++++++++++ .../2024-09-07-sent_bernice_pipeline_xx.md | 71 +++++++++++ .../2024-09-07-sent_bernice_xx.md | 94 ++++++++++++++ .../2024-09-07-sent_bert_1890_1900_en.md | 94 ++++++++++++++ ...-sent_bert_base_cased_finetuned_mrpc_en.md | 94 ++++++++++++++ ...t_base_cased_finetuned_mrpc_pipeline_en.md | 71 +++++++++++ ...sent_bert_base_german_cased_oldvocab_de.md | 94 ++++++++++++++ ...09-07-sent_bert_base_indonesian_522m_id.md | 94 ++++++++++++++ ...t_bert_base_indonesian_522m_pipeline_id.md | 71 +++++++++++ ...-09-07-sent_bert_base_irish_cased_v1_en.md | 94 ++++++++++++++ ...nt_bert_base_irish_cased_v1_pipeline_en.md | 71 +++++++++++ ...-sent_bert_base_magicslabnu_pipeline_en.md | 71 +++++++++++ ...-07-sent_bert_base_polish_uncased_v1_pl.md | 94 ++++++++++++++ ..._base_uncased_finetuned_char_hangman_en.md | 94 ++++++++++++++ ...ased_finetuned_char_hangman_pipeline_en.md | 71 +++++++++++ .../2024-09-07-sent_bert_base_zhtw_en.md | 94 ++++++++++++++ ...l_mlm_gpl_nli_sts_MetaKD_v0_pipeline_pt.md | 71 +++++++++++ ...ased_legal_mlm_gpl_nli_sts_MetaKD_v0_pt.md | 80 ++++++++++++ ...t_bertabaporu_large_uncased_pipeline_pt.md | 71 +++++++++++ ...09-07-sent_bertabaporu_large_uncased_pt.md | 94 ++++++++++++++ .../2024-09-07-sent_bertislav_pipeline_cu.md | 71 +++++++++++ ...24-09-07-sent_berturk_legal_pipeline_tr.md | 71 +++++++++++ .../2024-09-07-sent_berturk_legal_tr.md | 94 ++++++++++++++ .../2024-09-07-sent_bio_tinybert_en.md | 94 ++++++++++++++ ...024-09-07-sent_bio_tinybert_pipeline_en.md | 71 +++++++++++ ...lp_biomedbert_large_uncased_abstract_en.md | 94 ++++++++++++++ ...bert_large_uncased_abstract_pipeline_en.md | 71 +++++++++++ .../2024-09-07-sent_checkpoint_11600_en.md | 94 ++++++++++++++ ...09-07-sent_checkpoint_11600_pipeline_en.md | 71 +++++++++++ .../2024-09-07-sent_checkpoint_13600_en.md | 94 ++++++++++++++ ...09-07-sent_checkpoint_13600_pipeline_en.md | 71 +++++++++++ .../2024-09-07-sent_cocodr_base_msmarco_en.md | 94 ++++++++++++++ ...bert_base_turkish_mc4_cased_pipeline_tr.md | 71 +++++++++++ ...sent_convbert_base_turkish_mc4_cased_tr.md | 94 ++++++++++++++ .../2024-09-07-sent_custominlawbert_en.md | 94 ++++++++++++++ .../2024-09-07-sent_distil_clinicalbert_en.md | 94 ++++++++++++++ .../2024-09-07-sent_eq_bert_v1_1_en.md | 94 
++++++++++++++ .../2024-09-07-sent_financialbert_en.md | 94 ++++++++++++++ .../ahmedlone127/2024-09-07-sent_fp_xlm_en.md | 94 ++++++++++++++ .../2024-09-07-sent_fp_xlm_pipeline_en.md | 71 +++++++++++ ...-sent_georgian_corpus_model_pipeline_en.md | 71 +++++++++++ ...ent_glot500_with_transliteration_max_en.md | 94 ++++++++++++++ ...transliteration_minangkabau_pipeline_en.md | 71 +++++++++++ .../2024-09-07-sent_hindi_bert_v2_hi.md | 94 ++++++++++++++ .../2024-09-07-sent_hing_bert_pipeline_hi.md | 71 +++++++++++ ...024-09-07-sent_hinglish_bert_nirantk_en.md | 94 ++++++++++++++ ...-sent_hinglish_bert_nirantk_pipeline_en.md | 71 +++++++++++ .../2024-09-07-sent_issuebert_large_en.md | 94 ++++++++++++++ ...-09-07-sent_issuebert_large_pipeline_en.md | 71 +++++++++++ ...24-09-07-sent_legalbert_large_1_7m_2_en.md | 94 ++++++++++++++ ...024-09-07-sent_legalbertpt_sardinian_en.md | 94 ++++++++++++++ ...-sent_legalbertpt_sardinian_pipeline_en.md | 71 +++++++++++ ...24-09-07-sent_legalnlp_bert_pipeline_pt.md | 71 +++++++++++ .../2024-09-07-sent_legalnlp_bert_pt.md | 94 ++++++++++++++ .../2024-09-07-sent_manubert_pipeline_en.md | 71 +++++++++++ .../2024-09-07-sent_marathi_bert_mr.md | 94 ++++++++++++++ ...024-09-07-sent_marathi_bert_pipeline_mr.md | 71 +++++++++++ .../2024-09-07-sent_marbertv2_ar.md | 94 ++++++++++++++ .../2024-09-07-sent_marbertv2_pipeline_ar.md | 71 +++++++++++ .../2024-09-07-sent_mattpuscibert_en.md | 94 ++++++++++++++ ...24-09-07-sent_mattpuscibert_pipeline_en.md | 71 +++++++++++ .../2024-09-07-sent_me_bert_mixed_v2_mr.md | 94 ++++++++++++++ ...09-07-sent_me_bert_mixed_v2_pipeline_mr.md | 71 +++++++++++ .../2024-09-07-sent_medbert_pipeline_en.md | 71 +++++++++++ .../2024-09-07-sent_mika_safeaerobert_en.md | 94 ++++++++++++++ .../2024-09-07-sent_mizbert_en.md | 94 ++++++++++++++ .../2024-09-07-sent_mlm_xlmr_base_vlsp_en.md | 94 ++++++++++++++ ...-07-sent_mlm_xlmr_base_vlsp_pipeline_en.md | 71 +++++++++++ .../2024-09-07-sent_model_21200_en.md | 94 ++++++++++++++ ...2024-09-07-sent_model_21200_pipeline_en.md | 71 +++++++++++ .../2024-09-07-sent_ofa_multi_100_en.md | 94 ++++++++++++++ ...24-09-07-sent_ofa_multi_100_pipeline_en.md | 71 +++++++++++ .../2024-09-07-sent_ofa_multi_768_en.md | 94 ++++++++++++++ ...24-09-07-sent_ofa_multi_768_pipeline_en.md | 71 +++++++++++ ...-07-sent_patana_chilean_spanish_bert_es.md | 94 ++++++++++++++ .../2024-09-07-sent_pharmbert_cased_en.md | 94 ++++++++++++++ .../2024-09-07-sent_retromae_en.md | 94 ++++++++++++++ ...-09-07-sent_retromae_msmarco_distill_en.md | 94 ++++++++++++++ ...09-07-sent_retromae_msmarco_finetune_en.md | 94 ++++++++++++++ ...t_retromae_msmarco_finetune_pipeline_en.md | 71 +++++++++++ .../2024-09-07-sent_retromae_pipeline_en.md | 71 +++++++++++ .../2024-09-07-sent_rxbert_v1_en.md | 94 ++++++++++++++ ...7-sent_swiss_german_xlm_roberta_base_en.md | 94 ++++++++++++++ ...iss_german_xlm_roberta_base_pipeline_en.md | 71 +++++++++++ .../2024-09-07-sent_tb_xlm_r_fpt_en.md | 94 ++++++++++++++ ...024-09-07-sent_tech_roberta_pipeline_vi.md | 71 +++++++++++ .../2024-09-07-sent_tech_roberta_vi.md | 94 ++++++++++++++ .../2024-09-07-sent_telugu_bert_te.md | 94 ++++++++++++++ .../2024-09-07-sent_telugu_bertu_te.md | 94 ++++++++++++++ .../2024-09-07-sent_test999_en.md | 94 ++++++++++++++ .../2024-09-07-sent_test999_pipeline_en.md | 71 +++++++++++ ..._translit_ppa_mediterranean_pipeline_xx.md | 71 +++++++++++ ...9-07-sent_translit_ppa_mediterranean_xx.md | 94 ++++++++++++++ ...9-07-sent_tulio_chilean_spanish_bert_es.md | 94 ++++++++++++++ 
..._tulio_chilean_spanish_bert_pipeline_es.md | 71 +++++++++++ ...09-07-sent_turkish_tiny_bert_uncased_tr.md | 94 ++++++++++++++ ...07-sent_uzbert_base_uncased_pipeline_uz.md | 71 +++++++++++ .../2024-09-07-sent_uzbert_base_uncased_uz.md | 94 ++++++++++++++ ...nt_vien_resume_roberta_base_pipeline_en.md | 71 +++++++++++ ...t_xlm_r_with_transliteration_average_en.md | 94 ++++++++++++++ ...m_r_with_transliteration_minangkabau_en.md | 94 ++++++++++++++ ...09-07-sent_xlm_roberta_base_1024_256_en.md | 94 ++++++++++++++ ...t_xlm_roberta_base_1024_256_pipeline_en.md | 71 +++++++++++ ...4-09-07-sent_xlm_roberta_base_arlama_en.md | 94 ++++++++++++++ ...ent_xlm_roberta_base_arlama_pipeline_en.md | 71 +++++++++++ ...t_xlm_roberta_base_finetuned_lingala_en.md | 94 ++++++++++++++ ...erta_base_finetuned_lingala_pipeline_en.md | 71 +++++++++++ ...sent_xlm_roberta_base_finetuned_rugo_en.md | 94 ++++++++++++++ ...nt_xlm_roberta_base_finetuned_somali_en.md | 94 ++++++++++++++ ...berta_base_finetuned_somali_pipeline_en.md | 71 +++++++++++ ...7-sent_zabantu_sot_ven_170m_pipeline_ve.md | 71 +++++++++++ .../2024-09-07-sentagram_pipeline_tr.md | 70 ++++++++++ .../ahmedlone127/2024-09-07-sentagram_tr.md | 94 ++++++++++++++ .../2024-09-07-sentence_classifier_en.md | 86 +++++++++++++ ..._2epoch_100pair_mar2_contrastiveloss_en.md | 86 +++++++++++++ ...00pair_mar2_contrastiveloss_pipeline_en.md | 69 ++++++++++ ...tencepiecebpe_nachos_french_pipeline_en.md | 70 ++++++++++ ...bpe_pubmed_french_morphemes_pipeline_en.md | 70 ++++++++++ ...it_model_calgary_epochs2_jul_15_2023_en.md | 86 +++++++++++++ ...calgary_epochs2_jul_15_2023_pipeline_en.md | 69 ++++++++++ ...model_independence_labelintl_epochs2_en.md | 86 +++++++++++++ ...ependence_labelintl_epochs2_pipeline_en.md | 69 ++++++++++ ...09-07-sinhala_roberta_oscar_pipeline_si.md | 70 ++++++++++ .../2024-09-07-sinhala_roberta_oscar_si.md | 94 ++++++++++++++ ...ala_sentiment_analysis_sinbert_large_en.md | 94 ++++++++++++++ ...-09-07-sloberta_20480_not_pretrained_en.md | 94 ++++++++++++++ ...oberta_20480_not_pretrained_pipeline_en.md | 70 ++++++++++ ...-07-smm4h2024_task1_roberta_pipeline_en.md | 70 ++++++++++ ...24-09-07-somd_xlm_stage1_v2_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-07-sota_3_en.md | 94 ++++++++++++++ .../2024-09-07-sota_3_pipeline_en.md | 70 ++++++++++ ...sotho_all_mpnet_finetuned_comb_12481_en.md | 86 +++++++++++++ ..._mpnet_finetuned_comb_12481_pipeline_en.md | 69 ++++++++++ ...mpnet_finetuned_french_1000_pipeline_en.md | 69 ++++++++++ ...uation_restoration_sanivert_pipeline_es.md | 70 ++++++++++ ...7-spanish_finnish_all_quy_1_pipeline_en.md | 70 ++++++++++ .../2024-09-07-spanish_finnish_extra_en.md | 94 ++++++++++++++ ...09-07-spanish_finnish_extra_pipeline_en.md | 70 ++++++++++ .../2024-09-07-spea_3_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-07-ssdlm_en.md | 94 ++++++++++++++ .../2024-09-07-ssdlm_pipeline_en.md | 70 ++++++++++ ...ford_deidentifier_base_finetuned_ner_en.md | 94 ++++++++++++++ ...-stanford_deidentifier_base_pipeline_en.md | 70 ++++++++++ ...9-07-stanford_deidentifier_only_i2b2_en.md | 94 ++++++++++++++ ...ford_deidentifier_only_i2b2_pipeline_en.md | 70 ++++++++++ ..._deidentifier_only_radiology_reports_en.md | 94 ++++++++++++++ ...poch_30_2024_07_26_16_19_31_pipeline_en.md | 70 ++++++++++ ...-09-07-swiss_german_xlm_roberta_base_en.md | 94 ++++++++++++++ ...-09-07-t_frex_roberta_large_pipeline_en.md | 70 ++++++++++ .../2024-09-07-taiyi_roberta_124m_d_en.md | 94 ++++++++++++++ 
.../2024-09-07-tajberto_pipeline_tg.md | 70 ++++++++++ .../ahmedlone127/2024-09-07-tajberto_tg.md | 94 ++++++++++++++ ...fication_distilbert_wnut_17_pipeline_en.md | 70 ++++++++++ .../2024-09-07-tesakantaibert_pipeline_en.md | 70 ++++++++++ .../2024-09-07-test_demo_qa_en.md | 86 +++++++++++++ ...09-07-test_esperberto_small_pipeline_eo.md | 70 ++++++++++ .../2024-09-07-test_qa_sanjeev_jasper_en.md | 86 +++++++++++++ ...9-07-test_qa_sanjeev_jasper_pipeline_en.md | 69 ++++++++++ ...7-test_setfit_model_bhuvana_pipeline_en.md | 69 ++++++++++ .../2024-09-07-test_w5_long_dataset_en.md | 94 ++++++++++++++ .../2024-09-07-testchatbotmodel1_en.md | 86 +++++++++++++ .../2024-09-07-testing_pipeline_en.md | 69 ++++++++++ .../ahmedlone127/2024-09-07-testmodel_en.md | 86 +++++++++++++ .../2024-09-07-testmodel_pipeline_en.md | 69 ++++++++++ .../2024-09-07-testtesttest_pipeline_en.md | 70 ++++++++++ ...09-07-textfooler_roberta_base_mrpc_5_en.md | 94 ++++++++++++++ ...tfooler_roberta_base_mrpc_5_pipeline_en.md | 70 ++++++++++ ...7-tnana_english_thai_align_finetuned_en.md | 94 ++++++++++++++ ...07-token_classification_adilhayat173_en.md | 94 ++++++++++++++ .../_posts/ahmedlone127/2024-09-07-tone_en.md | 94 ++++++++++++++ .../2024-09-07-tone_pipeline_en.md | 70 ++++++++++ ...ainer_chapter4_rishabh_sucks_at_code_en.md | 94 ++++++++++++++ ...24-09-07-trans_vietnamese_english_v2_en.md | 94 ++++++++++++++ ...4-09-07-translatear_english_pipeline_en.md | 70 ++++++++++ ...vietnamese_english_official_pipeline_en.md | 70 ++++++++++ ...-translit_ppa_mediterranean_pipeline_xx.md | 70 ++++++++++ ...024-09-07-translit_ppa_mediterranean_xx.md | 94 ++++++++++++++ ...trustpilot_balanced_location_roberta_en.md | 94 ++++++++++++++ ...4-09-07-twitter_roberta_base_jun2020_en.md | 94 ++++++++++++++ ...witter_roberta_base_jun2020_pipeline_en.md | 70 ++++++++++ ...4-09-07-twitter_roberta_base_mar2020_en.md | 94 ++++++++++++++ ...witter_roberta_base_mar2020_pipeline_en.md | 70 ++++++++++ ...4-09-07-twitter_roberta_base_mar2022_en.md | 94 ++++++++++++++ ...witter_roberta_base_mar2022_pipeline_en.md | 70 ++++++++++ ...er_roberta_base_sentiment_ahmetayrnc_en.md | 94 ++++++++++++++ ...a_base_sentiment_ahmetayrnc_pipeline_en.md | 70 ++++++++++ ...sentiment_finetuned_marc_tswana_v1_1_en.md | 94 ++++++++++++++ ..._finetuned_marc_tswana_v1_1_pipeline_en.md | 70 ++++++++++ ...lm_roberta_base_sentiment_finetunned_xx.md | 94 ++++++++++++++ .../2024-09-07-unibert_distilbert_3_en.md | 94 ++++++++++++++ .../ahmedlone127/2024-09-07-urdubert_en.md | 94 ++++++++++++++ .../2024-09-07-urdubert_pipeline_en.md | 70 ++++++++++ .../2024-09-07-uzroberta_v2_pipeline_uz.md | 70 ++++++++++ .../2024-09-07-uzroberta_v2_uz.md | 94 ++++++++++++++ .../2024-09-07-v2_mrcl0ud_pipeline_en.md | 69 ++++++++++ .../ahmedlone127/2024-09-07-weights_en.md | 94 ++++++++++++++ ...24-09-07-whisper_base_cv17_hungarian_hu.md | 84 ++++++++++++ ...whisper_base_cv17_hungarian_pipeline_hu.md | 69 ++++++++++ ...9-07-whisper_gujarati_small_pipeline_gu.md | 69 ++++++++++ .../2024-09-07-whisper_noisy_pipeline_en.md | 69 ++++++++++ ...4-09-07-whisper_small200sep4_spanish_es.md | 84 ++++++++++++ ...hisper_small200sep4_spanish_pipeline_es.md | 69 ++++++++++ ...per_small_english_atco2_asr_pipeline_en.md | 69 ++++++++++ ...r_small_finetunedenglish_speechfinal_en.md | 84 ++++++++++++ ...-whisper_small_hindi_drinktoomuchsax_en.md | 84 ++++++++++++ ...small_hindi_drinktoomuchsax_pipeline_en.md | 69 ++++++++++ ...9-07-whisper_small_kurdish_sorani_10_ku.md | 84 ++++++++++++ 
...per_small_kurdish_sorani_10_pipeline_ku.md | 69 ++++++++++ ...whisper_small_russian_v2_artyomboyko_ru.md | 84 ++++++++++++ ...-07-whisper_small_twi_arxiv_pipeline_tw.md | 69 ++++++++++ .../2024-09-07-whisper_tiny_few_audios_en.md | 84 ++++++++++++ ...-07-whisper_tiny_few_audios_pipeline_en.md | 69 ++++++++++ ...r_tiny_minds14_english_us_markredito_en.md | 84 ++++++++++++ ...nds14_english_us_markredito_pipeline_en.md | 69 ++++++++++ ...7-whisper_tiny_portuguese_dominguesm_pt.md | 84 ++++++++++++ .../2024-09-07-wikismall_roberta_en.md | 94 ++++++++++++++ ...024-09-07-wikismall_roberta_pipeline_en.md | 70 ++++++++++ .../2024-09-07-wolfbbsroberta_large_en.md | 94 ++++++++++++++ ...-09-07-wolfbbsroberta_large_pipeline_en.md | 70 ++++++++++ .../2024-09-07-wolof_description_guru_0_en.md | 86 +++++++++++++ ...07-wolof_description_guru_0_pipeline_en.md | 69 ++++++++++ ...4-09-07-wolof_finetuned_ner_pipeline_en.md | 70 ++++++++++ ...tuned_toxic_political_tweets_spanish_es.md | 94 ++++++++++++++ ...ic_political_tweets_spanish_pipeline_es.md | 70 ++++++++++ .../2024-09-07-xlm_roberta_base_autext_en.md | 94 ++++++++++++++ ...etuned_augument_visquad2_16_3_2023_1_en.md | 86 +++++++++++++ ...gument_visquad2_16_3_2023_1_pipeline_en.md | 69 ++++++++++ ...rta_base_finetuned_marc_english_tomo_en.md | 94 ++++++++++++++ ...finetuned_marc_english_tomo_pipeline_en.md | 70 ++++++++++ ...ta_base_finetuned_marc_yuri_pipeline_en.md | 70 ++++++++++ ...berta_base_finetuned_panx_all_aiekek_en.md | 94 ++++++++++++++ ...e_finetuned_panx_all_aiekek_pipeline_en.md | 70 ++++++++++ ...a_base_finetuned_panx_all_cataluna84_en.md | 94 ++++++++++++++ ...ta_base_finetuned_panx_all_deepaperi_en.md | 94 ++++++++++++++ ...inetuned_panx_all_deepaperi_pipeline_en.md | 70 ++++++++++ ..._base_finetuned_panx_all_huggingbase_en.md | 94 ++++++++++++++ ...etuned_panx_all_huggingbase_pipeline_en.md | 70 ++++++++++ ...finetuned_panx_all_ladoza03_pipeline_en.md | 70 ++++++++++ ...berta_base_finetuned_panx_all_sbpark_en.md | 94 ++++++++++++++ ...e_finetuned_panx_all_sbpark_pipeline_en.md | 70 ++++++++++ ...oberta_base_finetuned_panx_all_sreek_en.md | 94 ++++++++++++++ ...se_finetuned_panx_all_sreek_pipeline_en.md | 70 ++++++++++ ...a_base_finetuned_panx_all_wendao_123_en.md | 94 ++++++++++++++ ...se_finetuned_panx_english_buruzaemon_en.md | 94 ++++++++++++++ ...ned_panx_english_buruzaemon_pipeline_en.md | 70 ++++++++++ ...ta_base_finetuned_panx_english_drigb_en.md | 94 ++++++++++++++ ...base_finetuned_panx_english_likejazz_en.md | 94 ++++++++++++++ ...tuned_panx_english_likejazz_pipeline_en.md | 70 ++++++++++ ..._base_finetuned_panx_english_nrazavi_en.md | 94 ++++++++++++++ ...etuned_panx_english_nrazavi_pipeline_en.md | 70 ++++++++++ ...base_finetuned_panx_english_ryo_hsgw_en.md | 94 ++++++++++++++ ...tuned_panx_english_ryo_hsgw_pipeline_en.md | 70 ++++++++++ ...base_finetuned_panx_english_taoyoung_en.md | 94 ++++++++++++++ ...tuned_panx_english_taoyoung_pipeline_en.md | 70 ++++++++++ ...rta_base_finetuned_panx_french_ferro_en.md | 94 ++++++++++++++ ...uned_panx_french_henryjiang_pipeline_en.md | 70 ++++++++++ ...ta_base_finetuned_panx_french_inniok_en.md | 94 ++++++++++++++ ...ned_panx_french_jfmatos_isq_pipeline_en.md | 70 ++++++++++ ...ase_finetuned_panx_french_robkayinto_en.md | 94 ++++++++++++++ ...se_finetuned_panx_french_wooseok0303_en.md | 94 ++++++++++++++ ...ned_panx_french_wooseok0303_pipeline_en.md | 70 ++++++++++ ...e_finetuned_panx_german_alexisxiaoyu_en.md | 94 ++++++++++++++ ...ed_panx_german_alexisxiaoyu_pipeline_en.md | 70 
++++++++++ ...netuned_panx_german_cicimen_pipeline_en.md | 70 ++++++++++ ...se_finetuned_panx_german_eikoenchine_en.md | 94 ++++++++++++++ ...ned_panx_german_eikoenchine_pipeline_en.md | 70 ++++++++++ ...d_panx_german_emmanuelalo52_pipeline_en.md | 70 ++++++++++ ..._base_finetuned_panx_german_esperesa_en.md | 94 ++++++++++++++ ...ed_panx_german_french_andreaschandra_en.md | 94 ++++++++++++++ ...erman_french_andreaschandra_pipeline_en.md | 70 ++++++++++ ...netuned_panx_german_french_benjiccee_en.md | 94 ++++++++++++++ ...etuned_panx_german_french_cataluna84_en.md | 94 ++++++++++++++ ...nx_german_french_cataluna84_pipeline_en.md | 70 ++++++++++ ...netuned_panx_german_french_guruji108_en.md | 94 ++++++++++++++ ...anx_german_french_guruji108_pipeline_en.md | 70 ++++++++++ ...n_french_laurentiustancioiu_pipeline_en.md | 70 ++++++++++ ...finetuned_panx_german_french_noveled_en.md | 94 ++++++++++++++ ...ned_panx_german_french_smilingface88_en.md | 94 ++++++++++++++ ...netuned_panx_german_french_sponomary_en.md | 94 ++++++++++++++ ...anx_german_french_sponomary_pipeline_en.md | 70 ++++++++++ ...man_french_transformersbook_pipeline_en.md | 70 ++++++++++ ...etuned_panx_german_french_yasu320001_en.md | 94 ++++++++++++++ ..._finetuned_panx_german_french_yezune_en.md | 94 ++++++++++++++ ...finetuned_panx_german_french_yurit04_en.md | 94 ++++++++++++++ ..._panx_german_french_yurit04_pipeline_en.md | 70 ++++++++++ ...ta_base_finetuned_panx_german_gonalb_en.md | 94 ++++++++++++++ ..._base_finetuned_panx_german_gus07ven_en.md | 94 ++++++++++++++ ...erta_base_finetuned_panx_german_gv05_en.md | 94 ++++++++++++++ ..._finetuned_panx_german_gv05_pipeline_en.md | 70 ++++++++++ ..._base_finetuned_panx_german_huangjia_en.md | 94 ++++++++++++++ ...inetuned_panx_german_k4west_pipeline_en.md | 70 ++++++++++ ...tuned_panx_german_monkdalma_pipeline_en.md | 70 ++++++++++ ..._base_finetuned_panx_german_sanbatte_en.md | 94 ++++++++++++++ ...etuned_panx_german_sanbatte_pipeline_en.md | 70 ++++++++++ ...ta_base_finetuned_panx_german_sanyam_en.md | 94 ++++++++++++++ ...inetuned_panx_german_sanyam_pipeline_en.md | 70 ++++++++++ ..._base_finetuned_panx_german_takizawa_en.md | 94 ++++++++++++++ ...etuned_panx_german_takizawa_pipeline_en.md | 70 ++++++++++ ...netuned_panx_german_thkkvui_pipeline_en.md | 70 ++++++++++ ...se_finetuned_panx_german_vasantha_ai_en.md | 94 ++++++++++++++ ...ned_panx_german_vasantha_ai_pipeline_en.md | 70 ++++++++++ ...a_base_finetuned_panx_german_xiao888_en.md | 94 ++++++++++++++ ...ned_panx_italian_chris_choi_pipeline_en.md | 70 ++++++++++ ...base_finetuned_panx_italian_jamie613_en.md | 94 ++++++++++++++ ...tuned_panx_italian_jamie613_pipeline_en.md | 70 ++++++++++ ...a_base_finetuned_panx_italian_lsh231_en.md | 94 ++++++++++++++ ...netuned_panx_italian_lsh231_pipeline_en.md | 70 ++++++++++ ...inetuned_panx_italian_munsu_pipeline_en.md | 70 ++++++++++ ...uned_panx_italian_nobody138_pipeline_en.md | 70 ++++++++++ ..._base_finetuned_panx_italian_praboda_en.md | 94 ++++++++++++++ ...etuned_panx_italian_praboda_pipeline_en.md | 70 ++++++++++ ...finetuned_panx_italian_smilingface88_en.md | 94 ++++++++++++++ ..._panx_italian_smilingface88_pipeline_en.md | 70 ++++++++++ ...ta_base_finetuned_panx_korean_jhsign_en.md | 94 ++++++++++++++ ...inetuned_panx_korean_jhsign_pipeline_en.md | 70 ++++++++++ ...berta_base_finetuned_sayula_popoluca_en.md | 94 ++++++++++++++ ...berta_base_kyrgyzner_ttimur_pipeline_ky.md | 70 ++++++++++ ..._language_detection_disaster_twitter_en.md | 94 ++++++++++++++ 
...ngual_text_genre_classifier_pipeline_xx.md | 70 ++++++++++ ...e_multilingual_text_genre_classifier_xx.md | 94 ++++++++++++++ ..._bhasa_vietnam_aug_backtranslation_1_en.md | 94 ++++++++++++++ ...etnam_aug_backtranslation_1_pipeline_en.md | 70 ++++++++++ ...xlm_roberta_base_panx_dataset_korean_en.md | 94 ++++++++++++++ ...2024-09-07-xlm_roberta_base_pipeline_xx.md | 70 ++++++++++ ...a_base_telugu_transliterate_pipeline_te.md | 70 ++++++++++ ...lm_roberta_base_telugu_transliterate_te.md | 94 ++++++++++++++ ..._arabic_30000_tweet_sentiment_arabic_en.md | 94 ++++++++++++++ ...0000_tweet_sentiment_arabic_pipeline_en.md | 70 ++++++++++ ...ese_60000_tweet_sentiment_portuguese_en.md | 94 ++++++++++++++ ..._roberta_base_tweet_sentiment_arabic_en.md | 94 ++++++++++++++ ...base_tweet_sentiment_arabic_pipeline_en.md | 70 ++++++++++ ...lm_roberta_base_ukrainian_ner_ukrner_uk.md | 94 ++++++++++++++ ...2024-09-07-xlm_roberta_base_wnut_ner_en.md | 94 ++++++++++++++ ...7-xlm_roberta_base_wnut_ner_pipeline_en.md | 70 ++++++++++ ...ase_xnli_arabic_trimmed_arabic_30000_en.md | 94 ++++++++++++++ .../2024-09-07-xlm_roberta_base_xx.md | 75 +++++++++++ ...roberta_finetuned_emojis_non_iid_fed_en.md | 94 ++++++++++++++ ...lm_roberta_german_sentiment_pipeline_xx.md | 70 ++++++++++ ...4-09-07-xlm_roberta_german_sentiment_xx.md | 94 ++++++++++++++ ...autonlp_roberta_base_squad2_24465516_en.md | 106 ++++++++++++++++ ...oberta_base_squad2_24465516_pipeline_en.md | 69 ++++++++++ ...-09-07-xlmr_base_toxicity_classifier_xx.md | 94 ++++++++++++++ ...man_norwegian_shuffled_orig_test1000_en.md | 94 ++++++++++++++ ...gian_shuffled_orig_test1000_pipeline_en.md | 70 ++++++++++ ...ian_english_all_shuffled_42_test1000_en.md | 94 ++++++++++++++ ...sh_all_shuffled_42_test1000_pipeline_en.md | 70 ++++++++++ .../2024-09-07-xlmr_finetuned_fquad_en.md | 86 +++++++++++++ ...-09-07-xlmr_finetuned_fquad_pipeline_en.md | 69 ++++++++++ ...english_train_shuffled_1986_test2000_en.md | 94 ++++++++++++++ ...rain_shuffled_1986_test2000_pipeline_en.md | 70 ++++++++++ ...024-09-07-xlmr_qa_extraction_english_en.md | 94 ++++++++++++++ ...-xlmr_qa_extraction_english_pipeline_en.md | 70 ++++++++++ ...024-09-07-xlmr_squad2_webis_pipeline_en.md | 69 ++++++++++ .../ahmedlone127/2024-09-07-xlmr_webis_en.md | 86 +++++++++++++ ...lmroberta_embeddings_marathi_roberta_mr.md | 100 +++++++++++++++ ..._embeddings_marathi_roberta_pipeline_mr.md | 70 ++++++++++ ...tugkaya_base_finetuned_panx_pipeline_de.md | 70 ++++++++++ ...-07-xlmroberta_ner_base_fin_pipeline_fi.md | 70 ++++++++++ ...base_finetuned_dholuo_finetuned_ner_luo.md | 94 ++++++++++++++ ...wanda_finetuned_ner_swahili_pipeline_sw.md | 70 ++++++++++ ...ed_kinyarwanda_finetuned_ner_swahili_sw.md | 115 +++++++++++++++++ ...ta_ner_base_uncased_mit_movie_trivia_en.md | 113 +++++++++++++++++ ..._edwardjross_base_finetuned_panx_all_xx.md | 112 ++++++++++++++++ ...608_base_finetuned_panx_all_pipeline_xx.md | 70 ++++++++++ ...ner_neha2608_base_finetuned_panx_all_xx.md | 112 ++++++++++++++++ ...oberta_ner_rgl73_base_finetuned_panx_de.md | 113 +++++++++++++++++ .../2024-09-07-xnli_xlm_r_only_turkish_en.md | 94 ++++++++++++++ ...-07-xnli_xlm_r_only_turkish_pipeline_en.md | 70 ++++++++++ ...-afro_xlmr_base_finetuned_kintweetsb_en.md | 94 ++++++++++++++ .../2024-09-08-agnews_padding60model_en.md | 94 ++++++++++++++ ..._weighted_hoax_classifier_definition_en.md | 94 ++++++++++++++ ..._hoax_classifier_definition_pipeline_en.md | 70 ++++++++++ ...ian_farsi_base_v2_sentiment_digikala_fa.md | 94 ++++++++++++++ 
..._base_v2_sentiment_digikala_pipeline_fa.md | 70 ++++++++++ ...ter_twitter_preprocess_data_pipeline_en.md | 70 ++++++++++ ...net_base_v2_lr_1e_8_margin_5_epoch_3_en.md | 86 +++++++++++++ ...2024-09-08-all_mpnet_base_v2_navteca_en.md | 86 +++++++++++++ ...et_base_v2_topic_abstract_similarity_en.md | 86 +++++++++++++ .../2024-09-08-all_mpnet_janet_10k_v1_en.md | 86 +++++++++++++ ...9-08-all_mpnet_janet_10k_v1_pipeline_en.md | 69 ++++++++++ .../2024-09-08-amazonpolarity_fewshot_en.md | 86 +++++++++++++ ...08-analisis_sentimientos_beto_tass_c_en.md | 94 ++++++++++++++ ...iews_specificity_roberta_v1_pipeline_en.md | 70 ++++++++++ .../2024-09-08-atte_2_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-08-auro_4_en.md | 94 ++++++++++++++ .../2024-09-08-auro_4_pipeline_en.md | 70 ++++++++++ ..._5_with_masking_run2_finetuned_qasrl_en.md | 86 +++++++++++++ ...asking_run2_finetuned_qasrl_pipeline_en.md | 69 ++++++++++ ...9-08-bert_base_yelp_reviews_pipeline_en.md | 70 ++++++++++ ...08-bert_based_uncased_finetuned_imdb_en.md | 94 ++++++++++++++ .../ahmedlone127/2024-09-08-bert_imdb_en.md | 94 ++++++++++++++ .../2024-09-08-bert_imdb_pipeline_en.md | 70 ++++++++++ ...ication_slovene_data_augmentation_ds_en.md | 94 ++++++++++++++ .../2024-09-08-bertoslav_limited_en.md | 94 ++++++++++++++ ...09-08-best_model_yelp_polarity_16_13_en.md | 94 ++++++++++++++ ...09-08-best_model_yelp_polarity_32_13_en.md | 94 ++++++++++++++ ...t_model_yelp_polarity_32_13_pipeline_en.md | 70 ++++++++++ ...09-08-best_model_yelp_polarity_64_21_en.md | 94 ++++++++++++++ ...t_model_yelp_polarity_64_21_pipeline_en.md | 70 ++++++++++ .../2024-09-08-cat_ner_xlmr_4_en.md | 94 ++++++++++++++ ...bert_base_uncased_distilled_squad_v1_en.md | 94 ++++++++++++++ ...rcorreosoportedistilespanol_pipeline_en.md | 70 ++++++++++ ...24-09-08-classification_model_mtebad_en.md | 94 ++++++++++++++ ...classification_model_mtebad_pipeline_en.md | 70 ++++++++++ ...09-08-classification_model_sushant22_en.md | 94 ++++++++++++++ .../ahmedlone127/2024-09-08-cm124057_01_en.md | 94 ++++++++++++++ .../2024-09-08-cpu_netzero_classifier_en.md | 92 ++++++++++++++ ...9-08-cpu_netzero_classifier_pipeline_en.md | 70 ++++++++++ ...4-09-08-cpu_transport_ghg_classifier_en.md | 92 ++++++++++++++ ...pu_transport_ghg_classifier_pipeline_en.md | 70 ++++++++++ ...rdneg_finetuned_webnlg2020_relevance_en.md | 86 +++++++++++++ ...stommodel_yelp_hanyundudddd_pipeline_en.md | 70 ++++++++++ .../2024-09-08-darija_englishv2_1_en.md | 94 ++++++++++++++ ...024-09-08-depression_detection_model_en.md | 94 ++++++++++++++ ...-08-distilbert_base_cased_distilbert_en.md | 94 ++++++++++++++ ...ert_base_cased_finetuned_imdb_shindc_en.md | 94 ++++++++++++++ ...cased_finetuned_imdb_shindc_pipeline_en.md | 70 ++++++++++ ..._greek_modern_russian_cased_pipeline_en.md | 70 ++++++++++ ...ed_regression_finetuned_ptt_pipeline_xx.md | 70 ++++++++++ ...se_uncased_finetuned_clinc_schnatz65_en.md | 94 ++++++++++++++ ..._uncased_finetuned_emotion_bistudent_en.md | 94 ++++++++++++++ ...finetuned_emotion_bistudent_pipeline_en.md | 70 ++++++++++ ...se_uncased_finetuned_emotion_lilvoda_en.md | 94 ++++++++++++++ ...uncased_finetuned_emotion_niwang2024_en.md | 94 ++++++++++++++ ...finetuned_emotion_schnatz65_pipeline_en.md | 70 ++++++++++ ...cased_finetuned_emotion_talzoomanzoo_en.md | 94 ++++++++++++++ ...uned_emotion_with_annotated_by_gpt35_en.md | 94 ++++++++++++++ ...ion_with_annotated_by_gpt35_pipeline_en.md | 70 ++++++++++ ...sed_finetuned_imdb_adrien35_pipeline_en.md | 70 ++++++++++ 
...d_finetuned_imdb_dylettante_pipeline_en.md | 70 ++++++++++ ..._finetuned_imdb_ellieburton_pipeline_en.md | 70 ++++++++++ ...e_uncased_finetuned_imdb_lidiapierre_en.md | 94 ++++++++++++++ ...uncased_finetuned_imdb_majkeldcember_en.md | 94 ++++++++++++++ ...uncased_finetuned_imdb_marcosautuori_en.md | 94 ++++++++++++++ ...base_uncased_finetuned_imdb_pbwinter_en.md | 94 ++++++++++++++ ..._base_uncased_finetuned_imdb_xxxxxcz_en.md | 94 ++++++++++++++ ...stilbert_base_uncased_finetuned_news_en.md | 94 ++++++++++++++ ...ncased_finetuned_stationary_pipeline_en.md | 70 ++++++++++ ...cased_finetuned_streamers_accelerate_en.md | 94 ++++++++++++++ ...ge13pfxnf_simsp400_clean200_pipeline_en.md | 70 ++++++++++ ...2024-09-08-distilbert_coarse5_js_1_1_en.md | 94 ++++++++++++++ ...8-distilbert_coarse5_js_1_1_pipeline_en.md | 70 ++++++++++ ...08-distilbert_masking_1perc_pipeline_en.md | 70 ++++++++++ ...view_sentiment_classifier_3_pipeline_en.md | 70 ++++++++++ ...9-08-distilbert_nsfw_text_classifier_en.md | 94 ++++++++++++++ ...ilbert_nsfw_text_classifier_pipeline_en.md | 70 ++++++++++ ...2024-09-08-distilbert_tweet_pipeline_en.md | 70 ++++++++++ ...09-08-distillbert_sentiment_analysis_en.md | 94 ++++++++++++++ ...tillbert_sentiment_analysis_pipeline_en.md | 70 ++++++++++ .../2024-09-08-facets_gpt_35_pipeline_en.md | 69 ++++++++++ .../2024-09-08-finance_news_classifier_en.md | 94 ++++++++++++++ .../2024-09-08-gal_enptsp_xlm_r_gl.md | 94 ++++++++++++++ ...-09-08-gal_portuguese_xlm_r_pipeline_gl.md | 70 ++++++++++ ...024-09-08-gal_sayula_popoluca_iwcg_4_en.md | 94 ++++++++++++++ ...an_seen_heard_bert_first512_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-08-hw1_eva1209_en.md | 94 ++++++++++++++ .../2024-09-08-hw_1_aia_tclin_en.md | 94 ++++++++++++++ .../2024-09-08-hw_1_aia_tclin_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-08-imdb_3_en.md | 94 ++++++++++++++ ...-09-08-imdb_distilbert_apoorvaec1030_en.md | 94 ++++++++++++++ ...db_distilbert_apoorvaec1030_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-08-impara_qe_en.md | 94 ++++++++++++++ .../ahmedlone127/2024-09-08-inde_4_en.md | 94 ++++++++++++++ ...24-09-08-indobert_sentiment_analysis_id.md | 94 ++++++++++++++ ...24-09-08-intent_classifier_frana9812_en.md | 94 ++++++++++++++ ...ntent_distilbert_classifier_pipeline_en.md | 70 ++++++++++ docs/_posts/ahmedlone127/2024-09-08-joo_en.md | 94 ++++++++++++++ ...08-khmer_text_classification_roberta_km.md | 94 ++++++++++++++ ...text_classification_roberta_pipeline_km.md | 70 ++++++++++ .../2024-09-08-lenu_ewe_pipeline_en.md | 70 ++++++++++ .../2024-09-08-lexuz1_pipeline_en.md | 70 ++++++++++ .../2024-09-08-linkbert_base_en.md | 92 ++++++++++++++ .../2024-09-08-luganda_ner_v1_pipeline_en.md | 70 ++++++++++ ..._tonga_tonga_islands_french_bill1888_en.md | 94 ++++++++++++++ ...h_tonga_tonga_islands_french_viennes_en.md | 94 ++++++++++++++ ...nslator_nlp_course_chapter7_section3_en.md | 94 ++++++++++++++ .../2024-09-08-maskedlm_finetuned_imdb_en.md | 94 ++++++++++++++ .../2024-09-08-mlm_jjk_subtitle_en.md | 94 ++++++++++++++ ...glish_portuguese_msmarco_v1_pipeline_pt.md | 70 ++++++++++ ...t_base_nli_matryoshka_yoshinori_sano_en.md | 86 +++++++++++++ ...09-08-mpnet_twitter_freq100_pipeline_en.md | 69 ++++++++++ ...2024-09-08-multidim_default_template_en.md | 94 ++++++++++++++ ...sh_reg_avg_balanced_default_template_en.md | 94 ++++++++++++++ ...ltilingual_xlm_roberta_for_ner_c4n11_xx.md | 94 ++++++++++++++ ...-09-08-n2c2_soap_entailment_pipeline_en.md | 70 ++++++++++ 
...roberta_imdb_padding10model_pipeline_en.md | 70 ++++++++++ ...e2e_05_batchsize8_11epoch_3_pipeline_en.md | 70 ++++++++++ ...-opus_maltese_english_bkm_10e6encdec_en.md | 94 ++++++++++++++ ...english_dutch_finetuned_20k_pipeline_en.md | 70 ++++++++++ ...english_tonga_tonga_islands_japanese_en.md | 94 ++++++++++++++ ...a_islands_english_agreement_pipeline_en.md | 70 ++++++++++ .../2024-09-08-pebblo_classifier_en.md | 94 ++++++++++++++ .../ahmedlone127/2024-09-08-platzi_en.md | 94 ++++++++++++++ .../2024-09-08-platzi_pipeline_en.md | 70 ++++++++++ ...e_up_xlmr_oneshot_falsetrue_0_2_best_en.md | 94 ++++++++++++++ ..._oneshot_falsetrue_0_2_best_pipeline_en.md | 70 ++++++++++ ...edict_perception_xlmr_focus_assassin_en.md | 94 ++++++++++++++ ...ais_multi_qa_mpnet_base_dot_v1_8shot_en.md | 86 +++++++++++++ ..._qa_mpnet_base_dot_v1_8shot_pipeline_en.md | 69 ++++++++++ .../2024-09-08-qa_model9_test_en.md | 86 +++++++++++++ .../2024-09-08-quality_model_apr3_en.md | 94 ++++++++++++++ .../2024-09-08-recommend_songs_en.md | 94 ++++++++++++++ ...024-09-08-resume_sentence_classifier_en.md | 94 ++++++++++++++ ..._base_emotion_pysentimiento_pipeline_en.md | 70 ++++++++++ ...a_finetuned_subjqa_movies_2_ram20307_en.md | 86 +++++++++++++ ..._classification_aparnaullas_pipeline_en.md | 70 ++++++++++ ...8-roberta_qa_QA_for_Event_Extraction_en.md | 109 ++++++++++++++++ .../2024-09-08-roberta_qa_REQA_RoBERTa_en.md | 106 ++++++++++++++++ ...-08-roberta_qa_REQA_RoBERTa_pipeline_en.md | 69 ++++++++++ ...rta_base_finetuned_scrambled_squad_5_en.md | 106 ++++++++++++++++ ...-08-roberta_soft_llm_multip_pipeline_en.md | 70 ++++++++++ ...-08-romanurduclassification_pipeline_en.md | 70 ++++++++++ ...2024-09-08-rulebert_v0_4_k0_pipeline_it.md | 70 ++++++++++ ...24-09-08-semanlink_all_mpnet_base_v2_en.md | 86 +++++++++++++ ...-09-08-sent_memo_model_2500_pipeline_en.md | 71 +++++++++++ ...wegian_bokml_roberta_base_scandi_1e4_en.md | 94 ++++++++++++++ ..._r_with_transliteration_max_pipeline_en.md | 71 +++++++++++ ...erta_base_finetuned_amharic_pipeline_en.md | 71 +++++++++++ ..._xlm_roberta_base_finetuned_malagasy_en.md | 94 ++++++++++++++ ...rta_base_finetuned_malagasy_pipeline_en.md | 71 +++++++++++ ...xlm_roberta_base_finetuned_questions_en.md | 94 ++++++++++++++ ...ta_base_finetuned_questions_pipeline_en.md | 71 +++++++++++ ...9-08-seq2seq_finetuned_slang_english_en.md | 94 ++++++++++++++ ..._model_ireland_3labels_balanced_data_en.md | 86 +++++++++++++ ...eland_3labels_balanced_data_pipeline_en.md | 69 ++++++++++ ...land_4labels_unbalanced_data_3epochs_en.md | 86 +++++++++++++ ...nd_binary_label1_epochs2_feb_28_2023_en.md | 86 +++++++++++++ .../2024-09-08-sota_4_pipeline_en.md | 70 ++++++++++ ...tonga_tonga_islands_english_pipeline_en.md | 70 ++++++++++ .../2024-09-08-speech_pipeline_en.md | 69 ++++++++++ ...eckpoint_epoch_0_2024_07_26_11_37_42_en.md | 94 ++++++++++++++ ...epoch_0_2024_07_26_11_37_42_pipeline_en.md | 70 ++++++++++ ...ckpoint_epoch_10_2024_07_26_14_26_52_en.md | 94 ++++++++++++++ .../ahmedlone127/2024-09-08-test999_en.md | 94 ++++++++++++++ .../2024-09-08-test999_pipeline_en.md | 70 ++++++++++ .../2024-09-08-test_trainer4_en.md | 94 ++++++++++++++ .../2024-09-08-test_trainer4_pipeline_en.md | 70 ++++++++++ .../ahmedlone127/2024-09-08-testing_en.md | 94 ++++++++++++++ ...9-08-tmp_trainer_ubermenchh_pipeline_en.md | 70 ++++++++++ .../2024-09-08-trainer1f_pipeline_en.md | 70 ++++++++++ ...24-09-08-trainer_output_dir_pipeline_en.md | 70 ++++++++++ ...ecdl22_crossencoder_roberta_pipeline_en.md | 70 ++++++++++ 
.../2024-09-08-tweetcat_pipeline_en.md | 70 ++++++++++ ...08-twitter_roberta_base_topic_latest_en.md | 94 ++++++++++++++ .../2024-09-08-twitter_spam_classifier_en.md | 94 ++++++++++++++ ...-updated_distilbert_stance_detection_en.md | 94 ++++++++++++++ ...distilbert_stance_detection_pipeline_en.md | 70 ++++++++++ ...08-usclm_distilbert_base_uncased_mk1_en.md | 94 ++++++++++++++ docs/_posts/ahmedlone127/2024-09-08-w2l_en.md | 94 ++++++++++++++ .../2024-09-08-w2l_pipeline_en.md | 70 ++++++++++ .../2024-09-08-whisper_atcosim3_en.md | 84 ++++++++++++ ..._finetuned_common_voice_marathi_marh_mr.md | 84 ++++++++++++ ...d_common_voice_marathi_marh_pipeline_mr.md | 69 ++++++++++ ...a_base_final_mixed_aug_insert_bert_2_en.md | 94 ++++++++++++++ ...-xlm_roberta_base_finetuned_malagasy_en.md | 94 ++++++++++++++ ...rta_base_finetuned_panx_all_likejazz_en.md | 94 ++++++++++++++ ...finetuned_panx_all_likejazz_pipeline_en.md | 70 ++++++++++ ...netuned_panx_all_pockypocky_pipeline_en.md | 70 ++++++++++ ...ned_panx_english_iis2009002_pipeline_en.md | 70 ++++++++++ ...a_base_finetuned_panx_french_goldenk_en.md | 94 ++++++++++++++ ...tuned_panx_german_fernweh23_pipeline_en.md | 70 ++++++++++ ...netuned_panx_german_french_alkampfer_en.md | 94 ++++++++++++++ ...nx_german_french_buruzaemon_pipeline_en.md | 70 ++++++++++ ...base_finetuned_panx_german_nitin1690_en.md | 94 ++++++++++++++ ...ed_panx_italian_aaa01101312_pipeline_en.md | 70 ++++++++++ ...se_finetuned_panx_italian_aiventurer_en.md | 94 ++++++++++++++ ...ned_panx_italian_aiventurer_pipeline_en.md | 70 ++++++++++ ...a_base_finetuned_panx_italian_leosol_en.md | 94 ++++++++++++++ ...roberta_base_finetuned_wikiann_hindi_en.md | 94 ++++++++++++++ ...ase_finetuned_wikiann_hindi_pipeline_en.md | 70 ++++++++++ ...anish_trimmed_spanish_60000_pipeline_en.md | 70 ++++++++++ ...berta_base_word_shopsign_nepal_bhasa_en.md | 94 ++++++++++++++ ...e_word_shopsign_nepal_bhasa_pipeline_en.md | 70 ++++++++++ ...li_french_3_classes_rua_wl_3_classes_fr.md | 94 ++++++++++++++ ...9-08-xlm_roberta_sentiment_romanurdu_en.md | 94 ++++++++++++++ ...09-08-xlm_twitter_politics_sentiment_en.md | 94 ++++++++++++++ ..._fake_news_detection_system_29906863_hi.md | 104 +++++++++++++++ ...s_detection_system_29906863_pipeline_hi.md | 70 ++++++++++ ...sifier_deoffxlmr_mono_tamil_pipeline_ta.md | 70 ++++++++++ ...erta_classifier_deoffxlmr_mono_tamil_ta.md | 105 +++++++++++++++ ...estionanswering_base_squad2_512_4096_en.md | 86 +++++++++++++ ...wering_base_squad2_512_4096_pipeline_en.md | 69 ++++++++++ ...4-09-08-xtremedistil_l6_h384_uncased_en.md | 94 ++++++++++++++ 4274 files changed, 348562 insertions(+) create mode 100644 docs/_posts/ahmedlone127/2024-09-01-deberta_v3_base_company_names_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-01-deberta_v3_large__sst2__train_8_2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-01-expe_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-01-imdb_microsoft_deberta_v3_large_seed_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-01-roberta_base_biomedical_spanish_plantl_gob_es_pipeline_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-01-subreddit_description_topic_classifier_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-01-topic_labor_movement_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-albert_base_qa_1_batch_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-albert_base_qa_coqa_2_k_fold_2_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-02-bert_mini_uncased_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-burmese_awesome_model_20wds_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-burmese_awesome_model_lukiccc_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-burmese_awesome_model_rdsmaia_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-das22_10_camembert_pretrained_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-distil_task_b_2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-distilbert_base_uncased_finetuned_lgbt_classification_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-distilbert_nsfw_appropriate_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-distilbert_uncased_finetuned_cyberbullying_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-distilbert_uncased_names_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-dummy_model_ankush_chander_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-dummy_model_jonathanlin0707_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-dummy_model_jongyeop_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-dummy_model_mindnetml_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-dummy_model_zonepg_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-e5_90k_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-emotion_recognition_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-english_hebrew_modern_base_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-feel_italian_finetuned_pro_emit_big8_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-finance_article_titles_classifier_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-finetuning_emotion_model_dearkarina_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-ganda_english_ai_lab_makerere_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-imdb_distilbert_base_uncased_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-indo_roberta_small_id.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-marianmt_hin_eng_czech_pipeline_hi.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-mobilebert_uncased_finetuned_squadv1_mrm8488_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-mpnet_base_nli_adaptive_layer_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_pankaj10034_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-opus_maltese_indonesian_english_ccmatrix_norwegian_warmup_best_bleu_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-personal_whisper_distilled_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-readabert_french_fr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-roberta_ner_roberta_base_tweetner_2020_2021_continuous_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-roberta_qa_base_spanish_squades_becasincentivos4_pipeline_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-securebert_cyner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-sent_bert_base_greek_uncased_v1_el.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-sent_bert_large_portuguese_cased_pt.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-02-sitexsometre_camembert_base_ccnet_stsb200_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-sloberta_slo_word_spelling_annotator_sl.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-temp_checkpoints_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-tiny_random_clipmodel_hf_tiny_model_private_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-transformer_maltese_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-turkish2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-whisper_base_quran_ai_by_tarteel_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-whisper_small_english_jenrish_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-02-whisper_small_taiwanese_minnan_take2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-accu_0_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-aigc_detector_env1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-al_roberta_base_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-albert_base_qa_2_k_fold_3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-all_mpnet_base_v2_eclass_gart_labor_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-all_mpnet_base_v2_lr_2e_7_margin_1_epoch_3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-all_mpnet_base_v2_southern_sotho_out_sim_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-all_mpnet_base_v2_sts_juanignaciosolerno_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-autotrain_okr_iptal_3196789879_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-baai_bge_large_english_v1_5_fine_tuned_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-babyberta_wikipedia_french1_25m_wikipedia1_1_25m_with_masking_seed3_finetuned_squad_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-bert_base_uncased_finetuned_squad_frozen_v1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-bert_categorizer_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-bert_categorizer_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-bge_base_financial_matryoshka_dustyatx_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-bge_small_english_dcpr_tuned_teachafy_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-bge_small_english_v1_5_ft_orc_0813_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-bislama_all_mpnet_base_v2_finetuned_webnlg2020_metric_average_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-bm_french_pipeline_bm.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-brahmai_clip_v0_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-burmese_awesome_model_asmiishripad18_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-burmese_fine_tuned_distilbert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-classificateur_intention_camembert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-cleaned_bert_base_cased_500_620e5b_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-clip_vit_large_patch14_baseplate_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-coptic_english_translator_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-crossencoder_camembert_base_mmarcofr_pipeline_fr.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-03-crossencoder_camembert_l4_mmarcofr_pipeline_fr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-crossencoder_xlm_roberta_base_mmarcofr_pipeline_fr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-dataequity_opus_maltese_german_english_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-deberta_v3_base_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-deberta_v3_large_finetuned_mlm_accelerate_v3_02_xp_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-deberta_v3_large_hf_llm_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-detect_femicide_news_xlmr_dutch_fft_freeze2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-distilbert_base_multilingual_cased_finetuned_bible_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-distilbert_base_multilingual_cased_finetuned_bible_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_deletion_multiclass_complete_final_v2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_edu_classifier_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_ag_news_v5_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_emotion_aliramikh_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_emotion_aliramikh_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_emotion_iamsubrata_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_emotion_pulpilisory_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_accelerate_cxbn12_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_accelerate_jhhan_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_ddn0116_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_gertjanvanderwel_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_mie_zhz_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_mie_zhz_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_mongdiutindei_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_qiyuan123_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_rohit5895_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_sdinger_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_shenberg1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_react_content_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_synthetic_finetuned_synthetic_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-distilbert_finetuned_imdb_indah1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-distilbert_finetuned_imdb_prateekag159_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-distilbert_persian_farsi_zwnj_base_fa.md 
create mode 100644 docs/_posts/ahmedlone127/2024-09-03-distilbert_persian_farsi_zwnj_base_pipeline_fa.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-distilbert_tokenizer_256k_mlm_best_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-distilbert_yelp_sentiment_analysis_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-distilbertfinal_ctxsentence_train_all_test_french_second_train_set_french_false_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-distilgreek_bert_el.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-distilkobert_ft_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-distilroberta_sst2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-dlfbert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-dummy_model_edge2992_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-dummy_model_mhrecaldeb_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-e5_large_v2_vectoriseai_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-embedded_e5_base_500_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-emotion_amaniabuzaid_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-english_tamil_translator_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-english_tonga_tonga_islands_ganda_nllb_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-english_tonga_tonga_islands_turkish_finetuned_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-fine_tuned_twitter_roberta_base_sentiment_latest_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-fine_tuned_twitter_roberta_base_sentiment_latest_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-finer_ord_transformers_2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-finetuned_polish_tonga_tonga_islands_szl_siling_corrected_aligned_20e_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-finetuned_sentence_transformers_multi_qa_mpnet_base_dot_v1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-finetuned_twitter_profane_roberta_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-finetuning_sentiment_model_3000_samples_parth05_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-ft_clone_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-ft_clone_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-gpl_e5_base_unsupervised_scifact_k10_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-gpl_e5_base_unsupervised_test_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-gpl_tsdae_e5_base_unsupervised_test_1_d165d6_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-gpl_tsdae_e5_base_unsupervised_test_1_d165d6_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-helsinki_altp_indonesian_english_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-helsinki_nlp_opus_maltese_multiple_languages_english_opus100_accelerate_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-industry_classification_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-industry_classification_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-khmer_xlm_roberta_base_pipeline_km.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-lab1_random_coloteong_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-03-language_detection_fine_tuned_on_xlm_roberta_base_ivanlau_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-legalevalrr_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-marian_finetuned_kde4_english_tonga_tonga_islands_kinyarwanda_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-marian_finetuned_kde4_english_tonga_tonga_islands_portuguese_breton_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-mdeberta_expl_extraction_multi_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-mdeberta_v3_base_finetuned_sayula_popoluca_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-mdeberta_v3_base_nubes_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-medical_pubmed_8_2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-medrurobertalarge_ru.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-microsoft_deberta_v3_large_ner_conll2003_breast_without_castellon_castellon_30_docs_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-microsoft_deberta_v3_large_ner_conll2003_latin_fe_v2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-mpnet_base_snli_mnli_finetuned_mnli_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-mpnet_frozen_newtriplets_v2_lr_2e_5_m_1_e_3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-n_roberta_twitterfin_padding60model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-nuner_v2_0_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-opensearch_neural_sparse_encoding_doc_v2_distill_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-opus_maltese_english_italian_finetuned_english_tonga_tonga_islands_italian_enimai_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_chayawat_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_chayawat_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-roberta_base_danish_pipeline_da.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-roberta_base_russian_v0_ru.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-roberta_classifier_autonlp_persian_farsi_473312409_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-roberta_cwe_classifier_kelemia_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-roberta_embeddings_amharic_roberta_pipeline_am.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-roberta_large_financial_news_sentiment_english_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-roberta_large_finnish_finnish_nlp_pipeline_fi.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-roberta_qa_roberta_base_few_shot_k_1024_finetuned_squad_seed_2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-robertachem_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-sent_2_finetuned_xlm_r_masakhaner_swahili_macrolanguage_whole_word_phonetic_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-sent_afro_xlmr_mini_finetuned_kintweetsd_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-sent_bert_base_finnish_uncased_v1_pipeline_fi.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-sent_bert_kor_base_pipeline_ko.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-03-sent_bio_clinicalbert_emilyalsentzer_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-sent_memo_final_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-sent_radbert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-sent_twitter_xlm_roberta_base_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-sent_xlm_roberta_base_facebookai_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-sent_xlm_roberta_base_finetuned_wolof_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-sentiment_analysis_wangyh6_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-singlelabelrecommendationmodel_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-sitexsometre_camembert_large_stsb100_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-splade_v3_distilbert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-squeezebert_uncased_finetuned_squad_v2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-surgicberta_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-takalane_northern_sotho_roberta_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-tokenizerlabeller_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-topic_obits_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-topic_politics_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-trained_model_distilbert_0305_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-transformer_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-translation_finetuned_english_tonga_tonga_islands_jp_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-twitter_roberta_base_dec2020_tweet_topic_multi_2020_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-twitter_roberta_base_sep2020_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-twitter_roberta_base_topic_sentiment_latest_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-twitter_sentiment_analysis_v2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-withinapps_ndd_pagekit_test_tags_cwadj_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-xlm_r_galen_meddocan_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-xlm_r_with_transliteration_max_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_base_finetuned_augument_visquad2_15_3_2023_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_base_finetuned_clinais_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_base_finetuned_marc_english_test_rundi_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_base_finetuned_panx_all_the_neural_networker_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_base_lcc_english_2e_5_42_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_base_longformer_4096_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_base_squad2_idkmrc_clickbaitspoiling_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_large_qa_norwegian_eanderson_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_squad_nepali_translated_squad_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-03-xlmroberta_ner_base_finetuned_naija_pipeline_pcm.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-xlmroberta_ner_hugsao123_base_finetuned_panx_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-03-xlmroberta_ner_transformersbook_base_finetuned_panx_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-014_microsoft_deberta_v3_base_finetuned_yahoo_80_20_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-014_microsoft_deberta_v3_base_finetuned_yahoo_80_20_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-32_shot_twitter_2classes_head_body_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-600_stmodel_brand_rem_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-aditya_ner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-aditya_ner_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-afriberta_base_finetuned_hausa_2e_4_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-akai_ner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-albert_base_chinese_ws_zh.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-albert_base_qa_coqa_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-albert_base_v2_finetuned_ner_minhminh09_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-albert_base_v2_rotten_tomatoes_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-albert_base_v2_rte_textattack_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-albert_persian_farsi_base_v2_pipeline_fa.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-albert_persian_farsi_base_v2_sentiment_multi_fa.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-albert_small_kor_v1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-all_mpnet_base_v2_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-all_mpnet_base_v2_airdialogue_unlabelled_and_labelled_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-all_mpnet_base_v2_fine_tuned_epochs_8_binhcode25_finetuned_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-all_mpnet_base_v2_fine_tuned_epochs_8_event_nlp_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-all_mpnet_base_v2_firefox_margin_1_epoch_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-all_mpnet_base_v2_firefox_margin_1_epoch_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-all_mpnet_base_v2_survey3000_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-answer_equivalence_distilbert_zli12321_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-answer_equivalence_distilbert_zli12321_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-argureviews_component_deberta_v1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-astroentities_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-autonlp_covid_fake_news_36839110_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-autotrain_3_xlmr_fulltext_53881126794_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-autotrain_htyqd_ivazp_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-autotrain_htyqd_ivazp_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-babyberta_wiki_finetuned_squad_v1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-babyberta_wiki_finetuned_squad_v1_pipeline_en.md 
create mode 100644 docs/_posts/ahmedlone127/2024-09-04-babyberta_wikipedia1_2_5_with_masking_run3_finetuned_qamr_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-babyberta_wikipedia_2_5_0_1_finetuned_qasrl_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-babyberta_wikipedia_2_5_0_1_finetuned_qasrl_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-bert_ner_anglicisms_spanish_mbert_pipeline_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-bert_ner_biobert_base_cased_v1.2_finetuned_ner_craft_augmented_english_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-bert_ner_skills_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-bert_ner_skills_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-bert_reviews_online_courses_sentiment_analysis_sravni_russian_corp_russian_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-bert_reviews_online_courses_sentiment_analysis_sravni_russian_corp_russian_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-bert_sayula_popoluca_estbert_xpos_128_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-bert_sequence_classifier_coronabert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-bert_token_classifier_berturk_128k_keyword_discriminator_pipeline_tr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-bertimbau_large_ner_total_pipeline_pt.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-bertweetfr_base_fr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-bertweetfr_base_pipeline_fr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-best_model_yelp_polarity_32_87_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-best_model_yelp_polarity_32_87_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-beto_finetuned_ner_3_pipeline_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-binary_token_classification_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-blair_roberta_base_generative_sentiment_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-bob_oriya_not_bob_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-book_recognizer_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-bsc_bio_spanish_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_health_qa_model_35_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_health_qa_model_35_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_model_2_nicolehao7_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_qa_model_40_len_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_actor_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_all_time_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_adisur_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_aditya_jindal_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_aditya_jindal_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_almifosa_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_asrajgct_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_charliefederer_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_claire5776_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_diodiodada_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_gaogao8_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_hcy5561_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_hrodriguez_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_lmattes_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_malduwais_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_minhminh09_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_moumitanettojanamanna_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_portokali_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_portokali_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_robinsh2023_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_rw2614_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_svangorden13_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_svangorden13_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_urisoo_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_wzchen_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_saprotection_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_target_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_bert_qa_model_05_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_ner_model_atajan99_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_ner_model_atajan99_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_ner_model_delphine18_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_ner_model_delphine18_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-burmese_ner_model_luccaaug_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-camembert_base_dataikunlp_fr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-camembert_ccnet_classification_analyse_visage_classifier_only_french_lr1e_3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-camembert_mlm_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-classify_isin_step7_binary_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-classify_isin_step7_binary_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-clinicalbert_bionlp13cg_ner_nepal_bhasa_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-clip_base_patch16_supervised_mulitilingual_1600_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-clip_base_patch16_supervised_mulitilingual_1600_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-clip_crop_disease_pipeline_en.md create 
mode 100644 docs/_posts/ahmedlone127/2024-09-04-clip_demo_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-clip_vit_base_patch16_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-clip_vit_base_patch16_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-clip_vit_base_patch322_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-clip_vit_base_patch32_demo_rvignav_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-clip_vit_l_14_laion2b_s32b_b82k_laion_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-clip_vit_large_patch14_336_q_mm_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-clip_vit_large_patch14_finetuned_dresser_sofas_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-clip_vit_large_patch14_superlore_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-clip_vit_large_patch14_superlore_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-clip_vit_large_patch14_trainformeta_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-clip_vit_large_patch14_trainformeta_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-codebert_python_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-cpegen_pv_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-cpegen_pv_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-cpegen_vv_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-craft_clinicalbert_ner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-cree_fewshot_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-cross_encoder_stsb_deberta_v3_large_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-cs4248_roberta_wolof_search_mix_epoch_3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-dagpap24_deberta_base_ft_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-datasnipper_finerdistilbert_fullsequence_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-db_fe_2_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-db_fe_2_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-dbert_pii_detection_model_omshikhare_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_amazon_reviews_v1_patrickvonplaten_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_amazon_reviews_v1_patrickvonplaten_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_base_german_fluency_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_base_metaphor_detection_english_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_classification_base_prompt_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_classifier_feedback_1024_pseudo_final_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_docnli_sentencelevel_ner_claim_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_docnli_sentencelevel_ner_claim_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_finetune_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_finetune_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_sentencelevel_ner_claim_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_sentencelevel_ner_claim_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-04-deberta_small_22feb_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_tomatoes_sentiment_voodoo72_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_v2_base_japanese_finetuned_emotion_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_10xp3_10xc4_128_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_1107_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_ai4privacy_english_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_civil_comments_wilds_5k_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_civil_comments_wilds_5k_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_cola_yevheniimaslov_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_cola_yevheniimaslov_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_finetuned_bluegennx_run2_19_5e_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_finetuned_french_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_finetuned_mcqa_manyet1k_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_qnli_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_sst2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_whatsapp_ner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_whatsapp_ner_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_zeroshot_v2_0_28heldout_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_ad_opentag_finetuned_ner_5epochs_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_classifier_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_fever_pepa_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_finetuned_ner_10epochs_v2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_finetuned_synthetic_paraphrase_only_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_finetuned_synthetic_paraphrase_only_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_sentiment_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_survey_main_passage_old_facts_rater_all_gpt4_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_survey_related_passage_old_facts_rater_all_gpt4_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_v3_smallsed_rte_finetuned_rte_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_v3_xsmall_mnli_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_xlarge_em_abt_buy_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deberta_xlarge_em_abt_buy_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-debertabaseemotionbalanced_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-debertabaseemotionbalanced_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-deep_2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-delivery_balanced_distilbert_base_uncased_v2_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-04-disbert_finetune_for_gentriple_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distil_bert_docred_ner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_cased_finetuned_conll2003_english_ner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_cased_finetuned_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_cased_pii_english_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_data_wnut_17_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_english_greek_modern_cased_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_multilingual_cased_finetuned_english_portuguese_spanish_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_adl_hw1_russianroulette_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_adl_hw1_russianroulette_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_clinc_aicoder009_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_clinc_aicoder009_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_clinc_einsteinkim_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_cola_dev2k_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_emotion_wzy1924561588_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_finer_test_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb1004_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_ce_kishi_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_dvijay_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_dvijay_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_greyfoss_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_jaybdev_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_jaybdev_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_r0in_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_sbulut_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_shahzebnaveed_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_thepines_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_thepines_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_walterg777_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_masakhanenews_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_anuroopkeshav_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_cadec_active_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_chuqiaog_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_chuqiaog_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_emilyblah_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_mawiwawi_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_mawiwawi_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_misterstino_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_polo42_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_trubnik1967_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_trubnik1967_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_vnear_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_yijingzzz_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_yijingzzz_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_zy666_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_news_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_sayula_popoluca_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_d5716d28_physhunter_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_d5716d28_physhunter_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_d5716d28_sofa566_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_d5716d28_sofa566_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_fattahilmi_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_fattahilmi_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_tanishq1420_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_yashaswi0506_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_streamers_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_go_emotion_bhadresh_savani_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_mluonium_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_mnli_textattack_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_norwegian_perturb_bozhidara_pesheva_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_travel_zphr_5st_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_travel_zphr_5st_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_codeslang_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_codeslang_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_finetuned_coqa_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_finetuned_coqa_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_finetuned_finer_4_v2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_finetuned_sayula_popoluca_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_finetuned_squadv2_fuutoru_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_finetuned_squadv2_fuutoru_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_finetuned_squadv2_nampham1106_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_finetuned_squadv2_ntn0301_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_hera_synthetic_pretrain_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_imdb_huggingface_cyh002_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_imdb_huggingface_cyh002_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_masking_heaps_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_mlm_practice_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_mlm_practice_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_multilingual_cased_lft_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_qa_BERT_ClinicalQA_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_qa_COVID_DistilBERTc_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_qa_checkpoint_500_finetuned_squad_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_qa_distilBertABSA_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_qa_eurosmart_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_qa_test_squad_trained_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_qa_test_squad_trained_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_tuned_4labels_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_turkish_sentiment_analysis2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilbert_word2vec_256k_mlm_best_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilcamembert_base_pipeline_fr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distillbert_finetuned_finer_4_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distillbert_finetuned_medical_symptoms_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distillbert_finetuned_medical_symptoms_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distillbert_political_finetune_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilroberta_base_catalan_v2_ca.md create mode 
100644 docs/_posts/ahmedlone127/2024-09-04-distilroberta_base_finetuned_wikitext2_squad_qa_wandb2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-distilroberta_base_wandb_week_3_complaints_classifier_512_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-dummy_model2_skr3178_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-dummy_model2_tiffanytiffany_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-dummy_model2_tiffanytiffany_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-dummy_model_7_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-dummy_model_ainullbabystep_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-dummy_model_benchan79_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-dummy_model_binitha_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-dummy_model_dry_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-dummy_model_dry_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-dummy_model_ffleming_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-dummy_model_jianfeng777_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-dummy_model_jonathansum_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-dummy_model_maxcarduner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-dummy_model_raphgg_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-dummy_model_sunilpinnamaneni_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-dummy_model_tanu09_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-dummy_model_tanu09_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-dummy_model_tpanda09_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-dummy_model_umalakshmi07_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-dummy_model_vickysirwani_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-dummy_model_viraal_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-dummy_model_viraal_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-emotion_text_classifier_on_dd_v1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-environmentalbert_base_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-esg_classification_english_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-esg_classification_english_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-esg_sentiment_prediction_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-facets_gpt_77_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-facets_gpt_expanswer_35_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-fine_tuned_model_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-finer_distillbert_v2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-finetuned_dscs24_mitre_distilbert_base_uncased_fill_mask_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-finetuned_sail2017_indic_bert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-finetunedclip_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-finetuning_sentiment_model_3000_samples_benjihearhear_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-finetuning_sentiment_model_3000_samples_benjihearhear_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-04-finetuning_sentiment_model_3000_samples_carlodallaquercia_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-first_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-fnctech_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-furina_with_transliteration_minangkabau_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-gdpr_anonymiseingsmodel_ganm_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-geolm_base_toponym_recognition_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-google_play_sentiment_analysis_danielribeiro_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-google_play_sentiment_analysis_danielribeiro_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-gqa_roberta_german_legal_squad_2000_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-hasoc19_microsoft_mdeberta_v3_base_sentiment_nepal_bhasa_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-hw01_hamsty_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-ibert_roberta_base_abusive_oriya_threatening_speech_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-ibert_roberta_base_abusive_oriya_threatening_speech_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-icelandic_title_setfit_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-imdb_review_sentiement_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-incremental_semi_supervised_training_1mln_downsampled_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-incremental_semi_supervised_training_1mln_downsampled_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-indic_bert_finetuned_trac_ds_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-indicbert_hindi_urdu_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-indicbert_urdu_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-indicner_oriya_finetuned_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-indojave_codemixed_roberta_base_pipeline_id.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-indonesian_punctuation_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-intent_xl_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-iotnation_companyname_extraction_qa_model_1_2_roberta_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-kalbert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-kalematech_arabic_stt_asr_based_on_whisper_small_ar.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-kanberto_pipeline_kn.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-kaviel_threat_text_classifier_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-keyphrase_extraction_distilbert_inspec_finetuned_ner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-legal_roberta_large_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-linkbert_mini_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-linkbert_mini_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-lithuanian_hansardmatch_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-lithuanian_hansardmatch_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-lithuanian_namesonly_humancapital_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-04-lithuanian_namesonly_humancapital_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-lithuanian_un_data_fine_coarse_english_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-lm_ner_skills_recognition_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-lm_ner_skills_recognition_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-mach_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-magbert_lm_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-marian_finetuned_kde4_english_tonga_tonga_islands_french_lingrui1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-marian_finetuned_kde4_english_tonga_tonga_islands_french_sooh098_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-marianmt_igbo_best_18_10_23_pipeline_ig.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-masking_heaps_distilbert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-mdeberta_base_metaphor_detection_spanish_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-mdeberta_base_v3_4_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-mdeberta_base_v3_5_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-mdeberta_base_v3_5_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-mdeberta_profane_final_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-mdeberta_v3_base_finetuded_porttagger_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-mdeberta_v3_base_finetuned_ai4privacy_v2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-mdeberta_v3_base_open_ner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-mdeberta_v3_base_sst2_100_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-mdeberta_v3_base_vnrte_100_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-mdeberta_v3_emo_multilabel_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-mdeberta_v3_emo_multilabel_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-microsoft_deberta_v3_large_cls_sst2_gladiator_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-microsoft_deberta_v3_large_ner_conll2003_general_model_v1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-mix4_japanese_english_fugumt_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-mnli_microsoft_deberta_v3_large_seed_2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-model_albert_512_token_classification_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-model_albert_512_token_classification_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-mother_tongue_model_v3_sn.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-mount2_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-mount2_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-mpnet_base_allnli_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-mpnet_base_allnli_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-mt5_base_qaqg_finetuned_tydiqa_indonesian_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-mt5_base_qaqg_finetuned_tydiqa_indonesian_ir_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-mt5_base_qaqg_finetuned_tydiqa_indonesian_sentence_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-04-ner_bert_large_cased_portuguese_contratos_tceal_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-ner_cw_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-ner_distilbert_textminr_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-ner_model_ep_all_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-ner_model_ep_all_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-ner_model_rujengelal_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-nli_conventional_fine_tuning_intradiction_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-nli_conventional_fine_tuning_intradiction_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-nlp_mini_project_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-novelicious_qas_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-olm_roberta_base_latest_summarization_reward_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-ope_bert_v1_3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-openai_clip_vit_large_patch14_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-openai_detector_large_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-openclip_negclip_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-opus_maltese_english_german_finetuned_german_tonga_tonga_islands_english_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_rooshan_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_susmit99_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-opus_maltese_finetuned_korean_german_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-opus_maltese_ganda_english_finetuned_ganda_tonga_tonga_islands_english_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-opus_maltese_ganda_english_finetuned_ganda_tonga_tonga_islands_english_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-pharma_classification_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-practice_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-predict_political_group_camembert_large_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-predict_political_group_camembert_tweet_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-promptengpromptclassification_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-pubmed_clip_vit_base_patch32_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-q05_kaggle_debertav2_14_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-q05_kaggle_debertav2_14_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-qa_roberta_model2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-qa_roberta_model2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-question_answering_roberta_base_s_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-raj_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-raj_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-readabert_arabic_ar.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-readabert_arabic_pipeline_ar.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-04-readability_spanish_paragraphs_pipeline_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-recipes_trainer_wwm_sen_3_sep_true_prefix_true_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-relation_detection_textual_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-results_gkumi_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-results_gkumi_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-results_raj_sharma01_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-reward_model_deberta_v3_large_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-reward_model_deberta_v3_large_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-rise_ner_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-robako_base_asante_twi_uncased_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_base_bne_finetuned_suicide_spanish_pipeline_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_base_fake_news_tfg_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_base_fake_news_tfg_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_base_fine_tuned_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_base_hate_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_base_sqaud2_on_medical_meadow_medqa_v1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_base_squad2_finetuned_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_base_squad2_finetuned_roberta_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_classifier_base_bne_finetuned_cyberbullying_spanish_pipeline_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_finetuned_location_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_finetuned_location_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_finetuned_machinesfaults_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_finetuned_subjqa_movies_1110pm_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_finetuned_subjqa_movies_2_soumiknayak_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_human_label_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_indosquadv2_1691412431_8_2e_05_0_01_5_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_indosquadv2_1691412431_8_2e_05_0_01_5_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_large_bne_sqac_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_model_abdulrahman4111_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_mrqa_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_qa_Roberta_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_qa_Roberta_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_qa_TestQaV1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_qa_ai_club_inductions_21_nlp_roBERTa_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_qa_base_spanish_squades_becasincentivos2_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_qa_base_super_1_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-04-roberta_qa_base_super_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_qa_finetuned_city_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_qa_fpdm_hier_roberta_FT_newsqa_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_qa_large_few_shot_k_1024_finetuned_squad_seed_4_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_qa_news_pretrain_roberta_FT_newsqa_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_qa_roberta_base_few_shot_k_32_finetuned_squad_seed_2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_qa_roberta_base_few_shot_k_32_finetuned_squad_seed_2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_qa_squadv2_recipe_tokenwise_token_and_step_losses_3_epochs_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_s2orc_books_wiki_bpe_32k_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-roberta_s2orc_books_wiki_bpe_32k_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-rotten_tomatoes_microsoft_deberta_v3_large_seed_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-rotten_tomatoes_microsoft_deberta_v3_large_seed_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-same_story_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-scandibert_norwegian_faroese_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-schemeclassifier3_eng_dial_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-scideberta_czech_tdm_pretrained_finetuned_ner_finetuned_ner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-search_shield_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sembr2023_distilbert_base_uncased_finetuned_sst_2_english_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sembr2023_distilbert_base_uncased_finetuned_sst_2_english_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sent_arabert_c19_pipeline_ar.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sent_batterybert_uncased_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sent_bert_base_german_cased_dbmdz_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sent_bert_base_german_dbmdz_uncased_pipeline_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sent_bert_base_italian_cased_dbmdz_it.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sent_bert_base_italian_cased_dbmdz_pipeline_it.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sent_bert_base_uncased_eurlex_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sent_bert_base_uncased_eurlex_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sent_bert_distil_ita_legal_bert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sent_bert_persian_farsi_zwnj_base_pipeline_fa.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sent_bert_tiny_historic_multilingual_cased_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sent_bert_tiny_historic_multilingual_cased_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sent_bertu_pipeline_mt.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sent_bertweet_persian_farsi_fa.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sent_convbert_base_turkish_mc4_uncased_pipeline_tr.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-04-sent_convbert_base_turkish_mc4_uncased_tr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sent_darijabert_ar.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sent_entitycs_39_pep_malay_mlm_xlmr_base_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sent_entitycs_39_wep_xlmr_base_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sent_finbert_pretrain_yiyanghkust_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sent_hafez_bert_fa.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sent_hafez_bert_pipeline_fa.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sent_indicbertv2_mlm_only_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sent_indicbertv2_mlm_only_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sent_inlegalbert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sent_legal_bert_base_uncased_nlpaueb_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sent_marbert_ar.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sent_marbert_pipeline_ar.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sent_norbert2_no.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sent_patentbert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sent_tiny_biobert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sent_vetbert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sent_vetbert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sent_xlm_roberta_base_finetuned_luganda_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sentencepiecebpe_cc100_french_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sentencepiecebpe_cc100_french_morphemes_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sentencepiecebpe_cc100_french_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-serbian_test_clip_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-shus_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-shus_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sitexsometre_camembert_base_ccnet_stsb25_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-sitexsometre_camembert_large_stsb25_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-southern_sotho_all_mpnet_finetuned_comb_3000_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-spark_name_arabic_tonga_tonga_islands_english_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-spark_name_arabic_tonga_tonga_islands_english_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-stt_best_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-tara_roberta_base_persian_farsi_qa_pipeline_fa.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-tenseprediction_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-tenseprediction_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-test3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-test_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-test_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-test_trainer_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-text_classification_nolora_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-04-text_classification_nolora_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-tiny_random_albertfortokenclassification_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-tiny_random_albertfortokenclassification_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-tiny_random_bertfortokenclassification_ydshieh_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-tinyclip_vit_39m_16_text_19m_yfcc15m_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-tinyroberta_squad2_finetuned_emrqa_msquad_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-token_classification_hemg_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-token_classification_hemg_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-token_classification_park_hip_02_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-token_classification_park_hip_02_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-trained_danish_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-trained_danish_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-trust_merged_dataset_mdeberta_v3_10epoch_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-trust_merged_dataset_mdeberta_v3_1epoch_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-tupy_bert_large_binary_classifier_pipeline_pt.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-twitter_paraphrase_embeddings_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-twitter_roberta_large_hate_latest_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-umberto_fine_tuned_docclass_punjabi_eastern_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-v39_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-v51_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-vir_pat_qa_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-vispell_small_v1_pipeline_vi.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-week5_eng_distilbert_base_multilingual_cased_finetuned_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-whisper_small_finetune_taiwanese_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-whisper_small_singlish_122k_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-whisper_tiny_english_tyocre_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-whisper_tiny_finetune_pooya_fallah_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_balance_mixed_aug_replace_bert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_amharic_am.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_all_monkdalma_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_all_ultimecia_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_all_ultimecia_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_english_bluetree99_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_french_chaoli_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_french_handun_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_french_taoyoung_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_french_taoyoung_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_bessho_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_blanche_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_0ppxnhximxr_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_0ppxnhximxr_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_drigb_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_drigb_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_neha2608_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_neha2608_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_tamo2_3yama_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_tamo2_3yama_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_team_nave_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_junf1122_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_malduwais_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_sbpark_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_songys_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_sponomary_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_szogi_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_transll_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_italian_cj_mills_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_italian_sponomary_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_lr0_0001_seed42_kinyarwanda_amh_eng_train_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_ner_augmentation_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_ner_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_russian_sentiment_sentirueval2016_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_trimmed_french_xnli_french_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_trimmed_french_xnli_french_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_finetuned_emojis_2_client_toxic_fedavg_iid_fed_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_indosquadv2_1694025616_16_2e_06_0_01_5_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_indosquadv2_1694025616_16_2e_06_0_01_5_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-04-yelp_polarity_microsoft_deberta_v3_base_seed_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-100_sdb_taxxl_truncate_768_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-100_sdb_taxxl_truncate_768_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-1genreviewssentimentsamples_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-2020_q1_full_tweets_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-2020_q1_full_tweets_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-2020_q1_full_tweets_tok_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-2020_q1_full_tweets_tok_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-20230328_001_baseline_xlmr_clickbait_spoiling_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-20230328_001_baseline_xlmr_clickbait_spoiling_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-4_epoch_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-accu_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-accu_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-accu_4_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-address_extraction_ner_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-adp_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-afriqa_afroxlmr_squad_v2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-afriqa_afroxlmr_squad_v2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-afro_xlmr_base_finetuned_kintweetsc_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-afro_xlmr_base_finetuned_kintweetsd_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ai_text_detector_mhk1122_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ai_text_detector_mhk1122_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-aift_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-akihiro2_finetuned_kde4_english_tonga_tonga_islands_jp_accelerate_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-akihiro2_finetuned_kde4_english_tonga_tonga_islands_jp_accelerate_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-albert_base_spanish_2023_11_13_19_24_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-albert_base_spanish_2023_11_13_19_24_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-albert_base_v1_semeval2017_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-albert_base_v1_semeval2017_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-albert_base_v2_fold_3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-albert_base_v2_fold_3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-albert_chinese_large_qa_pipeline_zh.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-albert_chinese_large_qa_zh.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-albert_finetuned_tenbook_epoch_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-albert_finetuned_tenbook_epoch_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-albert_persian_farsi_zwnj_base_v2_fa.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-albert_persian_farsi_zwnj_base_v2_pipeline_fa.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-05-albert_tiny_chinese_ws_pipeline_zh.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-albert_xlarge_arabic_ar.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-albert_xlarge_arabic_pipeline_ar.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-albert_xlarge_v1_finetuned_mrpc_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-albert_xlarge_v1_finetuned_mrpc_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-arabert_ner_ar.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-arabert_ner_pipeline_ar.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-arabic_ner_ace_pipeline_ar.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-arazn_whisper_small_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-arazn_whisper_small_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-assessing_advancing_question_classifier_v1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-assessing_advancing_question_classifier_v1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-autofill_ner_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-autotrain_danaos_qa_system_49147118912_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-autotrain_danaos_qa_system_49147118912_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-autotrain_qasbert_44603112362_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-autotrain_qasbert_44603112362_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-autotrain_xlm_roberta_base_qa_95197146303_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-autotrain_xlm_roberta_base_qa_95197146303_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-azerbaijani_question_answering_az.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-azerbaijani_question_answering_pipeline_az.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-babyberta_wikipedia1_1_25m_wikipedia_french1_25m_with_masking_finetuned_french_squad_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-balanced_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-balanced_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-banglaasr_bangla_speech_processing_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-banglaasr_bangla_speech_processing_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_base_german_uncased_dbmdz_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_base_swedish_cased_nepal_bhasa_pipeline_sv.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_base_swedish_cased_nepal_bhasa_sv.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_based_turkish_ner_wikiann_pipeline_tr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_cased_ner_pipeline_tr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_cased_ner_tr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_finetuned1_arcd_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_finetuned1_arcd_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_finetuned_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_finetuned_ner_kirill_a_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_finetuned_ner_kirill_a_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-05-bert_finetuned_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_finetuned_semitic_languages_eval_english_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_gemma_2_2b_italian_imdb_2bit_0_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_medieval_multilingual_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_medieval_multilingual_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_ner_biobert_base_cased_v1.2_finetuned_ner_craft_augmentedtransfer_english_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_ner_cause_effect_detection_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_ner_cause_effect_detection_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_ner_classifier_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_ner_classifier_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_ner_rubertconv_toxic_editor_pipeline_ru.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_ner_spacebert_cree_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_ner_spacebert_cree_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_ner_spacescibert_cree_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_ner_spacescibert_cree_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_qa_marianalc_finetuned_squad_accelerate_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_qa_marianalc_finetuned_squad_accelerate_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_question_answering_cased_squadv2_turkish_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_question_answering_cased_squadv2_turkish_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_autotrain_jobberta_23_3671398065_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_foodbase_ner_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_large_ner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_large_ner_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_norwegian_bokml_base_ner_no.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_norwegian_bokml_base_ner_pipeline_no.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_parsbert_peymaner_fa.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_parsbert_peymaner_pipeline_fa.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_reddit_ner_place_names_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_uncased_keyword_discriminator_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_web_bulgarian_cased_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bert_web_bulgarian_cased_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bertimbau_base_sayula_popoluca_2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bertislav_cu.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bertislav_pipeline_cu.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-berturk_legal_pipeline_tr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-berturk_legal_tr.md 
create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bertwithmetadata_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bertwithmetadata_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bge_base_citi_dataset_9k_1k_e1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bge_base_financial_matryoshka_jaswanth160_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bge_base_financial_matryoshka_jaswanth160_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bge_base_financial_matryoshka_kr_manish_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bge_base_financial_matryoshka_kr_manish_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bge_base_securiti_dataset_1_v20_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bge_base_securiti_dataset_1_v20_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bge_large_chinese_v1_6_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bge_micro_v2_esg_v2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bge_small_bioasq_3epochs_batch32_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bge_small_english_v1_5_esg_v2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bge_small_english_v1_5_hpc_lab_docs_fine_tuned_test_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bge_small_english_v1_5_hpc_lab_docs_fine_tuned_test_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bias_detection_ner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bias_detection_ner_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bible_roberta_base_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bible_roberta_base_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-biobert_fachpraktikum_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-biobert_fachpraktikum_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-biobert_full_finetuned_ner_pablo_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-biobert_full_finetuned_ner_pablo_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-biomed_roberta_base_4096_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-biomed_roberta_base_4096_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-biomedical_ner_all_datasets_4_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-biomedical_ner_all_datasets_4_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-biomednlp_biomedbert_large_uncased_abstract_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-biomednlp_pubmedbert_proteinstructure_ner_v3_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-biomednlp_pubmedbert_proteinstructure_ner_v3_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bsc_bio_ehr_spanish_carmen_humano_pipeline_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bsc_bio_ehr_spanish_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-bsc_bio_ehr_spanish_pipeline_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_eli5_mlm_model_nateile_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_model_jasssz_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_model_lenatt_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_opus_books_model_wzchen_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_opus_books_model_wzchen_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_abbie_tsao_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_abbie_tsao_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_beenish0092_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_beenish0092_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_calebz9527_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_calebz9527_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_casual_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_chuhao1305_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_connerside_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_connerside_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_donbasta_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_duggurani_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_fukada6280_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_girsha_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_hamzamushtaq12_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_hamzamushtaq12_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_irishzhang_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_irishzhang_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_jaydip_tss_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_jaydip_tss_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_laitrongduc_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_langchain12_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_lash_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_manikanta_goli_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_manusj_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_qminh369_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_qminh369_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_thypogean_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_yjoonjang_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_yohand_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_yohand_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_yuting27_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_place_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_nepal_bhasa_ner_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_nepal_bhasa_ner_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_ner_model_balciberin_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_ner_model_balciberin_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_ner_model_mundo_go_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_ner_model_rwindia_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_ner_model_uppaluru_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-burmese_ner_model_uppaluru_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-businessbert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-businessbert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-candle_cvss_availability_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-candle_cvss_availability_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-candle_cvss_scope_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-candle_cvss_vector_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-category_1_delivery_cancellation_distilbert_base_cased_v1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-cefr_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-checkpoint_11600_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-checkpoint_11600_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-checkpoint_14200_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-checkpoint_14200_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-checkpoint_22200_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-checkpoint_22200_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-chemberta_pubchem1m_shard00_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-chemberta_pubchem1m_shard00_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-cino_base_v2_tncc_document_tsheg_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-cino_base_v2_tncc_document_tsheg_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-cino_large_v2_tncc_document_tsheg_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-cino_large_v2_tncc_document_tsheg_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-cino_large_v2_tncc_title_tsheg_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-cino_large_v2_tncc_title_tsheg_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-clasificador_muchocine_modeloalbert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-clasificador_muchocine_modeloalbert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-classifier__mergedcutiesruns__evidencealignment_albert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-classifier__mergedcutiesruns__evidencealignment_albert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-classify_isin_step6_binary_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-classify_isin_step6_binary_pipeline_en.md create mode 
100644 docs/_posts/ahmedlone127/2024-09-05-clinicalbert_full_finetuned_ner_pablo_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-clip_finetuned_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-clip_large_fp16_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-clip_rsicd_ngit_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-clip_rsicd_ngit_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-clip_seed_vit_8_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-clip_vit_base_patch16_adasdimchom_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-clip_vit_base_patch16_adasdimchom_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-clip_vit_base_patch16_img_text_relevancy_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-clip_vit_base_patch32_demo_xiaoliy2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-clip_vit_large_patch14_224_korean_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-clip_vit_large_patch14_224_korean_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-clip_vit_large_patch14_custom_handler_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-clip_vit_large_patch14_custom_handler_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-clip_vit_large_patch14_finetuned_general_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-clip_vit_large_patch14_finetuned_general_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-clip_zabir_2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-codebertapy_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-codebertapy_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-codegeneration_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-codegeneration_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-commitpredictor_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-commitpredictor_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-conflibert_named_entity_recognition_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-conflibert_named_entity_recognition_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-context_two_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-convbert_base_turkish_mc4_cased_pipeline_tr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-convbert_base_turkish_mc4_cased_tr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-convbert_base_turkish_mc4_uncased_tr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-cpu_economywide_classifier_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-cpu_target_classifier_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-cpu_target_classifier_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-cross_encoder_v1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-cross_encoder_v1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-crossencoder_camembert_l10_mmarcofr_fr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-crossencoder_camembert_l10_mmarcofr_pipeline_fr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-crossencoder_mminilmv2_l6_mmarcofr_fr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-crossencoder_mminilmv2_l6_mmarcofr_pipeline_fr.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-05-cryptocurrency_intent_search_detection_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ct_cos_xlmr_20230814_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ct_cos_xlmr_20230923_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_20230908_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_20230919_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_20230919_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_20230920_2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_20230920_2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_20230923_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_idkmrc_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_squadv2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_squadv2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_10june23_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_10june23_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_11june23_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_11june23_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_13june23_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_13june23_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_9june23_2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_9june23_2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_9june23_3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_9june23_3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_9june23_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ct_trial_9june23_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ct_trial_9june23_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-dataequity_kde4_english_spanish_qlora_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-dataequity_kde4_english_spanish_qlora_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-dataequity_opus_maltese_spanish_arabic_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-dataequity_opus_maltese_spanish_arabic_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-dataequity_opus_maltese_tagalog_english_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-dbbert_pipeline_el.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-dbert_ai4p_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-deberta_amazon_reviews_v1_krishankantsinghal_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-deberta_amazon_reviews_v1_krishankantsinghal_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-deberta_attr_score_90fr_final_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-deberta_attr_score_90fr_final_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-deberta_base_zero_shot_classifier_mnli_anli_v3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-deberta_sentencelevel_nofeatures_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-deberta_senti_over_en.md create 
mode 100644 docs/_posts/ahmedlone127/2024-09-05-deberta_senti_over_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base__sst2__all_train_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base__sst2__all_train_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base_finetuned_cola_midterm_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base_nli_2x_v0_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base_nli_2x_v0_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base_prompt_injection_v2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base_survey_nepal_bhasa_fact_main_passage_rater_half_human_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base_survey_nepal_bhasa_fact_main_passage_rater_half_human_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base_tasksource_toxicity_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base_tasksource_toxicity_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-deberta_v3_large_fever_garcialnk_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-deberta_v3_large_finetuned_cola_midterm_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-deberta_v3_large_finetuned_cola_midterm_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-deberta_v3_large_survey_related_passage_consistency_rater_all_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-deberta_v3_large_survey_related_passage_consistency_rater_all_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-deberta_v3_large_survey_topicality_rater_half_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-deberta_v3_large_survey_topicality_rater_half_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-deberta_v3_small_multilabel_mixed_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-deberta_v3_small_multilabel_mixed_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-deberta_v3_sta_rel_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-deberta_v3_sta_rel_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-delip_vit_large_512_v0_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-delip_vit_large_512_v0_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-dictabert_ner_he.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-dictabert_ner_pipeline_he.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-digital_physical_classifier_v2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-digital_physical_classifier_v2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-discharge_albert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-discharge_albert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-disorbert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_amazon_shoe_review_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_amazon_shoe_review_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_cased_ner_dumiiii_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_cased_ner_tunahangokcimen_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-05-distilbert_base_indonesian_finetuned_prdect_indonesian_id.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_indonesian_finetuned_prdect_indonesian_pipeline_id.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_multilingual_cased_finetuned_german_portuguese_spanish_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_squad_tfm_1_question_answering_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_squad_tfm_1_question_answering_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_emotion_ft_0703_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_5to9_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_5to9_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_aikozvezda_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_aikozvezda_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_elshehawy_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_parthiv99_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_parthiv99_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_wlrnfyd0329_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_wlrnfyd0329_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotions_klenam_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotions_klenam_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_abh1na5_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_accelerate_pragash_mohanarajah_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_accelerate_rajabilalnazir_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_accelerate_rajabilalnazir_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_alex_atelo_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_alex_atelo_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_chrisantha_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_chrisantha_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_jfcruz13_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_kennytheo_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_kennytheo_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_miktf_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_miktf_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_muffato_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_muffato_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_nlp_course_chapter7_section2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_nlp_course_chapter7_section2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_zhenchuan_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_lm_attck_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_lm_attck_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_neg_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_negation_scope_classification_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_negation_scope_classification_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_ceciliafu_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_digidix28_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_douglasadams11_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_fatimetou_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_fatimetou_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_furongzou_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_ggital_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_hcy5561_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_hcy5561_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_karunac_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_karunac_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_lum4yx_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_mldscz_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_mldscz_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_nsboan_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_shuvayanti_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_ugrozkr_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_reactjs_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_reactjs_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_recipes_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_sayula_popoluca_kazakh_3080_en.md create mode 
100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_sentiment_luluw_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_srl_jing1113_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_sst_2_english_distilbert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_sst_2_english_distilbert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_yelp_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_german_chamorro_cree_entry_classification_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_german_chamorro_cree_entry_classification_pipeline_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_odm_zphr_0st17sd_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_odm_zphr_0st17sd_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_pii_finance_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_qqp_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_tokenclassification_yeji_seong_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_drugscom_depression_reviews_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_enron_hf_format_ft_v2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_enron_hf_format_ft_v2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_exp_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_finetuned_ner_rasyosef_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_finetuned_ner_rasyosef_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_finetuned_pii_mjalg_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_finetuned_pii_mjalg_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_finetuned_token_classification_ner_trip_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_finetuned_token_classification_ner_trip_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_finetuned_vietnamese_question_type_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_finetuned_vietnamese_question_type_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_git_commits_bugfix_classification_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_imdb_padding20model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_imdb_padding20model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_imdb_padding40model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_lolchamps_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_ner_augmented_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_ner_japanese_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_sentiment_analysis_multiclass_dataset_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_sentiment_analysis_multiclass_dataset_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-05-distilbert_sentiment_classifier_kiel1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_tokenizer_256k_mlm_500k_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_tokenizer_256k_mlm_500k_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_toxic_detector_multi_label_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_toxic_detector_multi_label_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_turkish_ner_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilbert_v0_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distill_sarcasm_english_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distill_sarcasm_english_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distillbert_fine_tune_ner_task_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distillbert_finetuned_ner_btc_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilled_roberta_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilled_roberta_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilroberta_base_finetuned_energy_tweets_fullsample_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilroberta_base_ft_news_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-distilroberta_base_ft_news_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ditransformersert_base_uncased_tokenclassification_lora_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-dock_3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-dock_3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-drbert_casm2_pipeline_fr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-dynamic_tinybert_finetuned_squad_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-dynamic_tinybert_finetuned_squad_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-efficient_mlm_m0_10_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-efficient_mlm_m0_10_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-efficient_mlm_m0_30_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-efficient_mlm_m0_30_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-efficient_mlm_m0_70_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-efficient_mlm_m0_70_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-efficientnet_b0_urdu_ocr_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-efficientnet_b0_urdu_ocr_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-electra_embeddings_araelectra_base_generator_ar.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-electra_embeddings_araelectra_base_generator_pipeline_ar.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-electra_qa_base_finetuned_squadv2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-electra_qa_base_finetuned_squadv2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-emoji_emoji_random3_seed0_bernice_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-emoji_emoji_random3_seed0_bernice_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-english_hebrew_modern_large_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-english_hebrew_modern_large_pipeline_en.md create 
mode 100644 docs/_posts/ahmedlone127/2024-09-05-english_swahili_translation_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-english_swahili_translation_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-english_tonga_tonga_islands_arabic_v2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-enlm_roberta_130_imdb_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-esci_us_mpnet_crossencoder_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-esci_us_mpnet_crossencoder_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-esg_classification_french_english_pipeline_fr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-esmlmt59_2500_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-esmlmt59_2500_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-experiment_foreign_language_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-fake_news_classifier_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-fin_roberta_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-fin_roberta_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-finance_news_classifier_kanuri_v7_pipeline_ko.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-finbert_pretrain_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-finbert_pretrain_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-finetune_whisper_small_malay_singlish_v2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-finetune_whisper_small_malay_singlish_v2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-finetuned_distilbert_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-finetuned_distilbert_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-finetuned_helsinki_nlp_opus_maltese_korean_english_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-finetuned_helsinki_nlp_opus_maltese_korean_english_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-finetuned_iitp_pdt_review_indic_bert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-finetuned_marianmtmodel_v2_specialfrom_ccmatrix77k_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-finetuned_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-finetuned_ner_sarthak7777_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-finetuned_opusmt_english_tonga_tonga_islands_tamil_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-finetuned_opusmt_english_tonga_tonga_islands_tamil_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-finetuning_ift6758_hw6_sentiment_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-finetuning_movie_sentiment_analysis_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-finetuning_sentiment_model_3000_samples_gaurimm_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-finetuning_sentiment_model_3000_samples_yuezhangjoslin_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-finetuning_sentiment_model_thread_3000_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-finetuning_sentiment_model_thread_3000_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-flip_base_32_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-flip_base_32_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-flip_large_14_en.md create mode 
100644 docs/_posts/ahmedlone127/2024-09-05-flip_large_14_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-fralbert_base_fr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-fralbert_base_pipeline_fr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-fromhf_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-fromhf_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ft_distilbert_gest_pred_seqeval_partialmatch_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_nlp_feup_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_nlp_feup_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_rnribeiro_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_rnribeiro_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ft_opensubs_arabic_english_marianmt_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ft_opensubs_arabic_english_marianmt_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-furina_seed42_eng_amh_esp_roman_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-furina_seed42_eng_amh_esp_roman_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-furina_seed42_eng_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-furina_seed42_eng_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-g3_finetuned_ner_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-game_content_safety_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-game_content_safety_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ganda_english_conrad747_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-german_text_classification_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-german_text_classification_pipeline_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-greberta_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-greberta_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-gun_nlth_base_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-gun_nlth_base_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-gysbert_v2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-gysbert_v2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-hafez_ner_fa.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-hafez_ner_pipeline_fa.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-hate_hate_random1_seed0_bernice_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-hate_hate_random1_seed0_bernice_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-helsinki_danish_swedish_v17_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-helsinki_danish_swedish_v17_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-helsinki_danish_swedish_v6_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-helsinki_danish_swedish_v6_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-helsinki_nlp_opus_maltese_uyghur_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-helsinki_nlp_opus_maltese_uyghur_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-05-hf_distilbert_imdb_mlm_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-hf_distilbert_imdb_mlm_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-hihu2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-hihu2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-hihu4_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-hihu4_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-hindi_marathi_dev_albert_hi.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-hindi_marathi_dev_albert_pipeline_hi.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-hiner_di_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-hiner_di_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-hotelbert_small_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-hotelbert_small_pipeline_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-human_vs_llm_generated_text_detection_distilbert_luciayn_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-hw001_leochenwj_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-hw001_leochenwj_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-hw01_liamli1991_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ibert_roberta_base_finetuned_wikineural_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-icebert_vesteinn_is.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-icebert_vesteinn_pipeline_is.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-imdb_spoiler_distilbertorigdatasetlr1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-imdb_spoiler_distilbertorigdatasetlr1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-improved_xlm_roberta_base_nodroput_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-improved_xlm_roberta_base_nodroput_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-incollection_recognizer_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-indic_bert_finetuned_legal_try0_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-indicbertner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-indicbertner_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-indonesian_multi_pipeline_id.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-inproceedings_recognizer_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-issuebert_large_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-issuebert_large_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-isy503_a03_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-isy503_a03_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-italian_ner_xxl_it.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-italian_ner_xxl_pipeline_it.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-iwslt17_marian_big_ctx4_cwd4_english_french_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-kolivia_classifier_v2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-kolivia_classifier_v2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-korean_finance_news_classifier_ko.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-korean_finance_news_classifier_pipeline_ko.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-lab1_random_jarmac_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-05-lab1_random_jarmac_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-lab1_random_robinysh_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-lab1_random_robinysh_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-lab1_true_random_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-lavan_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-lavan_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-legal_longformer_base_8192_spanish_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-legal_longformer_base_8192_spanish_pipeline_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-linkbert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-litberta_uncased_lt.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-litberta_uncased_pipeline_lt.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ltrc_albert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ltrc_albert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-m8_mlm_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-m_bert_distil_40_finetune_intent_classification_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-m_bert_distil_40_finetune_intent_classification_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-malbert_base_cased_32k_fr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-malbert_base_cased_64k_fr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-malbert_base_cased_64k_pipeline_fr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-marathi_albert_mr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-marathi_albert_pipeline_mr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-marian_english_german_test_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-marian_english_german_test_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_magnustragardh_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_mithegooie_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_mithegooie_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-marian_finetuned_kde4_english_tonga_tonga_islands_french_douglaschan_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-marian_finetuned_kde4_english_tonga_tonga_islands_french_douglaschan_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-marian_finetuned_kftt_japanese_tonga_tonga_islands_english_test1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-marian_finetuned_kftt_japanese_tonga_tonga_islands_english_test1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-marianmt_ufal_english_french_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-marianmt_ufal_english_french_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-mdeberta_base_v3_6_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-mdeberta_base_v3_6_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_base_assin_entailment_pipeline_pt.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_base_assin_entailment_pt.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_base_caresa_pipeline_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_base_qqp_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_base_qqp_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_base_triplet_critic_xnli_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_base_triplet_critic_xnli_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_fine_tuned_text_classification_slovene_data_augmentation_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_fine_tuned_text_classification_slovene_data_augmentation_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-mdebertav3_subjectivity_turkish_pipeline_tr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-mdebertav3_subjectivity_turkish_tr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-mdt_ie_re_baseline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-mdt_ie_re_baseline_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-medical_enes_basque_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-medical_english_basque_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-medical_english_basque_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-medidalroberta_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-mentalberta_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-mentalberta_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-mlm_acutal_bangla_hate_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-mlm_acutal_bangla_hate_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-model11epochs_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-model11epochs_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-model_albert_5000_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-model_arebmann_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-monotransquest_hter_english_czech_pharmaceutical_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-monotransquest_hter_english_czech_pharmaceutical_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-movie_roberta_base_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-movie_roberta_base_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-mpnet_adaptation_mitigation_classifier_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-mpnet_base_airlines_news_multi_label_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-mpnet_base_airlines_news_multi_label_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-mpnet_base_edu_classifier_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-mpnet_base_edu_classifier_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-mpnet_base_news_about_gold_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-mpnet_base_news_about_gold_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-mrpc_microsoft_deberta_v3_large_seed_3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-mrpc_microsoft_deberta_v3_large_seed_3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-mrr_qa_15k_until_2_08_finrtuned_on_21_7_model_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-05-mrr_qa_15k_until_2_08_finrtuned_on_21_7_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-multi_balanced_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-multilingual_sentiment_covid19_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-multilingual_toxic_text_detection_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-multilingual_toxic_text_detection_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-muppet_roberta_base_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-muppet_roberta_base_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-naija_twitter_sentiment_afriberta_large_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ner_bert_large_cased_portuguese_lenerbr_finetuned_ner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ner_cw_pipeline_testt_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ner_demo_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ner_fine_tuned_beto_pipeline_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ner_meddocan_pipeline_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ner_test_bert_base_uncased_finetuned_500k_adamw_3_epoch_locations_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ner_test_bert_base_uncased_finetuned_500k_adamw_3_epoch_locations_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ner_totalamount_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-nerd_nerd_random2_seed1_bernice_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-nerd_nerd_random2_seed1_bernice_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-nerd_nerd_random2_seed2_bernice_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-nerd_nerd_random2_seed2_bernice_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-nerkor_cars_onpp_hubert_hu.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-neural_cherche_sparse_embed_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-neural_cherche_sparse_embed_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-neuraspeech_whisperbase_fa.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-neuraspeech_whisperbase_pipeline_fa.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-newsbertje_base_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-newsbertje_base_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-nli_conventional_fine_tuning_m4faisal_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-nli_sentence_contradiction_classification_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-noise_memo_bert_3_02_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-noise_memo_bert_3_02_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-norwegian_bokml_bert_large_no.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-norwegian_bokml_roberta_base_scandi_1e4_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-norwegian_bokml_whisper_small_verbatim_nbailab_no.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-norwegian_bokml_whisper_small_verbatim_nbailab_pipeline_no.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-norwegian_intent_classifier_model2_pipeline_no.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-nuner_v0_1_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-05-nuner_v1_orgs_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-nuner_v2_fewnerd_fine_super_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_4000instances_opus_leaningrate2e_05_batchsize8_11_action_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_4000instances_opus_leaningrate2e_05_batchsize8_11_action_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_chinese_twi_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_chinese_twi_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_french_finetuned_english_tonga_tonga_islands_french_sy23_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_french_finetuned_english_tonga_tonga_islands_french_sy23_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_french_finetuned_must_c_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_french_finetuned_must_c_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ganda_tobius_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ganda_tobius_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_indonesian_ccmatrix_lr_5_best_bleu_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_indonesian_ccmatrix_lr_5_best_bleu_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_mpiccardi_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_mpiccardi_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_european_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_spanish_finetuned_english_tonga_tonga_islands_spanish_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_spanish_finetuned_english_tonga_tonga_islands_spanish_pipeline_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_spanish_finetuned_spanish_tonga_tonga_islands_sja_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-opus_maltese_japanese_english_finetuned_japanese_tonga_tonga_islands_english_xml_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-opus_maltese_japanese_english_finetuned_japanese_tonga_tonga_islands_english_xml_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-opus_maltese_thai_english_finetuned_english_tonga_tonga_islands_thai_galucier_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-output_ben_epstein_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-paiute_tonga_tonga_islands_english_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-patstat_citation_parser_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-patstat_citation_parser_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-phowhisper_base_vinai_pipeline_vi.md create 
mode 100644 docs/_posts/ahmedlone127/2024-09-05-phowhisper_base_vinai_vi.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-pii_model_ankitcodes_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-pll_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-poe_qa_mpnetbase_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-portuguese_capitalization_punctuation_restoration_sanivert_pipeline_pt.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-portuguese_up_xlmr_fewshot_falsetrue_0_0_best_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-portuguese_up_xlmr_fewshot_falsetrue_0_0_best_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-predicting_misdirection_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-pretrained_xlm_portuguese_e5_select_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-pretrained_xlm_portuguese_e5_select_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ptsdbert_large_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ptsdbert_large_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-pubchem10m_smiles_bpe_120k_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-pubchem10m_smiles_bpe_50k_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-puoberta_pipeline_tn.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-puoberta_tn.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-qa_redaction_nov1_16_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-qa_redaction_nov1_16_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-qa_redaction_nov1_19_a2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-qa_redaction_nov1_19_a2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-qa_synth_02_oct_with_finetune_1_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-qa_synth_22_sept_with_finetune_1_0_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-qa_synth_22_sept_with_finetune_1_0_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-qa_synth_25_sept_with_finetune_1_0_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-qa_synth_25_sept_with_finetune_1_0_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-qa_synth_26_sept_with_finetune_1_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-qa_synth_26_sept_with_finetune_1_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-qa_synth_27_sept_with_finetune_1_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-qa_synth_27_sept_with_finetune_1_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-qa_synth_data_with_unanswerable_23_aug_xlm_fnetune_1_0_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-qa_synthetic_data_only_16_aug_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-qa_synthetic_data_only_16_aug_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_am_infoweb_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_am_infoweb_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_anuragsingh28_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-qa_synthetic_data_only_finetuned_v1_0_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-05-quality_estimation_huq_xlm_roberta_english_hungarian_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-quality_estimation_huq_xlm_roberta_english_hungarian_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-question_answering_xlm_roberta_base_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-recipebert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-recipebert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-regr_4_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-regr_4_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-regression_albert_2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-regression_albert_2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-results_benuehlinger_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-results_benuehlinger_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-reward_model_deberta_v3_unit_test_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-rise_ner_distilbert_base_cased_system_b_v1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-rise_ner_distilbert_base_cased_system_b_v1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-rise_ner_reduced_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-rise_ner_reduced_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_base_1b_2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_base_afacta_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_base_afacta_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_base_bne_finetuned_tripadvisordomainadaptation_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_base_bne_finetuned_tripadvisordomainadaptation_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_base_cheng98_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_base_cheng98_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_base_epoch_52_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_base_epoch_66_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_base_epoch_66_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_base_epoch_9_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_base_epoch_9_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_base_exp_32_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_base_finetuned_ner_sevixdd_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_base_finetuned_squad_hasan55_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_base_finetuned_squad_hasan55_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_base_finetuned_wallisian_manual_2ep_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_base_finetuned_wallisian_manual_2ep_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_base_marathi_marh_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_base_ner_conll2003_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_base_ner_conll2003_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_base_ner_updated_pipeline_mn.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-05-roberta_base_sentiment_bulgarian_bg.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_base_sentiment_bulgarian_pipeline_bg.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_base_squad2_f_arnold_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_base_squad2_f_arnold_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_base_squad2_graphcore_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_base_squad2_graphcore_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_conll_learning_rate2e5_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_conll_learning_rate2e5_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_fake_news_detection_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_fake_news_detection_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_go_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_go_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_large_1802_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_large_depression_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_large_depression_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_large_mrqa_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_large_mrqa_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_med_small_1m_2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_med_small_1m_2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_mlm_for_protein_clustering_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_ner_roberta_base_biomedical_clinical_spanish_finetuned_ner_craft_augmentedtransfer_spanish_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_qa_01_dialdoc_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_qa_01_dialdoc_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_qa_IceBERT_QA_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_qa_IceBERT_QA_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_qa_avioo1_roberta_base_squad2_finetuned_squad_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_qa_base_filtered_cuad_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_qa_movie_roberta_MITmovie_squad_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_qa_recipe_triplet_recipes_base_timestep_squadv2_epochs_3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_qa_recipe_triplet_recipes_base_timestep_squadv2_epochs_3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_qa_roberta_paraphrasev3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_qa_roberta_paraphrasev3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_small_basque_pipeline_eu.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_stress_detection_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_xlm_finetuned_amazon_conversations_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberta_xlm_finetuned_amazon_conversations_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-05-robertacnnrnnfnntransformer_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-robertacnnrnnfnntransformer_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberto_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-roberto_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-rockbook_finetuned_legalbert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-rockbook_finetuned_legalbert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-rpa_synth1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-rpa_synth1_on_7_nov_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-rpa_synth1_on_7_nov_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-rpa_synth1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-rtmex23_pol4_cardif_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-rubert_base_cased_conversational_ner_v3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-rulebert_v0_0_k0_it.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-rulebert_v0_0_k0_pipeline_it.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-rupunct_big_pipeline_ru.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-salamathankstransformer_fil2en_v1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-salamathankstransformer_fil2en_v2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-salamathankstransformer_fil2en_v2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-samind_sentiment_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sanskrit_saskta_distilbert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sanskrit_saskta_distilbert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sayula_popoluca_tagger_3112_v3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sayula_popoluca_tagger_3112_v3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sbert_punc_case_russian_pipeline_ru.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sbert_punc_case_russian_ru.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-scenario_non_kd_from_scratch_data_tyqiangz_multilingual_sentiments_malay_model_x_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-scenario_non_kd_from_scratch_data_tyqiangz_multilingual_sentiments_malay_model_x_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-scenario_non_kd_po_copy_cdf_cl_d2_data_cl_cardiff_cl_only_gamma_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-scenario_non_kd_po_copy_cdf_cl_d2_data_cl_cardiff_cl_only_gamma_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-scenario_non_kd_pr_copy_cdf_cl_d2_data_cl_cardiff_cl_only_alpha_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-scenario_non_kd_pr_copy_cdf_cl_d2_data_cl_cardiff_cl_only_alpha_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_b_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_b_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-scenario_tcr_data_cl_cardiff_cl_only29297_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-scenario_tcr_data_cl_cardiff_cl_only29297_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-05-scibert_finetuned_ades_sonatafyai_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-scoris_maltese_lithuanian_english_lt.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-scoris_maltese_lithuanian_english_pipeline_lt.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-secdisclosure_28l_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_arbertv2_ar.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_awesome_align_with_corsican_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_awesome_align_with_corsican_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_berel_finetuned_dss_maskedlm_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_bert_base_qarib_pipeline_ar.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_MetaKD_v1_pt.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_v1_pipeline_pt.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_bert_tagalog_base_uncased_pipeline_tl.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_bert_tagalog_base_uncased_tl.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_bioformer_16l_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_bioformer_16l_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_checkpoint_21200_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_chemical_bert_uncased_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_chemical_bert_uncased_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_corsican_condenser_marco_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_darijabert_arabizi_ar.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_darijabert_arabizi_pipeline_ar.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_drbert_4gb_cp_pubmedbert_fr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_drbert_4gb_cp_pubmedbert_pipeline_fr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_fairlex_cail_minilm_pipeline_zh.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_furina_indic_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_german_medbert_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_german_medbert_pipeline_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_gujibert_fan_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_gujibert_fan_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_hebert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_hebert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_incaselawbert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_incaselawbert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_jurisbert_base_portuguese_uncased_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_jurisbert_base_portuguese_uncased_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_kcbert_base_ko.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_kcbert_base_pipeline_ko.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_malay_coa_legal_bert_base_uncased_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-05-sent_malay_coa_legal_bert_base_uncased_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_nepalibert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_nepalibert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_norwegian_bokml_bert_ncc_male2female_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_norwegian_bokml_bert_ncc_male2female_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_recipebert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_recipebert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_simlm_base_msmarco_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_storieslm_v1_1963_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_storieslm_v1_1963_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_xlm_roberta_base_ft_cstwitter_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sent_xlm_roberta_base_pretrained_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sentence_classification4designtutor_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sentence_classification4designtutor_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sentiment_analysis_albert1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sentiment_analysis_albert1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sentiment_sentiment_small_random2_seed0_bernice_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sept_1_2024_awesome_eli5_mlm_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-seq2seq_finetuned_cxg_dutch_tonga_tonga_islands_code_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-setfit_finetuned_classifier_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-shopee_ner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-short_answer_classification_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-short_answer_classification_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-singberta_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sinhala_roberta_mc4_pipeline_si.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sinhala_roberta_mc4_si.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sinhalese_bert_ner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sinhalese_bert_ner_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sitexsometre_camembert_large_stsb50_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sitexsometre_camembert_large_stsb50_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-spanish_eng_xlm_roberta_sentiment_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-spanish_eng_xlm_roberta_sentiment_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-splade_pp_english_v1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-splade_pp_english_v1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-srberta_nemanjapetrovic_pipeline_sr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-srberta_nemanjapetrovic_sr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-stance_detection_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-stance_detection_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-05-stego_classifier_checkpoint_epoch_70_2024_07_26_16_03_28_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sts_conventional_fine_tuning_capstone_roberta_base_filtered_137_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-stsb_mpnet_basev2_sitexse_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sunbird_english_ganda_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sundanese_roberta_base_pipeline_su.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-sundanese_roberta_base_su.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-t2t_gun_nlth_from_base_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-t2t_gun_nlth_from_stratch_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-tajik_messages_classificator_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-tajik_messages_classificator_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-tapt_helpfulness_base_pretraining_model_full_train_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-tapt_helpfulness_base_pretraining_model_full_train_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-task_implicit_task__model_deberta__aug_method_rsa_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-task_implicit_task__model_deberta__aug_method_rsa_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-tech_roberta_pipeline_vi.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-tech_roberta_vi.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-test_airbus_year_report_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-test_demo_qa_with_roberta_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-test_directory_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-test_directory_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-test_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-test_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-testing_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-testing_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-timeset_ifm_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-tiny_distill_2601_2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-tiny_distill_2601_2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-tokenclassificationmodel_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-tokenclassificationmodel_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-topic_weather_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-toxic_comment_classification_using_roberta_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-toxic_comment_classification_using_roberta_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-trained_baseline_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-trained_croatian_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-trained_slovak_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-translation_for_recipes_english_french_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-translation_for_recipes_english_french_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-translations_english_german_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-05-translations_english_german_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-tugstugi_bengaliai_asr_whisper_medium_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-tugstugi_bengaliai_asr_whisper_medium_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-turkish_acc_80_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-turkish_acc_80_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-turkish_base_bert_capitalization_correction_pipeline_tr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-turkish_base_bert_capitalization_correction_tr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-turkishtranslator_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-turkishtranslator_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-twitter_data_xlm_roberta_base_sentiment_finetuned_memes_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-twitter_data_xlm_roberta_base_sentiment_finetuned_memes_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-twitter_roberta_base_2019_90m_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-twitter_roberta_base_2021_124m_irony_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-twitter_roberta_base_jun2021_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-twitter_roberta_base_jun2021_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-twitter_roberta_base_jun2022_15m_incr_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-twitter_roberta_base_jun2022_15m_incr_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-ukraine_waray_philippines_pov_uk.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-umberto_fine_tuned_irony_sarcasm_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-umberto_fine_tuned_irony_sarcasm_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-unibert_distilbert_2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-unibert_distilbert_2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-unspsc_product_category_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-unspsc_product_category_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-usclm_robrta_base_mk1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-usclm_robrta_base_mk1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-vidula_multilanguage_finetune_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-vidula_multilanguage_finetune_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-vietnews_roberta_base_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-vit_l_14_336_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-vit_l_14_336_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-whisper_sinhala_audio_tonga_tonga_islands_text_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-whisper_sinhala_audio_tonga_tonga_islands_text_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-whisper_small_arabic_arbml_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-whisper_small_arabic_arbml_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-whisper_small_bengali_anuragshas_bn.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-whisper_small_bengali_anuragshas_pipeline_bn.md create mode 
100644 docs/_posts/ahmedlone127/2024-09-05-whisper_small_egyptian_asr_v2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-whisper_small_hindi_abhiramk6_pipeline_hi.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-whisper_small_hre4_2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-whisper_small_hre4_2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-whisper_test_quant_smokxy_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-whisper_test_quant_smokxy_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-wmdp_classifier_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-wordwizard_masked_lm_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-wordwizard_masked_lm_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_emo_t_maryamfp_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_emo_t_maryamfp_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_nli_m_korsemeval_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_nli_m_korsemeval_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_pretrain_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_1024_256_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_balance_mixed_aug_swap_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_balance_mixed_aug_swap_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_csfd_slovak_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_csfd_slovak_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_final_mixed_aug_replace_tfidf_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_final_mixed_aug_replace_tfidf_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_final_vietnam_train_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_final_vietnam_train_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_augument_visquad2_15_3_2023_3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_covidqa_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_covidqa_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_english_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_english_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_iberautextification2024_9010_task2_v1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_iberautextification2024_9010_task2_v1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_kintweetse_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_kintweetse_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_sent3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_sent3_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_tweets_finetuned_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_tweets_finetuned_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_marc_english_anditya_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_marc_english_anditya_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_marc_english_danwilbury_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_marc_english_dummy_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_marc_english_dummy_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_nace_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_shona_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_shona_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_sinquad_v1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_somali_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_somali_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_squad_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuning_semeval_all_then_wrime_all_first_epoch3_test01_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuning_semeval_all_then_wrime_all_first_epoch3_test01_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_indonesian_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_indonesian_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_lr0_001_seed42_amh_hau_eng_train_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_lr0_001_seed42_amh_hau_eng_train_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_amh_eng_train_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_amh_eng_train_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_hau_eng_train_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_hau_eng_train_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_mixed_replace_vietnamese_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_mixed_replace_vietnamese_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad2_finetuned_squad2_covidqa_v2_all_data_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad2_finetuned_squad2_covidqa_v2_all_data_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad2_finetuned_squad_vnktrmnb_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad_dutch_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad_dutch_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad_finetuned_on_runaways_dutch_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad_finetuned_on_runaways_english_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad_finetuned_on_runaways_english_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad_finetuned_on_runaways_french_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad_finetuned_on_runaways_french_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_trimmed_french_60000_tweet_sentiment_french_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_arabic_trimmed_arabic_15000_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_arabic_trimmed_arabic_15000_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_english_trimmed_english_30000_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_english_trimmed_english_30000_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_french_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_italian_trimmed_italian_10000_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_italian_trimmed_italian_10000_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_30000_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_30000_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_verdict_classification_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_verdict_classification_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_vietnamese_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_vietnamese_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_yelp_mlm_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_clickbait_detection_nepal_bhasa_data_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_clickbait_detection_nepal_bhasa_data_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_clickbait_spoiling_2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_emotion_unmolb_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_emotion_unmolb_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_europarl_language_detection_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_europarl_language_detection_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_finetuned_augument_visquad2_24_3_2023_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_finetuned_augument_visquad2_24_3_2023_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_indosquadv2_1693993829_8_2e_05_0_01_5_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_indosquadv2_1693993829_8_2e_05_0_01_5_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_XLMr_ENIS_QA_IsQ_EnA_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265902_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265908_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265909_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265911_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265911_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_autonlp_roberta_base_squad2_24465519_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_autonlp_roberta_base_squad2_24465521_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_autonlp_roberta_base_squad2_24465521_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_thai_xlm_roberta_base_squad2_pipeline_th.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_thai_xlm_roberta_base_squad2_th.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlm_all_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlm_all_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlm_roberta_base_chaii_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlm_roberta_base_vietnamese_pipeline_vn.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlm_roberta_base_vietnamese_vn.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlm_roberta_squad_v1.1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlm_roberta_squad_v1.1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlmr_base_texas_squad_spanish_spanish_saattrupdan_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlmr_base_texas_squad_spanish_spanish_saattrupdan_pipeline_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlmr_enis_qa_icelandic_is.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlmr_enis_qa_icelandic_pipeline_is.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_v_base_trimmed_english_xnli_english_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlm_v_base_trimmed_english_xnli_english_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlmr_chatgptdetect_noisy_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlmr_english_german_all_shuffled_764_test1000_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlmr_english_german_train_shuffled_1986_test2000_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlmr_english_german_train_shuffled_1986_test2000_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlmr_estonian_english_train_shuffled_1986_test2000_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlmr_estonian_english_train_shuffled_1986_test2000_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlmr_finetuned_qamr_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-05-xlmr_finetuned_qamr_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlmr_finetuned_squad1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlmr_finetuned_squad1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlmr_qa_register_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlmr_qa_register_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlmr_sinhalese_english_all_shuffled_42_test1000_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlmr_sinhalese_english_all_shuffled_42_test1000_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlmr_tatoeba_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlmr_tatoeba_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlmrbase_finetuned_squad_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlmrbase_finetuned_squad_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlmroberta_finetuned_squadv2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlmroberta_finetuned_tydiqa_tel_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlmrobertabaseft_areaspopscience_mxtweets_multilabel_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xlmrobertabaseft_areaspopscience_mxtweets_multilabel_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xml_roberta_science_subject_text_classification_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xnli_xlm_r_only_thai_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-xnli_xlm_r_only_thai_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-yelp_polarity_microsoft_deberta_v3_base_seed_3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-yelp_polarity_microsoft_deberta_v3_base_seed_3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-yiddish_roberta_oscar_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-yiddish_roberta_oscar_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-zh2en40_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-05-zh2en40_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-008_microsoft_deberta_v3_base_finetuned_yahoo_800_200_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-100_sdb_taxxl_average_768_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-100_sdb_taxxl_average_768_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-20230328_002_baseline_xlmr_clickbait_spoiling_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-20230328_002_baseline_xlmr_clickbait_spoiling_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-4_shot_twitter_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-4_shot_twitter_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-500_sdb_taxxl_truncate_768_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-5w1h_recognizer_pipeline_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-7_shot_sta_slovak_batch10_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-acarisbert_distilbert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-accu_2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-acrossapps_ndd_mantisbt_test_content_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-06-acrossapps_ndd_mantisbt_test_content_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-address_emnet_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-afro_xlmr_mini_finetuned_kintweetsd_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-ag_news_microsoft_deberta_v3_base_seed_3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-ag_news_microsoft_deberta_v3_base_seed_3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-ag_news_microsoft_deberta_v3_large_seed_3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-ag_news_microsoft_deberta_v3_large_seed_3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-ai_human_detai_kk.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-albert_base_chinese_sayula_popoluca_zh.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-albert_base_v2_luciayn_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-albert_bbc_news_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-albert_large_v2_fold_3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-albert_large_v2_fold_3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-albert_tiny_chinese_sayula_popoluca_pipeline_zh.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-albert_tiny_chinese_sayula_popoluca_zh.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-all_mpnet_all_permutations_64_05_mnr_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-all_mpnet_all_permutations_64_05_mnr_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-all_mpnet_base_v2_2epoch_30000_mar2_closs_prsn_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-all_mpnet_base_v2_2epoch_30000_mar2_closs_prsn_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-all_mpnet_base_v2__tweet_eval_emotion__classifier_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-all_mpnet_base_v2_bioasq_matryoshka_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-all_mpnet_base_v2_bioasq_matryoshka_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-all_mpnet_base_v2_fine_tuned_epochs_8_binhcode25_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-all_mpnet_base_v2_ict_ep30_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-all_mpnet_base_v2_ict_ep30_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-ancient_greek_to_1453_ner_bert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-ancient_greek_to_1453_ner_bert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-assistantapp_whisper_quran_pipeline_ar.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-atte_0_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-audiosangraha_audio_tonga_tonga_islands_text_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-audiosangraha_audio_tonga_tonga_islands_text_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-autotrain_okr_iptal_v5_48523117787_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-autotrain_okr_iptal_v5_48523117787_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-banglapunctuationmodel_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-banglapunctuationmodel_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bat32_lr10_5_epo10_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-06-bat32_lr10_5_epo10_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_base_cased_finetuned_conll2003_english_ner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_base_cased_finetuned_conll2003_english_ner_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_base_cased_finetuned_mrpc_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_base_cased_finetuned_mrpc_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_base_german_dbmdz_cased_pipeline_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_base_german_dbmdz_uncased_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_base_magicslabnu_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_base_multilingual_cased_finetuned_amharic_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_base_multilingual_cased_finetuned_amharic_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_base_multilingual_cased_google_bert_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_base_multilingual_cased_google_bert_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_base_ner_nc1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_base_ner_nc1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_base_turkish_cased_ner_finetuned_ner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_base_turkish_cased_ner_finetuned_ner_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_base_turkish_ner_cased_pipeline_tr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_base_uncased_contracts_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_checkpoint_980000_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_fda_nutrition_ner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_fine_tuned_ner_resume_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_fine_tuned_ner_resume_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_finetuned1_squad_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_finetuned_ner_ashaduzzaman_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_finetuned_ner_ashaduzzaman_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_finetuned_ner_bangla_arbitropy_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_finetuned_ner_bangla_arbitropy_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_gps_research_001_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_large_cased_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_large_cased_whole_word_masking_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_large_uncased_squad_v2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_large_uncased_squad_v2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_ner_kriyans_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_phishing_classifier_student_jeahyung_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_token_classifier_base_vietnamese_upos_pipeline_vi.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_token_classifier_base_vietnamese_upos_vi.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-06-bert_token_classifier_berturk_keyword_extractor_pipeline_tr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_token_classifier_berturk_keyword_extractor_tr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bert_token_classifier_ner_german_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bertimbau_large_ner_selective_pipeline_pt.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bertimbau_large_ner_selective_pt.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bertin_roberta_large_spanish_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bertin_roberta_large_spanish_pipeline_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-best_model_yelp_polarity_64_13_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-best_model_yelp_polarity_64_13_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bge_reranker_base_course_skill_tuned_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bge_reranker_base_course_skill_tuned_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bias_classifier_roberta_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bioclinicalbert_full_finetuned_ner_pablo_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bioclinicalbert_full_finetuned_ner_pablo_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-boolq_microsoft_deberta_v3_base_seed_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bowdpr_marco_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bowdpr_marco_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bsc_bio_ehr_spanish_ehealth_kd_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bsc_bio_ehr_spanish_ehealth_kd_pipeline_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bsc_bio_ehr_spanish_symptemist_75_ner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bsc_bio_ehr_spanish_symptemist_75_ner_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-bsc_bio_ehr_spanish_symptemist_85_ner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_anon_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_anon_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_model_ahmedssoliman_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_model_ahmedssoliman_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_model_gamino_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_punccap_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_ahmad01010101_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_balalalalala_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_balalalalala_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_chernoffface_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_diodiodada_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_diodiodada_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_fede_ezeq_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_funailife_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_jennydqmm_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_kasmirak_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_kasmirak_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_lizhealey_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_madsci_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_madsci_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_munnafaisal_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_nandyala12_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_nandyala12_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_navanjana_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_navanjana_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_pechka_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_rentao_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_robinsh2023_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_simranjeet97_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_simranjeet97_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_speng123_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_stevhliu_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_stevhliu_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_walter133_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_wwhheerree_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_yangyangsong_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_yangyangsong_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_setfit_model1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_setfit_model1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_token_classification_v2_1_2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_token_classification_v2_1_2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_all_saprotection_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_all_saprotection_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_jpr_gonzalezrostani_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_jpr_gonzalezrostani_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_jquali_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_2_majyoarisu_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_2_majyoarisu_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_adalee1001_pipeline_en.md create mode 
100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_casestudent_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_casestudent_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_dlim12_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_hoaan2003_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_hoaan2003_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_jarvisx17_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_jarvisx17_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_jsalafranca_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_jsalafranca_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_mhdkj_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_povhal_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_povhal_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_roeizucker_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_roeizucker_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_ttnksm_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_yannik_646_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_ydc13_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_ydc13_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_yunjinchoi_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_yunjinchoi_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_zanche_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_zanche_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_dist_nepal_bhasa_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_dist_nepal_bhasa_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_fine_tuning_opus_maltese_english_vietnamese_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_model_onsba_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_model_onsba_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_ner_model_mido545_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_nmt_model_ad_iiitd_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-burmese_ws_extraction_model_26th_mar_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-camembert_base_test_model_sophios_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-camembert_base_test_model_sophios_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-candle_cvss_complexity_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-candle_cvss_complexity_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-candle_cvss_confidentiality_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-06-candle_cvss_confidentiality_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-candle_cvss_interaction_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-canvers_audio_caption_v1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-canvers_audio_caption_v1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-chatutterance_english_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-chuvash_validator_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-claim_extraction_classifier_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-clinicalbert_craft_ner_nepal_bhasa_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-clinicalbert_craft_ner_nepal_bhasa_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-clinicalnerpt_procedure_pipeline_pt.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-clinicalnerpt_procedure_pt.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-clip_fashion_attribute_model_try_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-code_search_codebert_base_5_random_trimmed_with_g_and_spaces_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-codice_fiscale_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-complaints_classifier_jpsteinhafel_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-congretimbau_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-congretimbau_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-content_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-convberturk_keyword_extractor_pipeline_tr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-convberturk_keyword_extractor_tr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-cpegen_vpv_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-cpu_conditional_classifier_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-cpu_conditional_classifier_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-cros_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-cros_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-cryptobertrefined_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-cs431_vietnamese_coqe_csi_v2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-cs431_vietnamese_coqe_csi_v2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-ct_cos_xlmr_20230920_2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-ct_cos_xlmr_20230920_2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-ct_kld_xlmr_20230814_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-danish_distilbert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-darkbert_finetuned_ner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-darkbert_finetuned_ner_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dataequity_kde4_english_german_qlora_dataequity_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dataequity_kde4_english_german_qlora_dataequity_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dbbuc_30p_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dbbuc_30p_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_amazon_reviews_v1_shuli_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-06-deberta_large_hallucination_eval_v2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_large_hallucination_eval_v2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_base_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_base_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_glue_cola_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_qqp_garipovroma_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_qqp_garipovroma_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_rocstories_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_rocstories_test_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_tasksource_paraphrase_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_tasksource_paraphrase_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_v3_bass_complex_questions_classifier_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large__sst2__train_16_3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large__sst2__train_16_3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large__sst2__train_16_7_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large__sst2__train_16_7_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large__sst2__train_8_4_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_emotion_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_emotion_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_mono_3_epoch_lr_5e_6_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_mono_3_epoch_lr_5e_6_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_offensive_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_offensive_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_main_passage_old_facts_rater_half_gpt4_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_main_passage_old_facts_rater_half_gpt4_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_gpt4_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_half_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_half_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_rater_sample_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_rater_sample_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_related_passage_consistency_rater_half_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_related_passage_consistency_rater_half_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_v3_small_finetuned_mnli_rdp99_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deberta_xsmall_hatespeech_reward_model_en.md create mode 
100644 docs/_posts/ahmedlone127/2024-09-06-declutr_s10_arabic_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-declutr_s10_arabic_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deep_4_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deeva_modcat_seqclass_deberta_v1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-deeva_modcat_seqclass_deberta_v1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-detectors_legit_user_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-detectors_legit_user_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-disease_diagnosis_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-disease_diagnosis_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_cased_finetuned_chunk_2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_cased_finetuned_chunk_2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_cased_logdetective_extraction_retrained_fedora_copr_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_cased_logdetective_extraction_retrained_fedora_copr_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_german_cased_pipeline_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_multilingual_cased_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_multilingual_cased_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_ner_058_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_ai4privacy_english_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_ai4privacy_english_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_distilled_squad_qa_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_distilled_squad_qa_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_emotion_xyang2023_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_emotion_xyang2023_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_clinc_buruzaemon_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_dourc_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_dourc_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_emotion_sjhong_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_emotion_sjhong_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_emotion_skillripper_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_goemotion_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_hotels_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_hotels_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_blitherboom_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_blitherboom_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_haotiany_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_ipvikas_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_ipvikas_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_muhbdeir_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_nlp_course_chapter7_section2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_nlp_course_chapter7_section2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_valentinguigon_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_valentinguigon_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_baobao88_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_beccacohen_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_hamzasidhu786_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_hf_tutorial_using_accelerate_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_lb100_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_mireya25_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_rikrim_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_ronenh24_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_ner_arundhati87_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_ner_arundhati87_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_ner_cerastes_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_ner_iamdev_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_ner_iamdev_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_ner_steven668_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_ner_steven668_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_sentiment_amazon_finetuned_emotions_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_blaze07_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_dkimds_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_dkimds_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_maseiya_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_miesnerjacob_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_scmis011_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_scmis011_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_sebastians_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_sebastians_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_shadowtwin41_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_katxtong_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_katxtong_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_shyamkant_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_shyamkant_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_toorgil_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_italian_chamorro_cree_entry_classification_it.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_italian_chamorro_cree_entry_classification_pipeline_it.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_on_mini_finer_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_on_mini_finer_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_question_answering_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_squad2_p5_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_squad2_p5_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_whole_word_masking_finetuned_imdb_nlp_course_chapter7_section2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_conll2003_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_emotion_ahm123_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_emotion_ahm123_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_extractive_qa_project_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_finetune_own_data_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_finetune_own_data_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_ai4privacy_v2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_ner_amanpatkar_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_ner_mido545_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_ner_mido545_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_oppo_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_oppo_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_squad_ashaduzzaman_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_squad_ashaduzzaman_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_squad_droo303_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_squadv2_nmc_29092004_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_squadv2_vubacktracking_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_squadv2_vubacktracking_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_maccrobat_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_maccrobat_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_ner_initial_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_ner_initial_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_qa_english_german_spanish_model_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_qa_english_german_vietnamese_chinese_spanish_model_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_qa_robustqa_baseline_01_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_qa_robustqa_baseline_02_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_qa_squad_english_german_spanish_vietnamese_chinese_model_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_sentiment_analysis_socialmedia_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_sentiment_analysis_socialmedia_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbert_tokenizer_256k_mlm_750k_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbertbaselinehsthreeepoch_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilbertbaselinehsthreeepoch_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distillbert_newscategoryclassification_fullmodel_3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilroberta_base_distilbert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-distilroberta_base_distilbert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_disregard_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_disregard_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_jdonnelly0804_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_2_ysharma_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_2_ysharma_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_alex_deng_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_alex_deng_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_alexey_wk_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_aokikenichi_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_aokikenichi_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_bingwork_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_bingwork_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_chuckym_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_ericchchiu_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_exilesaber_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_fab7_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-06-dummy_model_gautamguru_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_gautamguru_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_jaweed123_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_jaweed123_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_jp1773hsu_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_kwasi_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_leisa_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_leisa_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_longbabin_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_longbabin_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_lourvalli_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_manil5267_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_manil5267_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_muger01_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_nayoung1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_nayoung1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_nugget00_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_renly_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_renly_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_rizwanakt_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_sapphirejade_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_srushnaik_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_suzuki0829_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_suzuki0829_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_tzoght_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_tzoght_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_wjh70301_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_wjh70301_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_youarebelongtome_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-dummy_model_youarebelongtome_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-eng_lug_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-eng_lug_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-english_coptic_norm_group_greekified_bt_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-english_german_translation_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-english_japanese_xlm_5_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-english_japanese_xlm_5_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-english_multinerd_ner_more_training_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-english_multinerd_ner_more_training_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-esgi_nlp_tp4_virtual_assistant_pipeline_fr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-esgi_nlp_tp4_virtual_assistant_pipeline_pipeline_fr.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-06-expe_4_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-expe_4_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-extract_question_from_text_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-extractive_qa_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-fabert_fa.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-fabert_pipeline_fa.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-facets_128b_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-facets_128b_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-facets_ep3_1122_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-facets_ep3_1122_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-fairlex_fscs_minilm_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-fairlex_fscs_minilm_pipeline_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-faq_qa_model_manvitha_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-faq_qa_model_manvitha_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-few_shot_learner_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-fiction_gbert_large_droc_np_ner_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-fiction_gbert_large_droc_np_ner_pipeline_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-film_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-film_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-finance_bearish_bullish_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-fine_tune_bert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-fine_tuned_distilbert_base_uncased_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-fine_tuned_distilbert_medical_chatbot_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-fine_tuned_distilbert_medical_chatbot_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-finetuned_helsinki_nlp_english_marathi_marh_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-finetuned_helsinki_nlp_marathi_marh_english_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-finetuned_hindi_tonga_tonga_islands_english_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-finetuned_hindi_tonga_tonga_islands_english_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-finetuned_kde4_portuguese_tonga_tonga_islands_catalan_2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-finetuned_kde4_portuguese_tonga_tonga_islands_catalan_2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-finetuned_opusmt_english_hindi_gujarati_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-finetuned_qa_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-finetuned_whisper_small_tanglish_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-finetuning_sentiment_model_3000_samples_albert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-finetuning_sentiment_model_3000_samples_albert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-finetuning_sentiment_model_deberta_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-finguard_distilbert_37500_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-finguard_distilbert_37500_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-finnish_rauma_fi.md create 
mode 100644 docs/_posts/ahmedlone127/2024-09-06-finnish_rauma_pipeline_fi.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-first_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-first_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-gal_ner_xlmr_5_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-gal_ner_xlmr_5_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-gdpr_privacy_policy_ner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-gdpr_privacy_policy_ner_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-hate_bert_hasoc_marathi_mr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-hate_bert_hasoc_marathi_pipeline_mr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-hate_hate_random3_seed0_bernice_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-hate_hate_random3_seed0_bernice_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-hf_distilbert_imdb_mlm_cosine_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-hf_distilbert_imdb_mlm_cosine_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-huner_ncbi_disease_dslim_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-huner_ncbi_disease_dslim_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-hw10_qa_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-hw10_qa_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-idmgsp_roberta_train_conclusion_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-imdb_gpt2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-imdb_v1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-imdb_v1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-inde_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-inde_3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-inde_3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-indo_aryan_xlm_r_base_pipeline_gu.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-intentclassification_v3_kssumanth6_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-intropln_setfit_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-intropln_setfit_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-investopedia_qna_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-kannada_ner_kn.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-kannada_ner_pipeline_kn.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-lab1_finetuning_bill1886_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-lab1_finetuning_bill1886_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-lab1_finetuning_chenxin0903_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-lab1_finetuning_chenxin0903_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-lab1_finetuning_muyuanma_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-lab1_finetuning_muyuanma_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-latte_mc_bert_base_thai_ws_pipeline_th.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-latte_mc_bert_base_thai_ws_th.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-lenate_model_8_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-lenate_model_8_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-06-llm_firewall_distilbert_base_uncased_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-llm_firewall_distilbert_base_uncased_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-llmlingua_2_bert_base_multilingual_cased_meetingbank_microsoft_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-llmlingua_2_bert_base_multilingual_cased_meetingbank_microsoft_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-malay_sentiment_deberta_xsmall_ms.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-malay_sentiment_deberta_xsmall_pipeline_ms.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-malaysian_whisper_base_ms.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-malaysian_whisper_base_pipeline_ms.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-marathi_albert_v2_mr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-marathi_albert_v2_pipeline_mr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-marathi_marh_val_g_mr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-marian_finetuned_kde4_chinese_tonga_tonga_islands_english_charliealex123_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_longma98_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-marian_finetuned_kde4_english_tonga_tonga_islands_french_indah1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-marian_finetuned_kde4_english_tonga_tonga_islands_french_jhhan_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-marian_finetuned_kde4_english_tonga_tonga_islands_vietnamese_accelerate_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-marian_finetuned_kde4_english_tonga_tonga_islands_vietnamese_accelerate_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-marian_finetuned_maltese_en2vi_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-marian_finetuned_maltese_en2vi_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-marian_formality_fine_tuned_english_polish_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-marian_nmt_enid_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-marian_nmt_enid_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-marianmix_english_chinese_10_deskdown_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-masked_lm_shakespeare_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-mdeberta_base_v3_2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-mdeberta_nli_bilingual_2_0_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-mdeberta_nli_bilingual_2_0_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_assin_similarity_pipeline_pt.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_assin_similarity_pt.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_hatebr_pipeline_pt.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_qnli_100_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_qnli_100_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_qnli_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_qnli_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_qqp_100_en.md create mode 
100644 docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_qqp_100_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-med_bert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-medroberta_dutch_experiencer_nl.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-medroberta_dutch_experiencer_pipeline_nl.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-megabatchmarginloss_10_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-megabatchmarginloss_10_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-memo_bert_sanskrit_saskta_3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-memo_bert_sanskrit_saskta_3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-memo_bert_wsd_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-memo_bert_wsd_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-microsoft_deberta_v3_large_cls_cree_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-microsoft_deberta_v3_large_cls_cree_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-mixed_distil_bert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-mixed_distil_bert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-mnli_microsoft_deberta_v3_base_seed_2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-mnli_microsoft_deberta_v3_large_seed_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-mnli_microsoft_deberta_v3_large_seed_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-mnli_roberta_large_seed_3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-mnli_roberta_large_seed_3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-model_test_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-model_zip_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-mpnet_80k_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-mpnet_base_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-mpnet_base_gistembedloss_msee_evaluator_salestax_docs_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-mpnet_base_gistembedloss_msee_evaluator_salestax_docs_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-mpnet_base_mimics_query_facet_encoder_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-mpnet_base_mimics_query_facet_encoder_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-mpnet_multilabel_sector_classifier_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-mrpc_microsoft_deberta_v3_large_seed_2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-mrpc_microsoft_deberta_v3_large_seed_2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-multilingual_hate_speech_robacofi_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-multilingual_xlm_roberta_for_ner_bcokdilli_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-multilingual_xlm_roberta_for_ner_bcokdilli_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-naija_xlm_twitter_base_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-naija_xlm_twitter_base_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-nasa_impact_nasa_smd_ibm_southern_sotho_v2_classification_finetuned_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-nasa_impact_nasa_smd_ibm_southern_sotho_v2_classification_finetuned_pipeline_en.md 
create mode 100644 docs/_posts/ahmedlone127/2024-09-06-nepal_bhasa_bert_v2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-nepal_bhasa_bert_v2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-nepal_bhasa_dummy_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-nepal_bhasa_phishing_email_detection_ful_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-ner_ecu_uda_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-ner_ecu_uda_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-ner_model_cwchang_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-ner_model_ep1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-ner_model_maccrobat_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-ner_ner_random0_seed1_bernice_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-ner_oee_techme_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-ner_oee_techme_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-nerd_nerd_temporal_bernice_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-nerd_nerd_temporal_bernice_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-news_title_roberta_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-news_title_roberta_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-nlp_course_chapter4_test_model_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-nlp_course_chapter4_test_model_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-nlp_task_modified_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-nlp_task_modified_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-norwegian_bokml_whisper_base_beta_no.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-norwegian_bokml_whisper_base_beta_pipeline_no.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-norwegian_bokml_whisper_small_beta_no.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-norwegian_bokml_whisper_small_beta_pipeline_no.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-norwegian_bokml_whisper_small_verbatim_nbailabbeta_no.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-norwegian_bokml_whisper_small_verbatim_nbailabbeta_pipeline_no.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-nps_mpnet_lds_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-nusabert_base_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-nusabert_base_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-ojobert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-opendispatcher_v4_gpt35turbo_and_gpt4_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_chinese_finetuned_english_tonga_tonga_islands_chinese_galgame_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ac_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_pong_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_german_finetuned_english_tonga_tonga_islands_german_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_lamkhoi_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_lamkhoi_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_souta0919_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_souta0919_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_swahili_finetuned_english_tonga_tonga_islands_swahili_chituyi_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-opus_maltese_russian_english_end_tonga_tonga_islands_end_russian_tonga_tonga_islands_english_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-opus_maltese_tigrinya_english_finetuned_npomo_english_10_epochs_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-opus_maltese_tigrinya_english_finetuned_npomo_english_10_epochs_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-panx_xlmr_base_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-panx_xlmr_base_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-pap2patentsberta_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-pap2patentsberta_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-paws_x_xlm_r_only_german_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-paws_x_xlm_r_only_german_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-perspective_conditional_utilitarian_deberta_01_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-pharo_classreferences_classifier_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-pharo_classreferences_classifier_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-phayathaibert_th.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-philberta_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-philberta_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-pictalk_distil_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-pictalk_distil_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-pii_roberta_large_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-pii_roberta_large_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-polyfaq_cross_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-portuguese_xlm_r_falsetrue_0_2_best_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-portuguese_xlm_r_falsetrue_0_2_best_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-predict_perception_xlmr_blame_victim_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-predict_perception_xlmr_blame_victim_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-punct_model_v2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-punct_model_v2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-punctuate_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-punctuate_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-q2d_128b_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-q2d_gpt_22_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-q2d_gpt_22_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-q2e_ep3_42_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-q2e_ep3_42_pipeline_en.md 
create mode 100644 docs/_posts/ahmedlone127/2024-09-06-q_only_ep3_22_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-q_only_ep3_22_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-qa_distell0_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-qa_model2_sumittagadiya_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-qa_model2_sumittagadiya_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-qa_model3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-qa_model_hansollll_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-qa_model_hansollll_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-qa_model_sif10_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-qa_real_data_test_xlm_roberta_base_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-qa_redaction_nov1_18_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-qa_redaction_nov1_19_a1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-qa_redaction_nov1_19_a1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-qa_synth_21_sept_with_finetune_1_0_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-qa_synth_21_sept_with_finetune_1_0_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-qa_synth_25_sept_with_finetune_1_1_on19sept_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-qa_synth_25_sept_with_finetune_1_1_on19sept_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-qa_synth_data_with_unanswerable_23_aug_xlm_roberta_base_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-qa_synthetic_data_train_real_data_test_xlm_roberta_base_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-qa_synthetic_data_train_real_data_test_xlm_roberta_base_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-question_answer_thirdeyedata_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-question_answering_tutorial_practice_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-question_answering_tutorial_practice_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-radbert_roberta_4m_zzxslp_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-radbert_roberta_4m_zzxslp_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-randomly_pruned_30_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-randomly_pruned_30_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-refpydst_1p_icdst_split_v1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-refpydst_1p_icdst_split_v1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-results_soniquentin_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-results_soniquentin_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-reward_deberta_v3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-reward_deberta_v3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-rg_fake_signatures_southern_sotho_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-rg_fake_signatures_southern_sotho_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-roberta_babe_ft_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-roberta_base_french_fr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-roberta_base_french_pipeline_fr.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-06-roberta_base_multinerd_jayant_yadav_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-roberta_base_multinerd_jayant_yadav_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-roberta_classifier_large_finetuned_clinc_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-roberta_large_bne_capitel_sayula_popoluca_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-roberta_large_bne_capitel_sayula_popoluca_pipeline_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-roberta_large_boolq_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-roberta_large_boolq_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-roberta_large_finnish_v2_pipeline_fi.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-roberta_ner_graphcodebert_MT4TS_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-roberta_ner_roberta_large_tweetner_random_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-roberta_small_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-roberta_small_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-rohit_setfit_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-rohit_setfit_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-rotten_tomatoes_microsoft_deberta_v3_base_seed_2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-rpa_synth_08nov_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-rpa_synth_08nov_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sb_temfac_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sb_temfac_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_gamma_jason_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-secroberta_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-secroberta_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-securebert_aptner_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-semantic_test_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-semantic_test_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sembr2023_bert_small_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_afro_xlmr_mini_finetuned_kintweetsc_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_afro_xlmr_mini_finetuned_kintweetsc_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_analysis_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_analysis_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_bert_base_english_chinese_cased_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_bert_base_english_chinese_cased_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_bert_base_historic_multilingual_cased_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_bert_base_multilingual_cased_finetuned_yoruba_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_bert_base_multilingual_cased_finetuned_yoruba_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_bert_base_romanian_uncased_v1_pipeline_ro.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_bert_for_patents_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_bert_for_patents_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-06-sent_bert_large_uncased_semeval2014_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_bert_medieval_multilingual_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_bert_medieval_multilingual_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_biomedvlp_cxr_bert_general_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_checkpoint_12600_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_czert_b_base_cased_cs.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_czert_b_base_cased_pipeline_cs.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_entitycs_39_mlm_xlmr_base_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_entitycs_39_mlm_xlmr_base_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_estroberta_et.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_estroberta_pipeline_et.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_fbert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_fbert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_gbert_large_finetuned_cust_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_gbert_large_finetuned_cust_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_hing_mbert_hi.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_indicbertv2_mlm_sam_tlm_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_indicbertv2_mlm_sam_tlm_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_koobert_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_legal_bert_base_cased_ptbr_pt.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_neural_cherche_sparse_embed_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_odia_bert_pipeline_or.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_ofa_multi_400_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_ofa_multi_400_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_ope_bert_v2_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_ope_bert_v2_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_panx_xlmr_base_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_panx_xlmr_base_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_psych_search_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_psych_search_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_roberta_base_exp_32_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_roberta_base_exp_32_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_splade_sparse_vector_pinecone_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_dholuo_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_dholuo_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_digikala_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_digikala_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_on_runaways_english_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_shona_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_shona_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_xhosa_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_yoruba_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sentiment_analyser_joyo1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sentiment_analyser_joyo1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sentiment_analysis_sayula_popoluca_neg1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sentiment_analysis_sayula_popoluca_neg1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sentiment_ohb3_xlm_roberta_hungarian_hu.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sentiment_ohb3_xlm_roberta_hungarian_pipeline_hu.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sentiment_sentiment_small_temporal_bernice_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sentiment_sentiment_small_temporal_bernice_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-setfit_finetuned_financial_text_classification_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-setfit_finetuned_financial_text_classification_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-setfit_mbti_multiclass_w266_nov29_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-setfit_mbti_multiclass_w266_nov29_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-setfit_model_feb11_misinformation_on_global_support_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-setfit_model_feb11_misinformation_on_global_support_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-setfit_model_test_2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-snli_microsoft_deberta_v3_base_seed_2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-snli_microsoft_deberta_v3_base_seed_2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-snli_microsoft_deberta_v3_large_seed_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-snli_microsoft_deberta_v3_large_seed_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-song_artist_classifier_v7_alberta_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-song_artist_classifier_v7_alberta_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-southern_sotho_all_mpnet_finetuned_arabic_1500_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-spanish_english_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-spanish_english_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-splade_sparse_vector_pinecone_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-splade_sparse_vector_pinecone_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sst2_microsoft_deberta_v3_base_seed_2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sst2_microsoft_deberta_v3_base_seed_3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-stanford_deidentifier_with_radiology_reports_and_i2b2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-sungbeom_whisper_small_korean_set9_ko.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-06-sungbeom_whisper_small_korean_set9_pipeline_ko.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-t2t_gun_nlth_from_base_warmup_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-task_implicit_task__model_deberta__aug_method_ri_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-task_subtle_task__model_deberta__aug_method_eda_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-task_subtle_task__model_deberta__aug_method_eda_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-task_text_classification_distilbert_imdb_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-tb_xlm_r_fpt_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-tcfd_recommendation_classifier_v1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-teamim_tiny_weightdecay_0_05_augmented_combined_data_date_10_07_2024_14_33_he.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-teamim_tiny_weightdecay_0_05_augmented_combined_data_date_10_07_2024_14_33_pipeline_he.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-telugu_bertu_ner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-test_model_tambeo_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-test_ner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-test_ner_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-test_nhoxxkienn_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-test_qa_model_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-test_test_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-test_with_web_interface_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-text_classification_yaqinlah_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-text_classification_yaqinlah_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-text_complexity_roberta_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-tiny_bert_0102_5000_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-tiny_bert_0102_last_iter_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-tiny_random_mpnetforsequenceclassification_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-tntoeng_by_ibrahim_v1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-tntoeng_by_ibrahim_v1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-topic_topic_random0_seed2_bernice_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-torch_distilbert_policies_comparison_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-toxicity_model_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-toxicity_model_pipeline_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-translation_english_lug_v3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-translation_english_lug_v3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-tuf_fewshot_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-tuf_fewshot_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-useless_model_try_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-village_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_age_estimator_hi.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_age_estimator_pipeline_hi.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-06-whisper_base_fine_tuned_russian_pipeline_ru.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_base_fine_tuned_russian_ru.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_base_wolof_cifope_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_base_wolof_cifope_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_finetuning_phoneme_transcription_g2p_example_3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_finetuning_phoneme_transcription_g2p_example_3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_italian_small_it.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_italian_small_pipeline_it.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_medium_arabic_arbml_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_medium_eg_ar.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_medium_english_santhosh643_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_medium_medical_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_medium_portuguese_3000h_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_samll_hassanya_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_samll_hassanya_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_arabic_gnsepili_ar.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_arabic_gnsepili_pipeline_ar.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_bb_vahn98_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_bb_vahn98_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_belarusian_be.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_belarusian_pipeline_be.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_breton_arzhurkoadek_br.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_breton_arzhurkoadek_pipeline_br.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_chinese_desktop_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_chinese_desktop_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_chinese_twi_kimbochen_pipeline_zh.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_chinese_twi_kimbochen_zh.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_chuvash_arabic_ar.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_chuvash_arabic_pipeline_ar.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_common_voice_fa.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_common_voice_pipeline_fa.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_cv11_german_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_cv11_german_pipeline_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_darija_mryasu_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_darija_mryasu_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_english_accented_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_english_hindi_venkatesh4342_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-06-whisper_small_english_hindi_venkatesh4342_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_hausa_mkalbani_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_hausa_mkalbani_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_indonesian_cv17_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_indonesian_tonimurfid_id.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_indonesian_tonimurfid_pipeline_id.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_khmer_v2_km.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_khmer_v2_pipeline_km.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_korean_haseong8012_ko.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_korean_sungbeom_ko.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_korean_sungbeom_pipeline_ko.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_korean_zeroth_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_korean_zeroth_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_latvian_raivisdejus_lv.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_latvian_raivisdejus_pipeline_lv.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_malayalam_v5_ml.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_malayalam_v5_pipeline_ml.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_mongolian_erkhem_gantulga_mn.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_mongolian_erkhem_gantulga_pipeline_mn.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_sinhala_fine_tune_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_sinhala_fine_tune_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_spanish_clu_ling_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_spanish_clu_ling_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_spanish_nemo_unified_2024_07_02_15_19_06_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_swahili_asr_chuvash_14_pipeline_sw.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_swahili_asr_chuvash_14_sw.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_tamil_parambharat_pipeline_ta.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_tamil_parambharat_ta.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_xhosa_pipeline_xh.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_small_xhosa_xh.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_telugu_base_pipeline_te.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_telugu_base_te.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_test_quant_quantized_samagradatagov_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_test_quant_quantized_samagradatagov_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_arabic_quran_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_arabic_quran_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_english_atcosim_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_english_atcosim_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_french_cased_fr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_french_cased_pipeline_fr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_german_primeline_pipeline_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_korean_pipeline_ko.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_latvian_lv.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_nepali_ne.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_nepali_pipeline_ne.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_persian_farsi_pipeline_fa.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_serbian_yodas_v0_2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-wikidata_simplequestions_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-wikidata_simplequestions_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_claim_detection_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_claim_detection_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_augument_visquad2_20_3_2023_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_augument_visquad2_20_3_2023_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_burmese_dear_watson2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_burmese_dear_watson2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_ecoicop_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_ecoicop_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_emotion_37_labels_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_marc_english_hslu_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_marc_english_hslu_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_nepal_bhasa_data_visquad2_13_3_2023_3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_nepal_bhasa_data_visquad2_13_3_2023_3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_all_100yen_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_all_100yen_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_all_54data_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_all_54data_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_all_buruzaemon_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_all_buruzaemon_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_all_gcmsrc_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_all_gcmsrc_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_english_ankit15nov_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_english_ankit15nov_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_english_cataluna84_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_english_cataluna84_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_english_leosol_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_french_ankit15nov_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_french_sorabe_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_french_sorabe_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_french_sungwoo1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_french_sungwoo1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_french_transformersbook_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_french_youngbreadho_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_french_youngbreadho_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_adalberti_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_adalberti_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_ahmad_alismail_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_francos_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_francos_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_gcmsrc_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_reinoudbosch_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_reinoudbosch_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_rupe_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_rupe_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_songys_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_songys_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_wooseok0303_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_wooseok0303_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_zebans_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_ghks4861_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_jpaulhunter_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_jpaulhunter_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_jslowik_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_jslowik_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_kuma_s_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_kuma_s_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_mmenendezg_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_mmenendezg_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_nadezda_at_htec_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_nadezda_at_htec_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_nerdai_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_nerdai_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_roshanrai1304_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_roshanrai1304_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_italian_amitjain171980_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_italian_amitjain171980_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_italian_vonewman_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_italian_vonewman_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_rugo_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_squad_au_jluckyboyj_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_squad_au_jluckyboyj_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_visquad_2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_visquad_2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_ft_udpos213_top9lang_southern_sotho_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_ft_udpos213_top9lang_southern_sotho_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_germeval21_toxic_with_data_augmentation_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_hungarian_ner_huner_hu.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_nepal_bhasa_vietnam_aug_insert_bert_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_nepal_bhasa_vietnam_aug_insert_bert_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_panx_dataset_russian_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_panx_dataset_russian_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_russian_sentiment_rusentiment_pipeline_ru.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_trimmed_spanish_60000_xnli_spanish_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_tweet_sentiment_spanish_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_uncased_pina_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_wolof_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_xnli_french_trimmed_french_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_indosquadv2_1694025792_8_2e_06_0_01_5_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_indosquadv2_1694025792_8_2e_06_0_01_5_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_indosquadv2_1694026018_16_2e_05_0_01_3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_indosquadv2_1694026018_16_2e_05_0_01_3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_indosquadv2_1694026058_8_2e_05_0_01_3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_indosquadv2_1694026058_8_2e_05_0_01_3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_low_resource_langid_large2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_low_resource_langid_large2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_qa_afriberta_base_finetuned_tydiqa_pipeline_sw.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_qa_afriberta_base_finetuned_tydiqa_sw.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265900_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265900_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265904_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_qa_distill_xlm_mrc_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_qa_distill_xlm_mrc_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_thai_2_th.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlmr_english_german_all_shuffled_1985_test1000_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlmr_english_german_all_shuffled_1985_test1000_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlmr_enis_qa_icelandic_finetune_hindi_course_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlmr_enis_qa_icelandic_finetune_hindi_course_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlmr_idkmrc_webis_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlmr_idkmrc_webis_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlmroberta_finetuned_recipeqa_modified_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlmroberta_finetuned_recipeqa_modified_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_base_finetuned_ner_swahili_sw.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_base_finetuned_swahili_finetuned_ner_swahili_pipeline_sw.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_base_finetuned_swahili_finetuned_ner_swahili_sw.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_base_uncased_mit_restaurant_pipeline_en.md 
create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_cj_mills_base_finetuned_panx_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_cj_mills_base_finetuned_panx_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_haesun_base_finetuned_panx_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_jamesmarcel_base_finetuned_panx_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_jgriffi_base_finetuned_panx_all_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_jgriffi_base_finetuned_panx_all_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_naomiyjchen_base_finetuned_panx_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_naomiyjchen_base_finetuned_panx_pipeline_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_olpa_base_finetuned_panx_pipeline_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_rishav_hub_base_finetuned_panx_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_rishav_hub_base_finetuned_panx_pipeline_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_selamatpagi_base_finetuned_panx_pipeline_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_skr3178_base_finetuned_panx_all_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_skr3178_base_finetuned_panx_all_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_xugenpeng_base_finetuned_panx_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_xugenpeng_base_finetuned_panx_pipeline_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xml_roberta_model_qa_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-xml_roberta_model_qa_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-yappychappysimple_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-yappychappysimple_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-zabantu_sot_ven_170m_pipeline_ve.md create mode 100644 docs/_posts/ahmedlone127/2024-09-06-zabantu_sot_ven_170m_ve.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-2020_q2_full_tweets_combined90_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-2020_q2_full_tweets_combined90_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-4_datasets_fake_news_with_balanced_with_add_one_sentence_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-accu_3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-accu_3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-acrossapps_ndd_petclinic_test_content_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-acrossapps_ndd_petclinic_test_content_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-action_policy_plans_classifier_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-agric_eng_lug_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-ahisto_ner_model_s_mu_nlpc_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-albert_kor_base_tweak_ko.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-albert_kor_base_tweak_pipeline_ko.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-albert_minebgsd01_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-07-albert_minebgsd01_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-albert_test_model_2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-albert_test_model_2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-albert_tiny_chinese_david_ner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-albert_turkish_turkish_spam_email_pipeline_tr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-albert_turkish_turkish_spam_email_tr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-albert_tweet_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-albert_tweet_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-aliens_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-all_mpnet_base_newtriplets_v2_lr_2e_7_m_5_e_3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-all_mpnet_base_v2_20240102_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-all_mpnet_base_v2_ledgar_full_contrastive_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-all_mpnet_base_v2_lr_5e_7_margin_1_epoch_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-all_mpnet_lr5e_8_margin_1_ep_3_bosnian_64_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-all_mpnet_lr5e_8_margin_1_ep_3_bosnian_64_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-amazon_helpfulness_classification_on_tapt_pretrained_norwegian_condencing_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-arabic_bert_model_ar.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-arabic_bert_model_pipeline_ar.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-arbertv2_ar.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-arbertv2_pipeline_ar.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-argureviews_sentiment_roberta_v1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-argureviews_sentiment_roberta_v1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-arywiki_20230101_roberta_mlm_bots_ar.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-arywiki_20230101_roberta_mlm_bots_pipeline_ar.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-augmented_distillbert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-autotrain_qna_1170143354_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bce_reranker_base_v1_maidalun1020_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bce_reranker_base_v1_maidalun1020_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bcms_bertic_ner_hr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bcms_bertic_ner_pipeline_hr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-benjys_first_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-berel_finetuned_dss_maskedlm_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-berel_finetuned_dss_maskedlm_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bert_b02_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bert_b02_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bert_base_cased_ner_conll2003_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bert_base_dutch_cased_finetuned_mbert_finetuned_ner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bert_base_multilingual_uncased_finetuned_urdu_pipeline_xx.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-07-bert_base_multilingual_uncased_finetuned_urdu_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bert_base_ner_pii_fn_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bert_base_turkish_uncased_ner_tr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bert_finetuned_ner4_nathali99_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bert_finetuned_ner4_nathali99_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bert_finetuned_ner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bert_finetuned_ner_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bert_fromscratch_galician_large_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bert_fromscratch_galician_large_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bert_indo_base_uncased_ner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bert_indo_base_uncased_ner_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bert_l6_h768_uncased_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bert_l6_h768_uncased_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bert_large_cased_whole_word_masking_finetuned_squad_google_bert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bert_large_cased_whole_word_masking_finetuned_squad_google_bert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bert_large_uncased_lmd_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bert_large_uncased_lmd_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bert_portuguese_ner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bert_portuguese_ner_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bert_qa_bert_multi_uncased_finetuned_xquadv1_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bert_qa_bert_multi_uncased_finetuned_xquadv1_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bert_qa_model_jahanzeb1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bert_static_malware_detection_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bert_turkish_fine_tuning_question_answering_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bert_turkish_fine_tuning_question_answering_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bertspan4ner_base_chinese_pipeline_zh.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bertspan4ner_base_chinese_zh.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-berturk_social_5m_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-berturk_social_5m_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-biodivbert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-biomedroberta_finetuned_valid_testing_0_00005_16_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-biomedroberta_finetuned_valid_testing_0_0001_16_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-biomedroberta_finetuned_valid_testing_0_0001_16_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bislama_all_bs160_allneg_finetuned_webnlg2020_correctness_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bislama_all_mpnet_base_v2_finetuned_webnlg2017_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bislama_all_mpnet_base_v2_finetuned_webnlg2017_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-07-bmg_translation_lug_english_v1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bmg_translation_lug_english_v1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bpe_selfies_pubchem_shard00_120k_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bpe_selfies_pubchem_shard00_120k_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bpe_selfies_pubchem_shard00_150k_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-brwac_v1_2__checkpoint_last_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bsc_bio_ehr_spanish_cantemist_ner_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bsc_bio_ehr_spanish_carmen_distemist_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bsc_bio_ehr_spanish_carmen_distemist_pipeline_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bsc_bio_ehr_spanish_carmen_meddocan_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bsc_bio_ehr_spanish_combined_train_drugtemist_dev_85_ner_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bsc_bio_ehr_spanish_livingner_humano_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-bsc_bio_ehr_spanish_symptemist_ner_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_model_akash24_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_model_mitra_uta_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_model_mitra_uta_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_abhinavreddy17_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_abhinavreddy17_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_ayushij074_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_b43646_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_bbrenes_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_bbrenes_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_bilalkhan2024_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_dedemilano_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_freongas_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_freongas_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_gaogao8_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_jackyfung00358_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_jamjacob_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_jamjacob_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_jyl480_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_kalyanmaram_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_markchiing_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_mattdyor_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_mattdyor_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_myajun_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_pavi156_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_rahulcdeo_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_rahulcdeo_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_ravinderbrai_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_reza2002_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_shrutina_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_venkatarajendra_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_vikas12061995_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_wandaabudiono_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_setfit_model_ivanzidov_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_text_classification_v2_1_0_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_text_classification_v2_1_0_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_token_classification_v2_1_3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_token_classification_v2_1_3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_all_jhs_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_all_place_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_anirudhramoo_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_carlonos_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_halikuralde2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_pavement_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_priyanshug0405_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_robertiulian10_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_sreeharipv_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_stephen_osullivan_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_wstcpyt1988_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_neg_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_distilbert_model_qaicodes_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_first_qa_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_qa_model_martacaldero_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_qa_model_martacaldero_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_spanish_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_test_setfit_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-burmese_test_setfit_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-byline_detection_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-byline_detection_pipeline_en.md 
create mode 100644 docs/_posts/ahmedlone127/2024-09-07-camembert_base_finetuned_on_runaways_french_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-cat_ner_xlmr_2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-cat_sayula_popoluca_iwcg_3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-chemberta_pubchem1m_shard00_140k_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-chinese_medical_ner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-chinese_medical_ner_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-cnec_1_1_supertypes_czert_b_base_cased_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-cnec_1_1_supertypes_czert_b_base_cased_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-codebert_small_v2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-coha2000s_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-cold_fusion_itr10_seed3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-cold_fusion_itr10_seed3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-constructive_model_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-constructive_model_pipeline_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-contrabert_c_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-contrabert_c_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-convbert_base_turkish_cased_ner_pipeline_tr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-cot_ep3_1122_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-covid_qa_distillbert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-cross_all_bs320_vanilla_finetuned_webnlg2020_metric_average_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-cross_all_bs320_vanilla_finetuned_webnlg2020_metric_average_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-cross_all_mpnet_base_v2_finetuned_webnlg2020_correctness_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-cross_encoder_mmarco_mminilmv2_l12_h384_v1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-cuad_distil_governing_law_08_28_v1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-cybert_cyner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-deberta_large_twitter_pop_binary_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-demomodel_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-demomodel_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-deproberta_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-deproberta_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-disaster_cutom_ner_v1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-disaster_cutom_ner_v1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distil_train_token_classification_nepal_bhasa_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_cased_distilled_squad_distilbert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_cased_distilled_squad_v2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_cased_distilled_squad_v2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_cased_finetuned_pfe_projectt_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-07-distilbert_base_spanish_uncased_finetuned_rock_argentino_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_spanish_uncased_finetuned_rock_argentino_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_spanish_uncased_finetuned_text_intelligence_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_detected_jailbreak_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_distilled_clinc_schnatz65_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_cefr_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_cefr_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_clinc_jeremygf_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_con_dataset_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_dol_ecab_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_dol_ecab_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_accelerate_abh1na5_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_accelerate_abh1na5_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_accelerate_zmeeks_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_coign_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_coign_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_jinq047_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_pkyriakis_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_pkyriakis_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_raincheck_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_rohbro_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_rohbro_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_dev4952_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_harsh1304_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_hemg_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_priyabrata018_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_raulgdp_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_renardkorzeniowski_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_shashank612_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_shashank612_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_wangyue6761_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_nersd_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_nersd_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_bat007_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_begoniabcs_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_bighands23_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_bighands23_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_d5716d28_dchung117_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_d5716d28_dharma20_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_d5716d28_dharma20_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_d5716d28_juancopi81_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_d5716d28_jwlovetea_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_d5716d28_osanseviero_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_decre99_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_decre99_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_devsick_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_devsick_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_ep8_batch16_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_gyeol0225_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_ikeofai_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_injustice_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_injustice_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_sasa3396_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_soikit_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_vijaym_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_yweslakarep_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_wikiann_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_pii_200_burkelive_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_squad2_lora_merged_jeukhwang_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_squad2_p10_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_squad2_p15_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_squad2_pruned_p30_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_finetuned_ner_veronica1608_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_finetuned_squad_hhjingbo_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_finetuned_squad_hhjingbo_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_finetuned_squadv2_eenda_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_finetuned_squadv2_lusic_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_finetuned_squadv2_thangduong0509_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_finetuned_squadv2_thangduong0509_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_imdb_deborahm_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_kazakh_ner_2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_squad_dofla_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_trained_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_trained_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilbert_twitter_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distillber_squadv2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilroberta_base_climate_d_s_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilroberta_base_colombian_sign_language_python_bimodal_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilroberta_base_colombian_sign_language_python_bimodal_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilroberta_base_ft_mensrights_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilroberta_base_ft_mensrights_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilroberta_base_ft_trueunpopularopinion_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilroberta_eli5_mlm_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-distilroberta_eli5_mlm_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-dummy_model_alejoa_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-dummy_model_anrilombard_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-dummy_model_appletreeleaf_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-dummy_model_appletreeleaf_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-dummy_model_bhaskar_gautam_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-dummy_model_elliotsmith_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-dummy_model_elliotsmith_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-dummy_model_hanzhuo_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-dummy_model_jfforero_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-dummy_model_linyi_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-dummy_model_marasaki_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-dummy_model_marasaki_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-07-dummy_model_melody20_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-dummy_model_melody20_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-dummy_model_pipeline_zh.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-dummy_model_rkn222_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-dummy_model_sajid73_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-dummy_model_sajid73_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-dummy_model_shadowtwin41_fr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-dummy_model_shadowtwin41_pipeline_fr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-dummy_model_test_osanseviero_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-dummy_model_varunpatrikar_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-dummy_model_varunpatrikar_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-dummy_model_vonewman_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-dummy_model_vonewman_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-dummy_model_yuuhanishigata_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-dummy_model_yuuhanishigata_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-dummy_model_zh.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-dzoqa_malayalam_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-electra_qa_base_best_squad2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-electra_qa_base_best_squad2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-elvis_roberta_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-email_question_extraction_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-email_question_extraction_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-emotion_test_1000_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-english_multinerd_ner_roberta_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-english_multinerd_ner_roberta_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-envroberta_base_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-erikrepo_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-esmlmt62_2500_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-esperberto_small_sayula_popoluca_eo.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-eth_setfit_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-eth_setfit_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-ewondo_xlm_roberta_base_pipeline_nan.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-expe_0_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-expe_0_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-extractive_qa_squad_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-fairlex_ecthr_minilm_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-fairlex_ecthr_minilm_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-fewshot_qa_002_20230622_001_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-film95960roberta_base_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-film95960roberta_base_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-fine_tune_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-07-fine_tune_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-fine_tuned_distilbert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-fine_tuned_tradisi_bali_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-finetuned_aihub_english_tonga_tonga_islands_korean_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-finetuned_aihub_english_tonga_tonga_islands_korean_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-finetuned_one_epoch_multi_qa_mpnet_base_dot_v1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-finetuned_one_epoch_multi_qa_mpnet_base_dot_v1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-finetuned_sentiment_classfication_roberta_model_slickdata_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-finetuned_sentiment_classfication_roberta_model_slickdata_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-finetuning_mixed_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-finsentencebert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-fresh_model_uncased_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-from_classifier_v1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-gal_sayula_popoluca_iw_3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-gal_sayula_popoluca_iw_3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-generative_qas_pariwisata_bali_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-gr_roberta_base_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-gr_roberta_base_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-greeklegalroberta_v2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-greeklegalroberta_v2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-hafez_bert_fa.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-hasoc19_xlm_roberta_base_profane_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-hasoc19_xlm_roberta_base_profane_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-hate_hate_random2_seed2_bernice_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-hate_hate_random2_seed2_bernice_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-hate_speech_detection_mpnet_basev2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-helsinki_danish_swedish_v12_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-helsinki_danish_swedish_v2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-hiner_romanian_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-hiner_romanian_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-humour_detection_xlmr_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-humour_detection_xlmr_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-hupd_distilroberta_base_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-idiom_xlm_roberta_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-ife_sentence_model2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-ife_sentence_model2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-improved_xlm_attempt2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-indicbertv2_mlm_sam_tlm_pipeline_xx.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-07-indicbertv2_mlm_sam_tlm_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-intent_global_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-isom5240_whisper_small_zhhk_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-iwslt17_marian_big_ctx4_cwd3_english_french_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-iwslt17_marian_small_ctx4_cwd1_english_french_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-jcblaise_roberta_tagalog_base_ft_udpos213_top2lang_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-lab1_finetuning_cheyannelam_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-lab1_finetuning_daanjiri_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-lab1_finetuning_daanjiri_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-lab1_random_daanjiri_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-lab1_random_sfliao_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-lbt5_large_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-lbt5_large_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-legal_longformer_base_4096_spanish_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-leia_lm_large_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-leia_lm_large_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-llama_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-lld_valbadia_ita_loresmt_l4_pipeline_it.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-lm_ner_skills_extractor_bert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-lm_ner_skills_extractor_bert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-logprecis_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-lr1e5_bs8_distilbert_qa_pytorch_full_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-lr1e5_bs8_distilbert_qa_pytorch_full_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-lthien_tra_bai_tieng_anh_chuyen_nganh_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-lthien_tra_bai_tieng_anh_chuyen_nganh_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-macedonian_roberta_base_mk.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-macedonian_roberta_base_pipeline_mk.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-maltese_norwegian_swedish_finetuned_pipeline_sv.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-malurl_roberta_10e_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-marefa_maltese_english_arabic_parallel_10k_splitted_cosine_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-marefa_maltese_english_arabic_parallel_10k_splitted_cosine_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_combined_dataset_1_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_combined_dataset_1_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_chandrasutrisnotjhong_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_chandrasutrisnotjhong_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_hjhj2_en.md create mode 
100644 docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_hjhj2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_leisa_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_leisa_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_naninya_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_satyashetty_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_satyashetty_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_pickupppp_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_pickupppp_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_yimhuang_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_hindi_comm_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_hindi_comm_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-marian_frwo_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-marian_maltese_bbc_nigerian_pidgin_english_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-marianmt_ufal_english_spanish_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-mbti_classification_roberta_base_aug_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-mbti_classification_roberta_base_aug_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-medical_english_spanish_8_16_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-medical_english_spanish_8_16_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-memo_bert_sanskrit_saskta_01_da.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-memo_bert_sanskrit_saskta_01_pipeline_da.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-mformer_care_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-mika_safeaerobert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-mika_safeaerobert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-minilmv2_l6_h384_r_fineweb_100k_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-mlcovid19_classifier_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-mlcovid19_classifier_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-mmarco_mminilmv2_l12_h384_v1_y2lan_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-model1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-model3e_norwegian_wd_norwegian_perturb_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-model_finetuned_kde4_english_tonga_tonga_islands_french_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-model_m5_english_chinese_twi_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-model_m5_english_chinese_twi_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-07-model_name_kayyyy27_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-movie_review_roberta_imalexianne_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-mpnet_base_natural_questions_mnsrl_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-mpnet_base_natural_questions_mnsrl_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-msbert_he.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-msbert_pipeline_he.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-mtl_roberta_base_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-multi_qa_mpnet_base_dot_v1_covidqa_search_65_25_1epoch_full_p_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-multi_qa_mpnet_base_dot_v1_covidqa_search_75_25_2epoch_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-multi_qa_mpnet_base_dot_v1_covidqa_search_75_25_2epoch_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-multi_qa_mpnet_base_dot_v1_fine_tuned_hs_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-multi_qa_mpnet_base_dot_v1_fine_tuned_hs_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-multilingual_e5_base_classification_v0_4_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-multilingual_e5_base_classification_v0_4_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-multilingual_xlm_roberta_for_ner_eserdy_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-multilingual_xlm_roberta_for_ner_eserdy_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-n_roberta_imdb_padding60model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-named_entity_recognition_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-nepal_bhasa_trained_danish_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-nepal_bhasa_trained_serbian_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-nepal_bhasa_trained_serbian_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-ner_411_id.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-ner_411_pipeline_id.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-ner_classifier_distil_bert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-ner_classifier_distil_bert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-ner_cw_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-ner_finetuned_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-ner_legal_german_pipeline_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-ner_model_abderrahimal_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-ner_model_abderrahimal_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-ner_ner_random3_seed0_roberta_large_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-ner_newsagency_bert_french_pipeline_fr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-nerd_nerd_random0_seed1_bernice_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-nerd_nerd_random0_seed1_bernice_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-nerd_nerd_random3_seed0_bernice_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-nerd_nerd_random3_seed0_bernice_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-netflix_reviews_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-07-netflix_reviews_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-nofibot3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-norwegian_bokml_bert_base_no.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-norwegian_bokml_roberta_base_scandinavian_long_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-norwegian_bokml_roberta_base_scandinavian_long_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-norwegian_bokml_whisper_base_nbailabbeta_no.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-norwegian_bokml_whisper_base_nbailabbeta_pipeline_no.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-norwegian_bokml_whisper_tiny_beta_no.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-norwegian_bokml_whisper_tiny_beta_pipeline_no.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-nreimers_minilmv2_l6_h384_distilled_from_roberta_large_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-nuclear_medicine_daroberta_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-nyaszzzz_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-ofa_multi_100_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-ofa_multi_100_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-opticalbert_cased_squad2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-opus_base_ailem_adaptified_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-opus_base_ailem_adaptified_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-opus_big_ailem_random_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-opus_maltese_arabic_english_finetuned_src_tonga_tonga_islands_trg_testing_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-opus_maltese_english_italian_finetuned_20000_english_tonga_tonga_islands_italian_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_anhtuanta_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_anhtuanta_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_louistichelman_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_momo22_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_momo22_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_clean_marianmt_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-opus_maltese_english_swahili_finetuned_english_tonga_tonga_islands_swahili_zumaridi_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-opus_maltese_french_english_bds_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-opus_maltese_ganda_english_finetuned_lm_tonga_tonga_islands_english_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_donghyunkim_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_second_felipetanios_en.md create mode 
100644 docs/_posts/ahmedlone127/2024-09-07-opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_second_tiagohatta_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-opus_maltese_italian_english_finetuned_5000_italian_tonga_tonga_islands_english_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-opus_maltese_romance_english_finetuned_npomo_english_15_epochs_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-opus_tatoeba_english_japanese_finetuned_eng_tonga_tonga_islands_jpn_hani_pipeline_nan.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-opus_wmt_finetuned_enfr_wang_2022_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-opus_wmt_finetuned_enfr_wang_2022_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-othe_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-othe_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-phowhisper_tiny_vinai_pipeline_vi.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-phowhisper_tiny_vinai_vi.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-pii_model_jayshah07_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-pii_model_jayshah07_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-platzi_distilroberta_base_mrpc_glue_luigitercero_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-platzi_distilroberta_base_mrpc_glue_luigitercero_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-platzi_distilroberta_base_mrpc_miguel_moroyoqui_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-portuguese_finegrained_one_shot_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-predict_perception_xlmr_blame_concept_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-predict_perception_xlmr_blame_concept_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-prof_ner_spanish_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-project_us_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-project_us_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-prompt_ls_portuguese_2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-psais_all_mpnet_base_v2_10shot_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-pubchem10m_smiles_bpe_390k_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-pubmedbert_full_finetuned_ner_pablo_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-pubmedbert_full_finetuned_ner_pablo_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-q2d_gpt_35_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-q2d_gpt_35_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-qa_ccc_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-qa_iiitdmj_testing_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-qa_model_fsghs_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-qa_model_fsghs_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-qa_model_martacaldero_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-qa_model_test_ukson_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-qa_model_test_ukson_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-qa_synthetic_data_with_real_data_finetuned_v2_0_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-07-qa_synthetic_data_with_real_data_finetuned_v2_0_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-qamodel_distilbert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-qamodel_distilbert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-queansmodel_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-question_answering_hansollll_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-question_answering_hansollll_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-r_fb_sms_lm_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-r_t_sms_lm_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-rap_phase2_11jan_15i_v2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-rap_phase2_11jan_15i_v2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-readability_spanish_sentences_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-readability_spanish_sentences_pipeline_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-refpydst_5p_referredstates_split_v1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-robasquerta_eu.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_2020_q1_filtered_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_base_10m_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_base_10m_3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_base_10m_3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_base_1b_3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_base_1b_3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_base_catalan_plantl_gob_es_pipeline_ca.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_base_detect_cheapfake_combined_train_test_15200_2_8_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_base_detect_cheapfake_combined_train_test_15200_2_8_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_base_education_domain_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_base_epoch_25_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_base_epoch_25_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_base_epoch_29_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_base_epoch_43_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_base_finetuned_cola_jinchen_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_base_finetuned_cola_jinchen_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_base_finetuned_neg_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_base_finetuned_sarcasm_news_headline_detection_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_base_finetuned_sarcasm_news_headline_detection_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_base_ftd_on_glue_qqp_iter_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_base_ftd_on_glue_qqp_iter_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_base_lego_emotions_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_base_lego_emotions_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-07-roberta_base_lener_breton_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_base_lener_breton_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_base_ner_demo3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_base_ner_demo3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_base_ner_demo_ganbold13_mn.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_base_ner_demo_turshilt2_mn.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_base_qqp_two_stage_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_base_rte_two_stage_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_base_rte_two_stage_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_base_thai_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_base_vira_intents_live_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_base_vira_intents_live_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_classifier_large_realsumm_by_examples_fold2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_conll_learning_rate1e4_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_dpt_online_sexism_detection_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_dpt_online_sexism_detection_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_full_finetuned_ner_pablo_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_full_finetuned_ner_pablo_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_ganda_cased_malay_ner_full_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_ganda_cased_malay_ner_v3_test_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_ganda_cased_malay_ner_v3_test_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_india_ner_trainer_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_large_biomedical_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_large_biomedical_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_large_bne_livingner1_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_large_bne_livingner1_pipeline_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_large_catalan_v2_ca.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_large_catalan_v2_pipeline_ca.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_large_genia_ner_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_large_mrpc_two_stage_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_large_ner_english_finetuned_ner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_large_ner_english_finetuned_ner_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_large_schizophrenia_v3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_large_wechsel_finnish_fi.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_large_wechsel_finnish_pipeline_fi.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_moral_emotion_eng_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-07-roberta_moral_emotion_eng_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_ncc_des_128_decayfrom200_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_poetry_anger_crpo_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_poetry_anger_crpo_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_psych_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_qa_base_spanish_squades_becasincentivos3_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_qa_base_spanish_squades_becasincentivos3_pipeline_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_self_trained_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_self_trained_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_spanish_clinical_trials_misc_ents_ner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_squad_v2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_squad_v2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_tagalog_base_ft_udpos213_indonesian_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_token_classification_araeval24_aug800_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_token_classification_araeval24_aug800_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_urdu_small_finetuned_urdu_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-roberta_urdu_small_finetuned_urdu_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-robertalexpt_base_pt.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-robertatwitterbr_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-ros_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-ros_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-rseq2_full_data_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-run1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sbert_imdb_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-scenario_non_kd_from_scratch_data_hate_speech_filipino_model_xlm_roberta_base_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_afro_xlmr_base_finetuned_kintweetsc_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_afro_xlmr_base_finetuned_kintweetsd_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_albert_persian_farsi_zwnj_base_v2_fa.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_alephbertgimmel_base_512_pipeline_he.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_bernice_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_bernice_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_bert_1890_1900_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_cased_finetuned_mrpc_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_cased_finetuned_mrpc_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_german_cased_oldvocab_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_indonesian_522m_id.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_indonesian_522m_pipeline_id.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_irish_cased_v1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_irish_cased_v1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_magicslabnu_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_polish_uncased_v1_pl.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_uncased_finetuned_char_hangman_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_uncased_finetuned_char_hangman_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_zhtw_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_MetaKD_v0_pipeline_pt.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_MetaKD_v0_pt.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_bertabaporu_large_uncased_pipeline_pt.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_bertabaporu_large_uncased_pt.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_bertislav_pipeline_cu.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_berturk_legal_pipeline_tr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_berturk_legal_tr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_bio_tinybert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_bio_tinybert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_biomednlp_biomedbert_large_uncased_abstract_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_biomednlp_biomedbert_large_uncased_abstract_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_checkpoint_11600_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_checkpoint_11600_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_checkpoint_13600_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_checkpoint_13600_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_cocodr_base_msmarco_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_convbert_base_turkish_mc4_cased_pipeline_tr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_convbert_base_turkish_mc4_cased_tr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_custominlawbert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_distil_clinicalbert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_eq_bert_v1_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_financialbert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_fp_xlm_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_fp_xlm_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_georgian_corpus_model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_glot500_with_transliteration_max_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_glot500_with_transliteration_minangkabau_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_hindi_bert_v2_hi.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_hing_bert_pipeline_hi.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-07-sent_hinglish_bert_nirantk_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_hinglish_bert_nirantk_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_issuebert_large_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_issuebert_large_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_legalbert_large_1_7m_2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_legalbertpt_sardinian_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_legalbertpt_sardinian_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_legalnlp_bert_pipeline_pt.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_legalnlp_bert_pt.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_manubert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_marathi_bert_mr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_marathi_bert_pipeline_mr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_marbertv2_ar.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_marbertv2_pipeline_ar.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_mattpuscibert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_mattpuscibert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_me_bert_mixed_v2_mr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_me_bert_mixed_v2_pipeline_mr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_medbert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_mika_safeaerobert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_mizbert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_mlm_xlmr_base_vlsp_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_mlm_xlmr_base_vlsp_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_model_21200_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_model_21200_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_ofa_multi_100_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_ofa_multi_100_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_ofa_multi_768_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_ofa_multi_768_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_patana_chilean_spanish_bert_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_pharmbert_cased_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_retromae_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_retromae_msmarco_distill_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_retromae_msmarco_finetune_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_retromae_msmarco_finetune_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_retromae_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_rxbert_v1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_swiss_german_xlm_roberta_base_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_swiss_german_xlm_roberta_base_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_tb_xlm_r_fpt_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_tech_roberta_pipeline_vi.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-07-sent_tech_roberta_vi.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_telugu_bert_te.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_telugu_bertu_te.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_test999_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_test999_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_translit_ppa_mediterranean_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_translit_ppa_mediterranean_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_tulio_chilean_spanish_bert_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_tulio_chilean_spanish_bert_pipeline_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_turkish_tiny_bert_uncased_tr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_uzbert_base_uncased_pipeline_uz.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_uzbert_base_uncased_uz.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_vien_resume_roberta_base_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_xlm_r_with_transliteration_average_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_xlm_r_with_transliteration_minangkabau_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_1024_256_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_1024_256_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_arlama_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_arlama_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_finetuned_lingala_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_finetuned_lingala_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_finetuned_rugo_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_finetuned_somali_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_finetuned_somali_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sent_zabantu_sot_ven_170m_pipeline_ve.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sentagram_pipeline_tr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sentagram_tr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sentence_classifier_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sentence_transformers_all_mpnet_base_v2_2epoch_100pair_mar2_contrastiveloss_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sentence_transformers_all_mpnet_base_v2_2epoch_100pair_mar2_contrastiveloss_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sentencepiecebpe_nachos_french_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sentencepiecebpe_pubmed_french_morphemes_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-setfit_model_calgary_epochs2_jul_15_2023_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-setfit_model_calgary_epochs2_jul_15_2023_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-setfit_model_independence_labelintl_epochs2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-setfit_model_independence_labelintl_epochs2_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-07-sinhala_roberta_oscar_pipeline_si.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sinhala_roberta_oscar_si.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sinhala_sentiment_analysis_sinbert_large_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sloberta_20480_not_pretrained_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sloberta_20480_not_pretrained_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-smm4h2024_task1_roberta_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-somd_xlm_stage1_v2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sota_3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-sota_3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-southern_sotho_all_mpnet_finetuned_comb_12481_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-southern_sotho_all_mpnet_finetuned_comb_12481_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-southern_sotho_all_mpnet_finetuned_french_1000_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-spanish_capitalization_punctuation_restoration_sanivert_pipeline_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-spanish_finnish_all_quy_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-spanish_finnish_extra_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-spanish_finnish_extra_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-spea_3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-ssdlm_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-ssdlm_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-stanford_deidentifier_base_finetuned_ner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-stanford_deidentifier_base_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-stanford_deidentifier_only_i2b2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-stanford_deidentifier_only_i2b2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-stanford_deidentifier_only_radiology_reports_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-stego_classifier_checkpoint_epoch_30_2024_07_26_16_19_31_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-swiss_german_xlm_roberta_base_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-t_frex_roberta_large_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-taiyi_roberta_124m_d_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-tajberto_pipeline_tg.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-tajberto_tg.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-task_token_classification_distilbert_wnut_17_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-tesakantaibert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-test_demo_qa_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-test_esperberto_small_pipeline_eo.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-test_qa_sanjeev_jasper_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-test_qa_sanjeev_jasper_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-test_setfit_model_bhuvana_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-test_w5_long_dataset_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-07-testchatbotmodel1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-testing_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-testmodel_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-testmodel_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-testtesttest_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-textfooler_roberta_base_mrpc_5_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-textfooler_roberta_base_mrpc_5_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-tnana_english_thai_align_finetuned_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-token_classification_adilhayat173_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-tone_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-tone_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-trainer_chapter4_rishabh_sucks_at_code_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-trans_vietnamese_english_v2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-translatear_english_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-translation_vietnamese_english_official_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-translit_ppa_mediterranean_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-translit_ppa_mediterranean_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-trustpilot_balanced_location_roberta_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-twitter_roberta_base_jun2020_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-twitter_roberta_base_jun2020_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-twitter_roberta_base_mar2020_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-twitter_roberta_base_mar2020_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-twitter_roberta_base_mar2022_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-twitter_roberta_base_mar2022_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-twitter_roberta_base_sentiment_ahmetayrnc_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-twitter_roberta_base_sentiment_ahmetayrnc_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-twitter_xlm_roberta_base_sentiment_finetuned_marc_tswana_v1_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-twitter_xlm_roberta_base_sentiment_finetuned_marc_tswana_v1_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-twitter_xlm_roberta_base_sentiment_finetunned_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-unibert_distilbert_3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-urdubert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-urdubert_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-uzroberta_v2_pipeline_uz.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-uzroberta_v2_uz.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-v2_mrcl0ud_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-weights_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-whisper_base_cv17_hungarian_hu.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-whisper_base_cv17_hungarian_pipeline_hu.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-whisper_gujarati_small_pipeline_gu.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-07-whisper_noisy_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-whisper_small200sep4_spanish_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-whisper_small200sep4_spanish_pipeline_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-whisper_small_english_atco2_asr_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-whisper_small_finetunedenglish_speechfinal_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-whisper_small_hindi_drinktoomuchsax_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-whisper_small_hindi_drinktoomuchsax_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-whisper_small_kurdish_sorani_10_ku.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-whisper_small_kurdish_sorani_10_pipeline_ku.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-whisper_small_russian_v2_artyomboyko_ru.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-whisper_small_twi_arxiv_pipeline_tw.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-whisper_tiny_few_audios_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-whisper_tiny_few_audios_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-whisper_tiny_minds14_english_us_markredito_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-whisper_tiny_minds14_english_us_markredito_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-whisper_tiny_portuguese_dominguesm_pt.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-wikismall_roberta_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-wikismall_roberta_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-wolfbbsroberta_large_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-wolfbbsroberta_large_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-wolof_description_guru_0_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-wolof_description_guru_0_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-wolof_finetuned_ner_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_r_finetuned_toxic_political_tweets_spanish_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_r_finetuned_toxic_political_tweets_spanish_pipeline_es.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_autext_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_augument_visquad2_16_3_2023_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_augument_visquad2_16_3_2023_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_marc_english_tomo_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_marc_english_tomo_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_marc_yuri_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_aiekek_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_aiekek_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_cataluna84_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_deepaperi_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_deepaperi_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_huggingbase_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_huggingbase_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_ladoza03_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_sbpark_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_sbpark_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_sreek_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_sreek_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_wendao_123_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_buruzaemon_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_buruzaemon_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_drigb_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_likejazz_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_likejazz_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_nrazavi_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_nrazavi_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_ryo_hsgw_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_ryo_hsgw_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_taoyoung_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_taoyoung_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_french_ferro_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_french_henryjiang_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_french_inniok_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_french_jfmatos_isq_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_french_robkayinto_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_french_wooseok0303_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_french_wooseok0303_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_alexisxiaoyu_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_alexisxiaoyu_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_cicimen_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_eikoenchine_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_eikoenchine_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_emmanuelalo52_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_esperesa_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_andreaschandra_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_andreaschandra_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_benjiccee_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_cataluna84_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_cataluna84_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_guruji108_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_guruji108_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_laurentiustancioiu_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_noveled_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_smilingface88_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_sponomary_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_sponomary_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_transformersbook_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_yasu320001_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_yezune_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_yurit04_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_yurit04_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_gonalb_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_gus07ven_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_gv05_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_gv05_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_huangjia_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_k4west_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_monkdalma_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_sanbatte_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_sanbatte_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_sanyam_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_sanyam_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_takizawa_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_takizawa_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_thkkvui_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_vasantha_ai_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_vasantha_ai_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_xiao888_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_chris_choi_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_jamie613_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_jamie613_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_lsh231_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_lsh231_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_munsu_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_nobody138_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_praboda_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_praboda_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_smilingface88_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_smilingface88_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_korean_jhsign_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_korean_jhsign_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_sayula_popoluca_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_kyrgyzner_ttimur_pipeline_ky.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_language_detection_disaster_twitter_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_multilingual_text_genre_classifier_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_multilingual_text_genre_classifier_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_nepal_bhasa_vietnam_aug_backtranslation_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_nepal_bhasa_vietnam_aug_backtranslation_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_panx_dataset_korean_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_telugu_transliterate_pipeline_te.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_telugu_transliterate_te.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_trimmed_arabic_30000_tweet_sentiment_arabic_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_trimmed_arabic_30000_tweet_sentiment_arabic_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_trimmed_portuguese_60000_tweet_sentiment_portuguese_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_tweet_sentiment_arabic_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_tweet_sentiment_arabic_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_ukrainian_ner_ukrner_uk.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_wnut_ner_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_wnut_ner_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_xnli_arabic_trimmed_arabic_30000_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_finetuned_emojis_non_iid_fed_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_german_sentiment_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_german_sentiment_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_qa_autonlp_roberta_base_squad2_24465516_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_qa_autonlp_roberta_base_squad2_24465516_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlmr_base_toxicity_classifier_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlmr_english_german_norwegian_shuffled_orig_test1000_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlmr_english_german_norwegian_shuffled_orig_test1000_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlmr_estonian_english_all_shuffled_42_test1000_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlmr_estonian_english_all_shuffled_42_test1000_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlmr_finetuned_fquad_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlmr_finetuned_fquad_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlmr_nepali_english_train_shuffled_1986_test2000_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlmr_nepali_english_train_shuffled_1986_test2000_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlmr_qa_extraction_english_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlmr_qa_extraction_english_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlmr_squad2_webis_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlmr_webis_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlmroberta_embeddings_marathi_roberta_mr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlmroberta_embeddings_marathi_roberta_pipeline_mr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_aytugkaya_base_finetuned_panx_pipeline_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_base_fin_pipeline_fi.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_base_finetuned_dholuo_finetuned_ner_luo.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_base_finetuned_kinyarwanda_finetuned_ner_swahili_pipeline_sw.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_base_finetuned_kinyarwanda_finetuned_ner_swahili_sw.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_base_uncased_mit_movie_trivia_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_edwardjross_base_finetuned_panx_all_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_neha2608_base_finetuned_panx_all_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_neha2608_base_finetuned_panx_all_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_rgl73_base_finetuned_panx_de.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xnli_xlm_r_only_turkish_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-07-xnli_xlm_r_only_turkish_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-afro_xlmr_base_finetuned_kintweetsb_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-agnews_padding60model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-albert_base_v2_weighted_hoax_classifier_definition_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-albert_base_v2_weighted_hoax_classifier_definition_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-albert_persian_farsi_base_v2_sentiment_digikala_fa.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-albert_persian_farsi_base_v2_sentiment_digikala_pipeline_fa.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-albert_xxlarge_v2_disaster_twitter_preprocess_data_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-all_mpnet_base_v2_lr_1e_8_margin_5_epoch_3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-all_mpnet_base_v2_navteca_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-all_mpnet_base_v2_topic_abstract_similarity_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-all_mpnet_janet_10k_v1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-all_mpnet_janet_10k_v1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-amazonpolarity_fewshot_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-analisis_sentimientos_beto_tass_c_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-argureviews_specificity_roberta_v1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-atte_2_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-auro_4_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-auro_4_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-babyberta_wikipedia1_2_5_with_masking_run2_finetuned_qasrl_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-babyberta_wikipedia1_2_5_with_masking_run2_finetuned_qasrl_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-bert_base_yelp_reviews_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-bert_based_uncased_finetuned_imdb_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-bert_imdb_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-bert_imdb_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-bertin_roberta_fine_tuned_text_classification_slovene_data_augmentation_ds_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-bertoslav_limited_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-best_model_yelp_polarity_16_13_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-08-best_model_yelp_polarity_32_13_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-best_model_yelp_polarity_32_13_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-best_model_yelp_polarity_64_21_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-best_model_yelp_polarity_64_21_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-cat_ner_xlmr_4_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-category_1_delivery_cancellation_distilbert_base_uncased_distilled_squad_v1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-clasificadorcorreosoportedistilespanol_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-classification_model_mtebad_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-classification_model_mtebad_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-classification_model_sushant22_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-cm124057_01_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-cpu_netzero_classifier_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-cpu_netzero_classifier_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-cpu_transport_ghg_classifier_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-cpu_transport_ghg_classifier_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-cross_all_bs192_hardneg_finetuned_webnlg2020_relevance_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-custommodel_yelp_hanyundudddd_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-darija_englishv2_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-depression_detection_model_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distilbert_base_cased_distilbert_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distilbert_base_cased_finetuned_imdb_shindc_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distilbert_base_cased_finetuned_imdb_shindc_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distilbert_base_english_greek_modern_russian_cased_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distilbert_base_multilingual_cased_regression_finetuned_ptt_pipeline_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_clinc_schnatz65_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_emotion_bistudent_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_emotion_bistudent_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_emotion_lilvoda_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_emotion_niwang2024_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_emotion_schnatz65_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_emotion_talzoomanzoo_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_emotion_with_annotated_by_gpt35_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_emotion_with_annotated_by_gpt35_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_imdb_adrien35_pipeline_en.md create 
mode 100644 docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_imdb_dylettante_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_imdb_ellieburton_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_imdb_lidiapierre_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_imdb_majkeldcember_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_imdb_marcosautuori_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_imdb_pbwinter_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_imdb_xxxxxcz_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_news_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_stationary_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_streamers_accelerate_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_odm_zphr_0st13sd_ut72ut1large13pfxnf_simsp400_clean200_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distilbert_coarse5_js_1_1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distilbert_coarse5_js_1_1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distilbert_masking_1perc_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distilbert_movie_review_sentiment_classifier_3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distilbert_nsfw_text_classifier_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distilbert_nsfw_text_classifier_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distilbert_tweet_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distillbert_sentiment_analysis_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-distillbert_sentiment_analysis_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-facets_gpt_35_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-finance_news_classifier_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-gal_enptsp_xlm_r_gl.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-gal_portuguese_xlm_r_pipeline_gl.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-gal_sayula_popoluca_iwcg_4_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-has_the_doctor_specified_whether_the_patient_can_belarusian_seen_heard_bert_first512_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-hw1_eva1209_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-hw_1_aia_tclin_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-hw_1_aia_tclin_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-imdb_3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-imdb_distilbert_apoorvaec1030_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-imdb_distilbert_apoorvaec1030_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-impara_qe_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-inde_4_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-indobert_sentiment_analysis_id.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-intent_classifier_frana9812_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-08-intent_distilbert_classifier_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-joo_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-khmer_text_classification_roberta_km.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-khmer_text_classification_roberta_pipeline_km.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-lenu_ewe_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-lexuz1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-linkbert_base_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-luganda_ner_v1_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-marian_finetuned_kde4_english_tonga_tonga_islands_french_bill1888_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-marian_finetuned_kde4_english_tonga_tonga_islands_french_viennes_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-marian_finetuned_kde4_english_tonga_tonga_islands_german_accelerate_translator_nlp_course_chapter7_section3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-maskedlm_finetuned_imdb_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-mlm_jjk_subtitle_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-mminilm_l6_v2_english_portuguese_msmarco_v1_pipeline_pt.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-mpnet_base_nli_matryoshka_yoshinori_sano_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-mpnet_twitter_freq100_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-multidim_default_template_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-multidim_romansh_reg_avg_balanced_default_template_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-multilingual_xlm_roberta_for_ner_c4n11_xx.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-n2c2_soap_entailment_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-n_roberta_imdb_padding10model_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_2000instancesopus_leaningrate2e_05_batchsize8_11epoch_3_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-opus_maltese_english_bkm_10e6encdec_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-opus_maltese_english_dutch_finetuned_20k_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-opus_maltese_english_japanese_finetuned_english_tonga_tonga_islands_japanese_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_agreement_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-pebblo_classifier_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-platzi_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-platzi_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-portuguese_up_xlmr_oneshot_falsetrue_0_2_best_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-portuguese_up_xlmr_oneshot_falsetrue_0_2_best_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-predict_perception_xlmr_focus_assassin_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-psais_multi_qa_mpnet_base_dot_v1_8shot_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-psais_multi_qa_mpnet_base_dot_v1_8shot_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-08-qa_model9_test_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-quality_model_apr3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-recommend_songs_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-resume_sentence_classifier_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-roberta_base_emotion_pysentimiento_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-roberta_finetuned_subjqa_movies_2_ram20307_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-roberta_news_classification_aparnaullas_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-roberta_qa_QA_for_Event_Extraction_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-roberta_qa_REQA_RoBERTa_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-roberta_qa_REQA_RoBERTa_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-roberta_qa_roberta_base_finetuned_scrambled_squad_5_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-roberta_soft_llm_multip_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-romanurduclassification_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-rulebert_v0_4_k0_pipeline_it.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-semanlink_all_mpnet_base_v2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-sent_memo_model_2500_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-sent_norwegian_bokml_roberta_base_scandi_1e4_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-sent_xlm_r_with_transliteration_max_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-sent_xlm_roberta_base_finetuned_amharic_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-sent_xlm_roberta_base_finetuned_malagasy_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-sent_xlm_roberta_base_finetuned_malagasy_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-sent_xlm_roberta_base_finetuned_questions_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-sent_xlm_roberta_base_finetuned_questions_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-seq2seq_finetuned_slang_english_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-setfit_model_ireland_3labels_balanced_data_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-setfit_model_ireland_3labels_balanced_data_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-setfit_model_ireland_4labels_unbalanced_data_3epochs_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-setfit_model_ireland_binary_label1_epochs2_feb_28_2023_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-sota_4_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-spark_name_burmese_tonga_tonga_islands_english_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-speech_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-stego_classifier_checkpoint_epoch_0_2024_07_26_11_37_42_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-stego_classifier_checkpoint_epoch_0_2024_07_26_11_37_42_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-stego_classifier_checkpoint_epoch_10_2024_07_26_14_26_52_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-test999_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-test999_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-08-test_trainer4_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-test_trainer4_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-testing_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-tmp_trainer_ubermenchh_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-trainer1f_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-trainer_output_dir_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-trecdl22_crossencoder_roberta_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-tweetcat_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-twitter_roberta_base_topic_latest_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-twitter_spam_classifier_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-updated_distilbert_stance_detection_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-updated_distilbert_stance_detection_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-usclm_distilbert_base_uncased_mk1_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-w2l_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-w2l_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-whisper_atcosim3_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-whisper_small_finetuned_common_voice_marathi_marh_mr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-whisper_small_finetuned_common_voice_marathi_marh_pipeline_mr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_final_mixed_aug_insert_bert_2_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_malagasy_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_all_likejazz_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_all_likejazz_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_all_pockypocky_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_english_iis2009002_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_french_goldenk_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_german_fernweh23_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_german_french_alkampfer_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_german_french_buruzaemon_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_german_nitin1690_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_italian_aaa01101312_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_italian_aiventurer_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_italian_aiventurer_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_italian_leosol_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_wikiann_hindi_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_wikiann_hindi_pipeline_en.md create mode 100644 
docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_60000_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_word_shopsign_nepal_bhasa_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_word_shopsign_nepal_bhasa_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_longformer_base_4096_xnli_french_3_classes_rua_wl_3_classes_fr.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_sentiment_romanurdu_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-xlm_twitter_politics_sentiment_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-xlmroberta_classifier_autonlp_fake_news_detection_system_29906863_hi.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-xlmroberta_classifier_autonlp_fake_news_detection_system_29906863_pipeline_hi.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-xlmroberta_classifier_deoffxlmr_mono_tamil_pipeline_ta.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-xlmroberta_classifier_deoffxlmr_mono_tamil_ta.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-xlmrobertalongforquestionanswering_base_squad2_512_4096_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-xlmrobertalongforquestionanswering_base_squad2_512_4096_pipeline_en.md create mode 100644 docs/_posts/ahmedlone127/2024-09-08-xtremedistil_l6_h384_uncased_en.md diff --git a/docs/_posts/ahmedlone127/2024-09-01-deberta_v3_base_company_names_en.md b/docs/_posts/ahmedlone127/2024-09-01-deberta_v3_base_company_names_en.md new file mode 100644 index 00000000000000..5d59cac1de733c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-01-deberta_v3_base_company_names_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_base_company_names DeBertaForTokenClassification from nbroad +author: John Snow Labs +name: deberta_v3_base_company_names +date: 2024-09-01 +tags: [en, open_source, onnx, token_classification, deberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.4.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_company_names` is a English model originally trained by nbroad. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_company_names_en_5.4.2_3.0_1725197551202.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_company_names_en_5.4.2_3.0_1725197551202.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python
# Assumes an active Spark NLP session (e.g. spark = sparknlp.start()).
from sparknlp.base import DocumentAssembler
from sparknlp.annotator import Tokenizer, DeBertaForTokenClassification
from pyspark.ml import Pipeline

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

# The classifier reads the assembler's 'document' column and the tokenizer's 'token' column.
tokenClassifier = DeBertaForTokenClassification.pretrained("deberta_v3_base_company_names","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("ner")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala
// Assumes the usual Spark NLP imports (com.johnsnowlabs.nlp.*) and spark.implicits._ for .toDS.

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols("document")
    .setOutputCol("token")

// The classifier reads the 'document' and 'token' columns produced above.
val tokenClassifier = DeBertaForTokenClassification.pretrained("deberta_v3_base_company_names", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("ner")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
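The `ner` output column holds one predicted tag per token. As a minimal follow-up sketch (assuming the `pipelineDF` DataFrame created in the Python example above), the tags can be inspected next to their tokens:

```python
# Sketch only: `token.result` and `ner.result` are parallel arrays (one tag per token).
pipelineDF.select("token.result", "ner.result").show(truncate=False)
```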
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_company_names| +|Compatibility:|Spark NLP 5.4.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|661.8 MB| + +## References + +https://huggingface.co/nbroad/deberta-v3-base-company-names \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-01-deberta_v3_large__sst2__train_8_2_en.md b/docs/_posts/ahmedlone127/2024-09-01-deberta_v3_large__sst2__train_8_2_en.md new file mode 100644 index 00000000000000..b641a96d8748a9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-01-deberta_v3_large__sst2__train_8_2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_large__sst2__train_8_2 DeBertaForSequenceClassification from SetFit +author: John Snow Labs +name: deberta_v3_large__sst2__train_8_2 +date: 2024-09-01 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.4.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large__sst2__train_8_2` is a English model originally trained by SetFit. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large__sst2__train_8_2_en_5.4.2_3.0_1725182599185.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large__sst2__train_8_2_en_5.4.2_3.0_1725182599185.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python
# Assumes an active Spark NLP session (e.g. spark = sparknlp.start()).
from sparknlp.base import DocumentAssembler
from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
from pyspark.ml import Pipeline

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

# The classifier reads the assembler's 'document' column and the tokenizer's 'token' column.
sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large__sst2__train_8_2","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("class")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala
// Assumes the usual Spark NLP imports (com.johnsnowlabs.nlp.*) and spark.implicits._ for .toDS.

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols(Array("document"))
    .setOutputCol("token")

// The classifier reads the 'document' and 'token' columns produced above.
val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large__sst2__train_8_2", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("class")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
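The classifier writes its prediction into the `class` column as a Spark NLP annotation. A short usage sketch (assuming the `pipelineDF` DataFrame from the Python example above) for reading the predicted label per row:

```python
# Sketch only: `class.result` holds the predicted label(s) for each input row.
pipelineDF.select("text", "class.result").show(truncate=False)
```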
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large__sst2__train_8_2| +|Compatibility:|Spark NLP 5.4.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/SetFit/deberta-v3-large__sst2__train-8-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-01-expe_1_en.md b/docs/_posts/ahmedlone127/2024-09-01-expe_1_en.md new file mode 100644 index 00000000000000..eb194d77ebbb3c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-01-expe_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English expe_1 RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: expe_1 +date: 2024-09-01 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`expe_1` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/expe_1_en_5.5.0_3.0_1725212357824.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/expe_1_en_5.5.0_3.0_1725212357824.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("expe_1","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("expe_1", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|expe_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/Expe_1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-01-imdb_microsoft_deberta_v3_large_seed_1_en.md b/docs/_posts/ahmedlone127/2024-09-01-imdb_microsoft_deberta_v3_large_seed_1_en.md new file mode 100644 index 00000000000000..d4ebb92dc14675 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-01-imdb_microsoft_deberta_v3_large_seed_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English imdb_microsoft_deberta_v3_large_seed_1 DeBertaForSequenceClassification from utahnlp +author: John Snow Labs +name: imdb_microsoft_deberta_v3_large_seed_1 +date: 2024-09-01 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`imdb_microsoft_deberta_v3_large_seed_1` is a English model originally trained by utahnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/imdb_microsoft_deberta_v3_large_seed_1_en_5.5.0_3.0_1725209314777.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/imdb_microsoft_deberta_v3_large_seed_1_en_5.5.0_3.0_1725209314777.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("imdb_microsoft_deberta_v3_large_seed_1","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("imdb_microsoft_deberta_v3_large_seed_1", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|imdb_microsoft_deberta_v3_large_seed_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/utahnlp/imdb_microsoft_deberta-v3-large_seed-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-01-roberta_base_biomedical_spanish_plantl_gob_es_pipeline_es.md b/docs/_posts/ahmedlone127/2024-09-01-roberta_base_biomedical_spanish_plantl_gob_es_pipeline_es.md new file mode 100644 index 00000000000000..dc166fbc4f442a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-01-roberta_base_biomedical_spanish_plantl_gob_es_pipeline_es.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Castilian, Spanish roberta_base_biomedical_spanish_plantl_gob_es_pipeline pipeline RoBertaEmbeddings from PlanTL-GOB-ES +author: John Snow Labs +name: roberta_base_biomedical_spanish_plantl_gob_es_pipeline +date: 2024-09-01 +tags: [es, open_source, pipeline, onnx] +task: Embeddings +language: es +edition: Spark NLP 5.4.2 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_biomedical_spanish_plantl_gob_es_pipeline` is a Castilian, Spanish model originally trained by PlanTL-GOB-ES. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_biomedical_spanish_plantl_gob_es_pipeline_es_5.4.2_3.0_1725186794677.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_biomedical_spanish_plantl_gob_es_pipeline_es_5.4.2_3.0_1725186794677.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_biomedical_spanish_plantl_gob_es_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_biomedical_spanish_plantl_gob_es_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
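+
+The snippets above assume an existing DataFrame `df`; a minimal sketch of preparing one is shown below. The single `text` column is an assumption based on the non-pipeline examples in this set, and `annotate` is shown as a lightweight alternative for single strings.
+
+```python
+# Hypothetical input DataFrame; a string column named "text" is assumed.
+df = spark.createDataFrame([["El paciente presenta fiebre y tos persistente."]]).toDF("text")
+annotations = pipeline.transform(df)
+
+# For quick checks, annotate() accepts a plain string instead of a DataFrame.
+result = pipeline.annotate("El paciente presenta fiebre y tos persistente.")
+```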
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_biomedical_spanish_plantl_gob_es_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.4.2+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|299.0 MB| + +## References + +https://huggingface.co/PlanTL-GOB-ES/roberta-base-biomedical-es + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-01-subreddit_description_topic_classifier_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-01-subreddit_description_topic_classifier_pipeline_en.md new file mode 100644 index 00000000000000..95c151e59e6cd8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-01-subreddit_description_topic_classifier_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English subreddit_description_topic_classifier_pipeline pipeline DistilBertForSequenceClassification from gulnuravci +author: John Snow Labs +name: subreddit_description_topic_classifier_pipeline +date: 2024-09-01 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`subreddit_description_topic_classifier_pipeline` is a English model originally trained by gulnuravci. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/subreddit_description_topic_classifier_pipeline_en_5.5.0_3.0_1725213664485.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/subreddit_description_topic_classifier_pipeline_en_5.5.0_3.0_1725213664485.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("subreddit_description_topic_classifier_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("subreddit_description_topic_classifier_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
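+
+For a quick single-document check, the pipeline can also be called through `annotate`; a sketch is below. The `class` key is an assumption about the classifier stage's output column, not something stated in this card.
+
+```python
+# Classify one subreddit description without building a DataFrame first.
+result = pipeline.annotate("A community for sharing home-cooked recipes and kitchen tips.")
+print(result["class"])  # assumed output key; adjust if the pipeline names it differently
+```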
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|subreddit_description_topic_classifier_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/gulnuravci/subreddit_description_topic_classifier + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-01-topic_labor_movement_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-01-topic_labor_movement_pipeline_en.md new file mode 100644 index 00000000000000..a5a2ff9486962b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-01-topic_labor_movement_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English topic_labor_movement_pipeline pipeline RoBertaForSequenceClassification from dell-research-harvard +author: John Snow Labs +name: topic_labor_movement_pipeline +date: 2024-09-01 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.4.2 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`topic_labor_movement_pipeline` is a English model originally trained by dell-research-harvard. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/topic_labor_movement_pipeline_en_5.4.2_3.0_1725194759819.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/topic_labor_movement_pipeline_en_5.4.2_3.0_1725194759819.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("topic_labor_movement_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("topic_labor_movement_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|topic_labor_movement_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.4.2+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|309.1 MB| + +## References + +https://huggingface.co/dell-research-harvard/topic-labor_movement + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-albert_base_qa_1_batch_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-02-albert_base_qa_1_batch_1_pipeline_en.md new file mode 100644 index 00000000000000..af393d7d8c18af --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-albert_base_qa_1_batch_1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English albert_base_qa_1_batch_1_pipeline pipeline AlbertForQuestionAnswering from mateiaass +author: John Snow Labs +name: albert_base_qa_1_batch_1_pipeline +date: 2024-09-02 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_base_qa_1_batch_1_pipeline` is a English model originally trained by mateiaass. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_qa_1_batch_1_pipeline_en_5.5.0_3.0_1725310121141.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_qa_1_batch_1_pipeline_en_5.5.0_3.0_1725310121141.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("albert_base_qa_1_batch_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("albert_base_qa_1_batch_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
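+
+Because this pipeline bundles a MultiDocumentAssembler, it most likely expects a question and a context rather than a single text column. The sketch below assumes `question` and `context` column names, which is the usual convention for these question-answering pipelines but is not confirmed by this card.
+
+```python
+# Hypothetical question/context input; the column names are an assumption.
+df = spark.createDataFrame(
+    [["What does the library provide?", "Spark NLP provides pretrained pipelines and annotators."]]
+).toDF("question", "context")
+
+annotations = pipeline.transform(df)
+```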
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_qa_1_batch_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|42.0 MB| + +## References + +https://huggingface.co/mateiaass/albert-base-qa-1-batch-1 + +## Included Models + +- MultiDocumentAssembler +- AlbertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-albert_base_qa_coqa_2_k_fold_2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-02-albert_base_qa_coqa_2_k_fold_2_pipeline_en.md new file mode 100644 index 00000000000000..70c1f715c9be46 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-albert_base_qa_coqa_2_k_fold_2_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English albert_base_qa_coqa_2_k_fold_2_pipeline pipeline AlbertForQuestionAnswering from mateiaass +author: John Snow Labs +name: albert_base_qa_coqa_2_k_fold_2_pipeline +date: 2024-09-02 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_base_qa_coqa_2_k_fold_2_pipeline` is a English model originally trained by mateiaass. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_qa_coqa_2_k_fold_2_pipeline_en_5.5.0_3.0_1725310002428.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_qa_coqa_2_k_fold_2_pipeline_en_5.5.0_3.0_1725310002428.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("albert_base_qa_coqa_2_k_fold_2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("albert_base_qa_coqa_2_k_fold_2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_qa_coqa_2_k_fold_2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|42.0 MB| + +## References + +https://huggingface.co/mateiaass/albert-base-qa-coQA-2-k-fold-2 + +## Included Models + +- MultiDocumentAssembler +- AlbertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-bert_mini_uncased_en.md b/docs/_posts/ahmedlone127/2024-09-02-bert_mini_uncased_en.md new file mode 100644 index 00000000000000..d012a8e53f02ef --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-bert_mini_uncased_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_mini_uncased BertEmbeddings from gaunernst +author: John Snow Labs +name: bert_mini_uncased +date: 2024-09-02 +tags: [en, open_source, onnx, embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_mini_uncased` is a English model originally trained by gaunernst. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_mini_uncased_en_5.5.0_3.0_1725318489391.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_mini_uncased_en_5.5.0_3.0_1725318489391.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_mini_uncased","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("bert_mini_uncased","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
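+
+A minimal sketch of pulling the per-token vectors out of `pipelineDF` follows; it assumes the pipeline from the Python example above and its `embeddings` output column.
+
+```python
+from pyspark.sql.functions import explode
+
+# One annotation per token: explode, then take the token text and its vector.
+pipelineDF.select(explode("embeddings").alias("emb")) \
+    .selectExpr("emb.result as token", "emb.embeddings as vector") \
+    .show(truncate=False)
+```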
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_mini_uncased| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|en| +|Size:|41.9 MB| + +## References + +https://huggingface.co/gaunernst/bert-mini-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-burmese_awesome_model_20wds_en.md b/docs/_posts/ahmedlone127/2024-09-02-burmese_awesome_model_20wds_en.md new file mode 100644 index 00000000000000..5c50355f4c97d4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-burmese_awesome_model_20wds_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_model_20wds DistilBertForSequenceClassification from 20wds +author: John Snow Labs +name: burmese_awesome_model_20wds +date: 2024-09-02 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_model_20wds` is a English model originally trained by 20wds. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_model_20wds_en_5.5.0_3.0_1725305767512.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_model_20wds_en_5.5.0_3.0_1725305767512.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("burmese_awesome_model_20wds","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("burmese_awesome_model_20wds", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_model_20wds| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/20wds/my_awesome_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-burmese_awesome_model_lukiccc_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-02-burmese_awesome_model_lukiccc_pipeline_en.md new file mode 100644 index 00000000000000..0b150e24b8460a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-burmese_awesome_model_lukiccc_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_model_lukiccc_pipeline pipeline DistilBertForSequenceClassification from Lukiccc +author: John Snow Labs +name: burmese_awesome_model_lukiccc_pipeline +date: 2024-09-02 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_model_lukiccc_pipeline` is a English model originally trained by Lukiccc. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_model_lukiccc_pipeline_en_5.5.0_3.0_1725291782005.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_model_lukiccc_pipeline_en_5.5.0_3.0_1725291782005.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_model_lukiccc_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_model_lukiccc_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_model_lukiccc_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Lukiccc/my_awesome_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-burmese_awesome_model_rdsmaia_en.md b/docs/_posts/ahmedlone127/2024-09-02-burmese_awesome_model_rdsmaia_en.md new file mode 100644 index 00000000000000..27b9d7bf6aa348 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-burmese_awesome_model_rdsmaia_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_model_rdsmaia DistilBertForSequenceClassification from rdsmaia +author: John Snow Labs +name: burmese_awesome_model_rdsmaia +date: 2024-09-02 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_model_rdsmaia` is a English model originally trained by rdsmaia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_model_rdsmaia_en_5.5.0_3.0_1725306262160.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_model_rdsmaia_en_5.5.0_3.0_1725306262160.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("burmese_awesome_model_rdsmaia","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("burmese_awesome_model_rdsmaia", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_model_rdsmaia| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/rdsmaia/my_awesome_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-das22_10_camembert_pretrained_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-02-das22_10_camembert_pretrained_pipeline_en.md new file mode 100644 index 00000000000000..6a0fdbe7acb0ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-das22_10_camembert_pretrained_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English das22_10_camembert_pretrained_pipeline pipeline CamemBertEmbeddings from HueyNemud +author: John Snow Labs +name: das22_10_camembert_pretrained_pipeline +date: 2024-09-02 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`das22_10_camembert_pretrained_pipeline` is a English model originally trained by HueyNemud. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/das22_10_camembert_pretrained_pipeline_en_5.5.0_3.0_1725320299825.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/das22_10_camembert_pretrained_pipeline_en_5.5.0_3.0_1725320299825.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("das22_10_camembert_pretrained_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("das22_10_camembert_pretrained_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|das22_10_camembert_pretrained_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|412.8 MB| + +## References + +https://huggingface.co/HueyNemud/das22-10-camembert_pretrained + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-distil_task_b_2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-02-distil_task_b_2_pipeline_en.md new file mode 100644 index 00000000000000..c6c12f8807c0ac --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-distil_task_b_2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distil_task_b_2_pipeline pipeline DistilBertForSequenceClassification from sheduele +author: John Snow Labs +name: distil_task_b_2_pipeline +date: 2024-09-02 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distil_task_b_2_pipeline` is a English model originally trained by sheduele. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distil_task_b_2_pipeline_en_5.5.0_3.0_1725291669169.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distil_task_b_2_pipeline_en_5.5.0_3.0_1725291669169.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distil_task_b_2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distil_task_b_2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distil_task_b_2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/sheduele/distil_task_B_2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-distilbert_base_uncased_finetuned_lgbt_classification_en.md b/docs/_posts/ahmedlone127/2024-09-02-distilbert_base_uncased_finetuned_lgbt_classification_en.md new file mode 100644 index 00000000000000..ea4c805102cce7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-distilbert_base_uncased_finetuned_lgbt_classification_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_lgbt_classification DistilBertForSequenceClassification from savinda99 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_lgbt_classification +date: 2024-09-02 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_lgbt_classification` is a English model originally trained by savinda99. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_lgbt_classification_en_5.5.0_3.0_1725291868725.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_lgbt_classification_en_5.5.0_3.0_1725291868725.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_lgbt_classification","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_lgbt_classification", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_lgbt_classification| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.4 MB| + +## References + +https://huggingface.co/savinda99/distilbert-base-uncased-finetuned-lgbt-classification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-distilbert_nsfw_appropriate_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-02-distilbert_nsfw_appropriate_pipeline_en.md new file mode 100644 index 00000000000000..f907d10783a9ae --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-distilbert_nsfw_appropriate_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_nsfw_appropriate_pipeline pipeline DistilBertForSequenceClassification from mboachie +author: John Snow Labs +name: distilbert_nsfw_appropriate_pipeline +date: 2024-09-02 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_nsfw_appropriate_pipeline` is a English model originally trained by mboachie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_nsfw_appropriate_pipeline_en_5.5.0_3.0_1725291972294.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_nsfw_appropriate_pipeline_en_5.5.0_3.0_1725291972294.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_nsfw_appropriate_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_nsfw_appropriate_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_nsfw_appropriate_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/mboachie/distilbert_nsfw_appropriate + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-distilbert_uncased_finetuned_cyberbullying_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-02-distilbert_uncased_finetuned_cyberbullying_pipeline_en.md new file mode 100644 index 00000000000000..78515b42cebe6d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-distilbert_uncased_finetuned_cyberbullying_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_uncased_finetuned_cyberbullying_pipeline pipeline DistilBertForSequenceClassification from SSEF-HG-AC +author: John Snow Labs +name: distilbert_uncased_finetuned_cyberbullying_pipeline +date: 2024-09-02 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_uncased_finetuned_cyberbullying_pipeline` is a English model originally trained by SSEF-HG-AC. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_uncased_finetuned_cyberbullying_pipeline_en_5.5.0_3.0_1725305537632.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_uncased_finetuned_cyberbullying_pipeline_en_5.5.0_3.0_1725305537632.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_uncased_finetuned_cyberbullying_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_uncased_finetuned_cyberbullying_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_uncased_finetuned_cyberbullying_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/SSEF-HG-AC/distilbert-uncased-finetuned-cyberbullying + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-distilbert_uncased_names_en.md b/docs/_posts/ahmedlone127/2024-09-02-distilbert_uncased_names_en.md new file mode 100644 index 00000000000000..2222b4a39b3951 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-distilbert_uncased_names_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_uncased_names DistilBertForTokenClassification from jackfriedson +author: John Snow Labs +name: distilbert_uncased_names +date: 2024-09-02 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_uncased_names` is a English model originally trained by jackfriedson. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_uncased_names_en_5.5.0_3.0_1725267776578.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_uncased_names_en_5.5.0_3.0_1725267776578.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_uncased_names","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_uncased_names", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
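+
+A short sketch of reading the predicted tags back out of `pipelineDF` is shown below; it assumes the pipeline from the Python example above, with tokens in the `token` column and tags in `ner`.
+
+```python
+# Pair each token with its predicted IOB tag.
+pipelineDF.selectExpr("token.result as tokens", "ner.result as tags") \
+    .show(truncate=False)
+```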
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_uncased_names| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/jackfriedson/distilbert-uncased-names \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-dummy_model_ankush_chander_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-02-dummy_model_ankush_chander_pipeline_en.md new file mode 100644 index 00000000000000..a6be12af0fa5f0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-dummy_model_ankush_chander_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_ankush_chander_pipeline pipeline CamemBertEmbeddings from Ankush-Chander +author: John Snow Labs +name: dummy_model_ankush_chander_pipeline +date: 2024-09-02 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_ankush_chander_pipeline` is a English model originally trained by Ankush-Chander. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_ankush_chander_pipeline_en_5.5.0_3.0_1725320771021.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_ankush_chander_pipeline_en_5.5.0_3.0_1725320771021.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_ankush_chander_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_ankush_chander_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_ankush_chander_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/Ankush-Chander/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-dummy_model_jonathanlin0707_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-02-dummy_model_jonathanlin0707_pipeline_en.md new file mode 100644 index 00000000000000..989eb6f7f0e9a5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-dummy_model_jonathanlin0707_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_jonathanlin0707_pipeline pipeline CamemBertEmbeddings from JonathanLin0707 +author: John Snow Labs +name: dummy_model_jonathanlin0707_pipeline +date: 2024-09-02 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_jonathanlin0707_pipeline` is a English model originally trained by JonathanLin0707. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_jonathanlin0707_pipeline_en_5.5.0_3.0_1725297671815.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_jonathanlin0707_pipeline_en_5.5.0_3.0_1725297671815.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_jonathanlin0707_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_jonathanlin0707_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_jonathanlin0707_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/JonathanLin0707/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-dummy_model_jongyeop_en.md b/docs/_posts/ahmedlone127/2024-09-02-dummy_model_jongyeop_en.md new file mode 100644 index 00000000000000..59923934d8ab75 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-dummy_model_jongyeop_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_jongyeop CamemBertEmbeddings from JongYeop +author: John Snow Labs +name: dummy_model_jongyeop +date: 2024-09-02 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_jongyeop` is a English model originally trained by JongYeop. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_jongyeop_en_5.5.0_3.0_1725300445391.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_jongyeop_en_5.5.0_3.0_1725300445391.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_jongyeop","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_jongyeop","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_jongyeop| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/JongYeop/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-dummy_model_mindnetml_en.md b/docs/_posts/ahmedlone127/2024-09-02-dummy_model_mindnetml_en.md new file mode 100644 index 00000000000000..469ac2be68999b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-dummy_model_mindnetml_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_mindnetml CamemBertEmbeddings from MindNetML +author: John Snow Labs +name: dummy_model_mindnetml +date: 2024-09-02 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_mindnetml` is a English model originally trained by MindNetML. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_mindnetml_en_5.5.0_3.0_1725303113669.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_mindnetml_en_5.5.0_3.0_1725303113669.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_mindnetml","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_mindnetml","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_mindnetml| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/MindNetML/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-dummy_model_zonepg_en.md b/docs/_posts/ahmedlone127/2024-09-02-dummy_model_zonepg_en.md new file mode 100644 index 00000000000000..d97f2063d4d0ec --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-dummy_model_zonepg_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_zonepg CamemBertEmbeddings from ZonePG +author: John Snow Labs +name: dummy_model_zonepg +date: 2024-09-02 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_zonepg` is a English model originally trained by ZonePG. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_zonepg_en_5.5.0_3.0_1725296402950.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_zonepg_en_5.5.0_3.0_1725296402950.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_zonepg","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_zonepg","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_zonepg| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/ZonePG/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-e5_90k_en.md b/docs/_posts/ahmedlone127/2024-09-02-e5_90k_en.md new file mode 100644 index 00000000000000..01ce6594ac47db --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-e5_90k_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English e5_90k E5Embeddings from heka-ai +author: John Snow Labs +name: e5_90k +date: 2024-09-02 +tags: [en, open_source, onnx, embeddings, e5] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: E5Embeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained E5Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`e5_90k` is a English model originally trained by heka-ai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/e5_90k_en_5.5.0_3.0_1725259746184.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/e5_90k_en_5.5.0_3.0_1725259746184.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = E5Embeddings.pretrained("e5_90k","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = E5Embeddings.pretrained("e5_90k","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
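+
+Since E5 produces one sentence-level annotation per input row, the vectors can be pulled out directly, assuming the Python pipeline above has already produced `pipelineDF`:
+
+```python
+# one dense vector per input text
+vectors = pipelineDF.selectExpr("explode(embeddings.embeddings) as vector")
+vectors.show(truncate=60)
+```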
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|e5_90k| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[E5]| +|Language:|en| +|Size:|401.2 MB| + +## References + +https://huggingface.co/heka-ai/e5-90k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-emotion_recognition_en.md b/docs/_posts/ahmedlone127/2024-09-02-emotion_recognition_en.md new file mode 100644 index 00000000000000..029a38f2a3ab06 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-emotion_recognition_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English emotion_recognition DistilBertForSequenceClassification from Atharvsharma14 +author: John Snow Labs +name: emotion_recognition +date: 2024-09-02 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`emotion_recognition` is a English model originally trained by Atharvsharma14. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/emotion_recognition_en_5.5.0_3.0_1725306134334.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/emotion_recognition_en_5.5.0_3.0_1725306134334.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("emotion_recognition","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("emotion_recognition", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
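+
+A short sketch of how the predicted label could be read from the transformed DataFrame, assuming the Python pipeline above has already produced `pipelineDF`:
+
+```python
+# "class.result" holds the predicted label(s) for each input text
+pipelineDF.select("text", "class.result").show(truncate=False)
+```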
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|emotion_recognition| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Atharvsharma14/Emotion_Recognition \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-english_hebrew_modern_base_en.md b/docs/_posts/ahmedlone127/2024-09-02-english_hebrew_modern_base_en.md new file mode 100644 index 00000000000000..b5cccf373f0784 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-english_hebrew_modern_base_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English english_hebrew_modern_base MarianTransformer from orendar +author: John Snow Labs +name: english_hebrew_modern_base +date: 2024-09-02 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`english_hebrew_modern_base` is a English model originally trained by orendar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/english_hebrew_modern_base_en_5.5.0_3.0_1725295829753.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/english_hebrew_modern_base_en_5.5.0_3.0_1725295829753.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("english_hebrew_modern_base","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("english_hebrew_modern_base","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
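+
+Assuming the Python pipeline above has been run, the translated sentences end up in the `translation` column and can be inspected like this:
+
+```python
+# one translated string per detected input sentence
+pipelineDF.selectExpr("explode(translation.result) as translated").show(truncate=False)
+```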
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|english_hebrew_modern_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|351.9 MB| + +## References + +https://huggingface.co/orendar/en_he_base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-feel_italian_finetuned_pro_emit_big8_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-02-feel_italian_finetuned_pro_emit_big8_pipeline_en.md new file mode 100644 index 00000000000000..182f81aa30a5d0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-feel_italian_finetuned_pro_emit_big8_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English feel_italian_finetuned_pro_emit_big8_pipeline pipeline CamemBertForSequenceClassification from lupobricco +author: John Snow Labs +name: feel_italian_finetuned_pro_emit_big8_pipeline +date: 2024-09-02 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`feel_italian_finetuned_pro_emit_big8_pipeline` is a English model originally trained by lupobricco. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/feel_italian_finetuned_pro_emit_big8_pipeline_en_5.5.0_3.0_1725298848330.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/feel_italian_finetuned_pro_emit_big8_pipeline_en_5.5.0_3.0_1725298848330.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("feel_italian_finetuned_pro_emit_big8_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("feel_italian_finetuned_pro_emit_big8_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
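+
+Besides transforming a DataFrame, the downloaded pipeline can also annotate plain strings. A minimal sketch; the example sentence below is only an illustration and not part of the pipeline metadata:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("feel_italian_finetuned_pro_emit_big8_pipeline", lang="en")
+# annotate a single string without building a DataFrame first
+result = pipeline.annotate("Questo film mi ha reso davvero felice")  # hypothetical example sentence
+print(result)
+```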
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|feel_italian_finetuned_pro_emit_big8_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|398.1 MB| + +## References + +https://huggingface.co/lupobricco/feel_it_finetuned_pro_emit_big8 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-finance_article_titles_classifier_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-02-finance_article_titles_classifier_pipeline_en.md new file mode 100644 index 00000000000000..e1191c71a2e622 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-finance_article_titles_classifier_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English finance_article_titles_classifier_pipeline pipeline DistilBertForSequenceClassification from hatmimoha +author: John Snow Labs +name: finance_article_titles_classifier_pipeline +date: 2024-09-02 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finance_article_titles_classifier_pipeline` is a English model originally trained by hatmimoha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finance_article_titles_classifier_pipeline_en_5.5.0_3.0_1725291707709.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finance_article_titles_classifier_pipeline_en_5.5.0_3.0_1725291707709.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("finance_article_titles_classifier_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("finance_article_titles_classifier_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finance_article_titles_classifier_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|246.2 MB| + +## References + +https://huggingface.co/hatmimoha/finance-article-titles-classifier + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-finetuning_emotion_model_dearkarina_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-02-finetuning_emotion_model_dearkarina_pipeline_en.md new file mode 100644 index 00000000000000..bf2e96b5699868 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-finetuning_emotion_model_dearkarina_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English finetuning_emotion_model_dearkarina_pipeline pipeline DistilBertForSequenceClassification from dearkarina +author: John Snow Labs +name: finetuning_emotion_model_dearkarina_pipeline +date: 2024-09-02 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuning_emotion_model_dearkarina_pipeline` is a English model originally trained by dearkarina. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuning_emotion_model_dearkarina_pipeline_en_5.5.0_3.0_1725291812210.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuning_emotion_model_dearkarina_pipeline_en_5.5.0_3.0_1725291812210.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("finetuning_emotion_model_dearkarina_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("finetuning_emotion_model_dearkarina_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuning_emotion_model_dearkarina_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/dearkarina/finetuning-emotion-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-ganda_english_ai_lab_makerere_en.md b/docs/_posts/ahmedlone127/2024-09-02-ganda_english_ai_lab_makerere_en.md new file mode 100644 index 00000000000000..e3ad3b4a7324d4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-ganda_english_ai_lab_makerere_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ganda_english_ai_lab_makerere MarianTransformer from AI-Lab-Makerere +author: John Snow Labs +name: ganda_english_ai_lab_makerere +date: 2024-09-02 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ganda_english_ai_lab_makerere` is a English model originally trained by AI-Lab-Makerere. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ganda_english_ai_lab_makerere_en_5.5.0_3.0_1725295482492.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ganda_english_ai_lab_makerere_en_5.5.0_3.0_1725295482492.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("ganda_english_ai_lab_makerere","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("ganda_english_ai_lab_makerere","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ganda_english_ai_lab_makerere| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|532.4 MB| + +## References + +https://huggingface.co/AI-Lab-Makerere/lg_en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-imdb_distilbert_base_uncased_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-02-imdb_distilbert_base_uncased_pipeline_en.md new file mode 100644 index 00000000000000..fe9cd28713f50e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-imdb_distilbert_base_uncased_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English imdb_distilbert_base_uncased_pipeline pipeline DistilBertForSequenceClassification from Siki-77 +author: John Snow Labs +name: imdb_distilbert_base_uncased_pipeline +date: 2024-09-02 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`imdb_distilbert_base_uncased_pipeline` is a English model originally trained by Siki-77. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/imdb_distilbert_base_uncased_pipeline_en_5.5.0_3.0_1725292048090.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/imdb_distilbert_base_uncased_pipeline_en_5.5.0_3.0_1725292048090.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("imdb_distilbert_base_uncased_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("imdb_distilbert_base_uncased_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|imdb_distilbert_base_uncased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Siki-77/imdb_distilbert_base_uncased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-indo_roberta_small_id.md b/docs/_posts/ahmedlone127/2024-09-02-indo_roberta_small_id.md new file mode 100644 index 00000000000000..15e636e8294a6a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-indo_roberta_small_id.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Indonesian indo_roberta_small RoBertaEmbeddings from w11wo +author: John Snow Labs +name: indo_roberta_small +date: 2024-09-02 +tags: [id, open_source, onnx, embeddings, roberta] +task: Embeddings +language: id +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`indo_roberta_small` is a Indonesian model originally trained by w11wo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/indo_roberta_small_id_5.5.0_3.0_1725264257270.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/indo_roberta_small_id_5.5.0_3.0_1725264257270.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("indo_roberta_small","id") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("indo_roberta_small","id") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|indo_roberta_small| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|id| +|Size:|311.5 MB| + +## References + +https://huggingface.co/w11wo/indo-roberta-small \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-marianmt_hin_eng_czech_pipeline_hi.md b/docs/_posts/ahmedlone127/2024-09-02-marianmt_hin_eng_czech_pipeline_hi.md new file mode 100644 index 00000000000000..56c5f4fc44cf12 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-marianmt_hin_eng_czech_pipeline_hi.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Hindi marianmt_hin_eng_czech_pipeline pipeline MarianTransformer from ar5entum +author: John Snow Labs +name: marianmt_hin_eng_czech_pipeline +date: 2024-09-02 +tags: [hi, open_source, pipeline, onnx] +task: Translation +language: hi +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marianmt_hin_eng_czech_pipeline` is a Hindi model originally trained by ar5entum. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marianmt_hin_eng_czech_pipeline_hi_5.5.0_3.0_1725295207563.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marianmt_hin_eng_czech_pipeline_hi_5.5.0_3.0_1725295207563.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("marianmt_hin_eng_czech_pipeline", lang = "hi") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("marianmt_hin_eng_czech_pipeline", lang = "hi") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marianmt_hin_eng_czech_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|hi| +|Size:|533.1 MB| + +## References + +https://huggingface.co/ar5entum/marianMT_hin_eng_cs + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-mobilebert_uncased_finetuned_squadv1_mrm8488_en.md b/docs/_posts/ahmedlone127/2024-09-02-mobilebert_uncased_finetuned_squadv1_mrm8488_en.md new file mode 100644 index 00000000000000..dc458c1366c0bf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-mobilebert_uncased_finetuned_squadv1_mrm8488_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English mobilebert_uncased_finetuned_squadv1_mrm8488 BertForQuestionAnswering from mrm8488 +author: John Snow Labs +name: mobilebert_uncased_finetuned_squadv1_mrm8488 +date: 2024-09-02 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mobilebert_uncased_finetuned_squadv1_mrm8488` is a English model originally trained by mrm8488. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mobilebert_uncased_finetuned_squadv1_mrm8488_en_5.5.0_3.0_1725312617500.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mobilebert_uncased_finetuned_squadv1_mrm8488_en_5.5.0_3.0_1725312617500.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = BertForQuestionAnswering.pretrained("mobilebert_uncased_finetuned_squadv1_mrm8488","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = BertForQuestionAnswering.pretrained("mobilebert_uncased_finetuned_squadv1_mrm8488", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
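+
+Assuming the Python pipeline above has been run, the predicted answer span can be read back next to the question:
+
+```python
+# "answer.result" holds the extracted answer span for each question/context pair
+pipelineDF.select("document_question.result", "answer.result").show(truncate=False)
+```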
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mobilebert_uncased_finetuned_squadv1_mrm8488| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|92.5 MB| + +## References + +https://huggingface.co/mrm8488/mobilebert-uncased-finetuned-squadv1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-model_en.md b/docs/_posts/ahmedlone127/2024-09-02-model_en.md new file mode 100644 index 00000000000000..cc69e68b88992f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-model_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English model MPNetEmbeddings from Watwat100 +author: John Snow Labs +name: model +date: 2024-09-02 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`model` is a English model originally trained by Watwat100. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/model_en_5.5.0_3.0_1725280815466.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/model_en_5.5.0_3.0_1725280815466.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("model","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("model","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/Watwat100/model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-mpnet_base_nli_adaptive_layer_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-02-mpnet_base_nli_adaptive_layer_pipeline_en.md new file mode 100644 index 00000000000000..4450a6379d3e4d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-mpnet_base_nli_adaptive_layer_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English mpnet_base_nli_adaptive_layer_pipeline pipeline MPNetEmbeddings from tomaarsen +author: John Snow Labs +name: mpnet_base_nli_adaptive_layer_pipeline +date: 2024-09-02 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mpnet_base_nli_adaptive_layer_pipeline` is a English model originally trained by tomaarsen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mpnet_base_nli_adaptive_layer_pipeline_en_5.5.0_3.0_1725313990897.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mpnet_base_nli_adaptive_layer_pipeline_en_5.5.0_3.0_1725313990897.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mpnet_base_nli_adaptive_layer_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mpnet_base_nli_adaptive_layer_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mpnet_base_nli_adaptive_layer_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|404.6 MB| + +## References + +https://huggingface.co/tomaarsen/mpnet-base-nli-adaptive-layer + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_pankaj10034_en.md b/docs/_posts/ahmedlone127/2024-09-02-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_pankaj10034_en.md new file mode 100644 index 00000000000000..1cccf040090052 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_pankaj10034_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_pankaj10034 MarianTransformer from pankaj10034 +author: John Snow Labs +name: opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_pankaj10034 +date: 2024-09-02 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_pankaj10034` is a English model originally trained by pankaj10034. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_pankaj10034_en_5.5.0_3.0_1725295586305.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_pankaj10034_en_5.5.0_3.0_1725295586305.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_pankaj10034","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_pankaj10034","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_pankaj10034| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.6 MB| + +## References + +https://huggingface.co/pankaj10034/opus-mt-en-ro-finetuned-en-to-ro \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-opus_maltese_indonesian_english_ccmatrix_norwegian_warmup_best_bleu_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-02-opus_maltese_indonesian_english_ccmatrix_norwegian_warmup_best_bleu_pipeline_en.md new file mode 100644 index 00000000000000..7b9e2a6204af1b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-opus_maltese_indonesian_english_ccmatrix_norwegian_warmup_best_bleu_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_maltese_indonesian_english_ccmatrix_norwegian_warmup_best_bleu_pipeline pipeline MarianTransformer from yonathanstwn +author: John Snow Labs +name: opus_maltese_indonesian_english_ccmatrix_norwegian_warmup_best_bleu_pipeline +date: 2024-09-02 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_indonesian_english_ccmatrix_norwegian_warmup_best_bleu_pipeline` is a English model originally trained by yonathanstwn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_indonesian_english_ccmatrix_norwegian_warmup_best_bleu_pipeline_en_5.5.0_3.0_1725296004290.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_indonesian_english_ccmatrix_norwegian_warmup_best_bleu_pipeline_en_5.5.0_3.0_1725296004290.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_indonesian_english_ccmatrix_norwegian_warmup_best_bleu_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_indonesian_english_ccmatrix_norwegian_warmup_best_bleu_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_indonesian_english_ccmatrix_norwegian_warmup_best_bleu_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|482.0 MB| + +## References + +https://huggingface.co/yonathanstwn/opus-mt-id-en-ccmatrix-no-warmup-best-bleu + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-personal_whisper_distilled_model_en.md b/docs/_posts/ahmedlone127/2024-09-02-personal_whisper_distilled_model_en.md new file mode 100644 index 00000000000000..d39ab1121fac61 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-personal_whisper_distilled_model_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English personal_whisper_distilled_model WhisperForCTC from fractalego +author: John Snow Labs +name: personal_whisper_distilled_model +date: 2024-09-02 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`personal_whisper_distilled_model` is a English model originally trained by fractalego. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/personal_whisper_distilled_model_en_5.5.0_3.0_1725290475917.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/personal_whisper_distilled_model_en_5.5.0_3.0_1725290475917.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("personal_whisper_distilled_model","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+# "data" is a DataFrame with an "audio_content" column holding the raw audio samples as an array of floats
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("personal_whisper_distilled_model", "en")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+// "data" is a DataFrame with an "audio_content" column holding the raw audio samples as an array of floats
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
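+
+A sketch of how the `data` DataFrame used above could be assembled from a local recording. The file name is hypothetical and `librosa` is only one possible way to obtain the raw samples; Whisper expects mono audio resampled to 16 kHz:
+
+```python
+import librosa
+
+# load a local file as a mono float array at 16 kHz (hypothetical path)
+raw_audio, _ = librosa.load("sample.wav", sr=16000)
+data = spark.createDataFrame([[raw_audio.tolist()]], ["audio_content"])
+```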
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|personal_whisper_distilled_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|2.2 GB| + +## References + +https://huggingface.co/fractalego/personal-whisper-distilled-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-readabert_french_fr.md b/docs/_posts/ahmedlone127/2024-09-02-readabert_french_fr.md new file mode 100644 index 00000000000000..297359ec2b0e6a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-readabert_french_fr.md @@ -0,0 +1,94 @@ +--- +layout: model +title: French readabert_french CamemBertForSequenceClassification from tareknaous +author: John Snow Labs +name: readabert_french +date: 2024-09-02 +tags: [fr, open_source, onnx, sequence_classification, camembert] +task: Text Classification +language: fr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`readabert_french` is a French model originally trained by tareknaous. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/readabert_french_fr_5.5.0_3.0_1725299123866.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/readabert_french_fr_5.5.0_3.0_1725299123866.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = CamemBertForSequenceClassification.pretrained("readabert_french","fr") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = CamemBertForSequenceClassification.pretrained("readabert_french", "fr")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|readabert_french| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|fr| +|Size:|393.2 MB| + +## References + +https://huggingface.co/tareknaous/readabert-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-roberta_ner_roberta_base_tweetner_2020_2021_continuous_en.md b/docs/_posts/ahmedlone127/2024-09-02-roberta_ner_roberta_base_tweetner_2020_2021_continuous_en.md new file mode 100644 index 00000000000000..ccbda85184240e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-roberta_ner_roberta_base_tweetner_2020_2021_continuous_en.md @@ -0,0 +1,112 @@ +--- +layout: model +title: English RobertaForTokenClassification Base Cased model (from tner) +author: John Snow Labs +name: roberta_ner_roberta_base_tweetner_2020_2021_continuous +date: 2024-09-02 +tags: [bert, ner, open_source, en, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RobertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `roberta-base-tweetner-2020-2021-continuous` is a English model originally trained by `tner`. + +## Predicted Entities + +`group`, `creative_work`, `person`, `event`, `corporation`, `location`, `product` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_ner_roberta_base_tweetner_2020_2021_continuous_en_5.5.0_3.0_1725311564110.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_ner_roberta_base_tweetner_2020_2021_continuous_en_5.5.0_3.0_1725311564110.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDetector = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")\
+    .setInputCols(["document"])\
+    .setOutputCol("sentence")
+
+tokenizer = Tokenizer() \
+    .setInputCols("sentence") \
+    .setOutputCol("token")
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_ner_roberta_base_tweetner_2020_2021_continuous","en") \
+    .setInputCols(["sentence", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline(stages=[documentAssembler, sentenceDetector, tokenizer, tokenClassifier])
+
+data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDetector = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("sentence"))
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_ner_roberta_base_tweetner_2020_2021_continuous","en")
+    .setInputCols(Array("sentence", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDetector, tokenizer, tokenClassifier))
+
+val data = Seq("PUT YOUR STRING HERE").toDF("text")
+
+val result = pipeline.fit(data).transform(data)
+```
+
+{:.nlu-block}
+```python
+import nlu
+nlu.load("en.ner.roberta.tweet.tweetner_2020_2021_continuous.base.by_tner").predict("""PUT YOUR STRING HERE""")
+```
+
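+
+Assuming the Python snippet above has produced `result`, the tokens and their predicted tags can be inspected side by side:
+
+```python
+# tokens and NER tags are aligned element-wise in the two arrays
+result.select("token.result", "ner.result").show(truncate=False)
+```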
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_ner_roberta_base_tweetner_2020_2021_continuous| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|443.6 MB| + +## References + +References + +- https://huggingface.co/tner/roberta-base-tweetner-2020-2021-continuous \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-roberta_qa_base_spanish_squades_becasincentivos4_pipeline_es.md b/docs/_posts/ahmedlone127/2024-09-02-roberta_qa_base_spanish_squades_becasincentivos4_pipeline_es.md new file mode 100644 index 00000000000000..4598d1cc59cb10 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-roberta_qa_base_spanish_squades_becasincentivos4_pipeline_es.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Castilian, Spanish roberta_qa_base_spanish_squades_becasincentivos4_pipeline pipeline RoBertaForQuestionAnswering from Evelyn18 +author: John Snow Labs +name: roberta_qa_base_spanish_squades_becasincentivos4_pipeline +date: 2024-09-02 +tags: [es, open_source, pipeline, onnx] +task: Question Answering +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_qa_base_spanish_squades_becasincentivos4_pipeline` is a Castilian, Spanish model originally trained by Evelyn18. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_qa_base_spanish_squades_becasincentivos4_pipeline_es_5.5.0_3.0_1725252106437.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_qa_base_spanish_squades_becasincentivos4_pipeline_es_5.5.0_3.0_1725252106437.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_qa_base_spanish_squades_becasincentivos4_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_qa_base_spanish_squades_becasincentivos4_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_qa_base_spanish_squades_becasincentivos4_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|459.1 MB| + +## References + +https://huggingface.co/Evelyn18/roberta-base-spanish-squades-becasIncentivos4 + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-securebert_cyner_en.md b/docs/_posts/ahmedlone127/2024-09-02-securebert_cyner_en.md new file mode 100644 index 00000000000000..a58305a70fea12 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-securebert_cyner_en.md @@ -0,0 +1,100 @@ +--- +layout: model +title: English securebert_cyner RoBertaForTokenClassification from anonymouspd +author: John Snow Labs +name: securebert_cyner +date: 2024-09-02 +tags: [roberta, en, open_source, token_classification, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`securebert_cyner` is a English model originally trained by anonymouspd. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/securebert_cyner_en_5.5.0_3.0_1725311409129.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/securebert_cyner_en_5.5.0_3.0_1725311409129.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("securebert_cyner","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification
+    .pretrained("securebert_cyner", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+
+val data = Seq("I love spark-nlp").toDF("text")
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+```
+
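+
+For quick checks on single strings, the fitted pipeline can be wrapped in a LightPipeline. The example sentence below is only an illustration:
+
+```python
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+# returns a dict keyed by the output columns, e.g. "ner"
+print(light.annotate("APT29 delivered Cobalt Strike payloads to the target network")["ner"])
+```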
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|securebert_cyner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|466.1 MB| + +## References + +References + +https://huggingface.co/anonymouspd/SecureBERT-CyNER \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-sent_bert_base_greek_uncased_v1_el.md b/docs/_posts/ahmedlone127/2024-09-02-sent_bert_base_greek_uncased_v1_el.md new file mode 100644 index 00000000000000..d7919560f56b6d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-sent_bert_base_greek_uncased_v1_el.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Modern Greek (1453-) sent_bert_base_greek_uncased_v1 BertSentenceEmbeddings from nlpaueb +author: John Snow Labs +name: sent_bert_base_greek_uncased_v1 +date: 2024-09-02 +tags: [el, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: el +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_base_greek_uncased_v1` is a Modern Greek (1453-) model originally trained by nlpaueb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_base_greek_uncased_v1_el_5.5.0_3.0_1725273267564.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_base_greek_uncased_v1_el_5.5.0_3.0_1725273267564.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_bert_base_greek_uncased_v1","el") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_bert_base_greek_uncased_v1","el") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
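+
+If you want the raw sentence vectors rather than annotation structs, the output column can be unpacked with Spark SQL. This is a generic Spark NLP pattern, shown here as a sketch that assumes the `pipelineDF` produced above.
+
+```python
+# Each element of `embeddings` is an annotation struct; `result` holds the sentence
+# text and `embeddings` holds the corresponding vector.
+pipelineDF.selectExpr("explode(embeddings) as ann") \
+    .selectExpr("ann.result as sentence", "ann.embeddings as vector") \
+    .show(truncate=80)
+```
+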
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_base_greek_uncased_v1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|el| +|Size:|421.1 MB| + +## References + +https://huggingface.co/nlpaueb/bert-base-greek-uncased-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-sent_bert_large_portuguese_cased_pt.md b/docs/_posts/ahmedlone127/2024-09-02-sent_bert_large_portuguese_cased_pt.md new file mode 100644 index 00000000000000..da6c98ab96848b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-sent_bert_large_portuguese_cased_pt.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Portuguese sent_bert_large_portuguese_cased BertSentenceEmbeddings from neuralmind +author: John Snow Labs +name: sent_bert_large_portuguese_cased +date: 2024-09-02 +tags: [pt, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: pt +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_large_portuguese_cased` is a Portuguese model originally trained by neuralmind. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_large_portuguese_cased_pt_5.5.0_3.0_1725273320196.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_large_portuguese_cased_pt_5.5.0_3.0_1725273320196.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_bert_large_portuguese_cased","pt") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_bert_large_portuguese_cased","pt") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_large_portuguese_cased| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|1.2 GB| + +## References + +https://huggingface.co/neuralmind/bert-large-portuguese-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-sitexsometre_camembert_base_ccnet_stsb200_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-02-sitexsometre_camembert_base_ccnet_stsb200_pipeline_en.md new file mode 100644 index 00000000000000..81062c0b83ac59 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-sitexsometre_camembert_base_ccnet_stsb200_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English sitexsometre_camembert_base_ccnet_stsb200_pipeline pipeline CamemBertForSequenceClassification from Kigo1974 +author: John Snow Labs +name: sitexsometre_camembert_base_ccnet_stsb200_pipeline +date: 2024-09-02 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sitexsometre_camembert_base_ccnet_stsb200_pipeline` is a English model originally trained by Kigo1974. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sitexsometre_camembert_base_ccnet_stsb200_pipeline_en_5.5.0_3.0_1725299037745.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sitexsometre_camembert_base_ccnet_stsb200_pipeline_en_5.5.0_3.0_1725299037745.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sitexsometre_camembert_base_ccnet_stsb200_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sitexsometre_camembert_base_ccnet_stsb200_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sitexsometre_camembert_base_ccnet_stsb200_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|390.8 MB| + +## References + +https://huggingface.co/Kigo1974/sitexsometre-camembert-base-ccnet-stsb200 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-sloberta_slo_word_spelling_annotator_sl.md b/docs/_posts/ahmedlone127/2024-09-02-sloberta_slo_word_spelling_annotator_sl.md new file mode 100644 index 00000000000000..e3abe59a82d3be --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-sloberta_slo_word_spelling_annotator_sl.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Slovenian sloberta_slo_word_spelling_annotator CamemBertEmbeddings from cjvt +author: John Snow Labs +name: sloberta_slo_word_spelling_annotator +date: 2024-09-02 +tags: [sl, open_source, onnx, embeddings, camembert] +task: Embeddings +language: sl +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sloberta_slo_word_spelling_annotator` is a Slovenian model originally trained by cjvt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sloberta_slo_word_spelling_annotator_sl_5.5.0_3.0_1725297530491.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sloberta_slo_word_spelling_annotator_sl_5.5.0_3.0_1725297530491.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("sloberta_slo_word_spelling_annotator","sl") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("sloberta_slo_word_spelling_annotator","sl") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
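+
+To hand the token vectors to downstream Spark ML stages, an `EmbeddingsFinisher` can be appended after the pipeline. This is a generic Spark NLP pattern rather than something specific to this model; the sketch below assumes the `pipelineDF` produced above.
+
+```python
+from sparknlp.base import EmbeddingsFinisher
+
+# Converts the `embeddings` annotations into plain Spark ML vectors.
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["finished_embeddings"]) \
+    .setOutputAsVector(True)
+
+finishedDF = finisher.transform(pipelineDF)
+finishedDF.select("finished_embeddings").show(truncate=80)
+```
+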
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sloberta_slo_word_spelling_annotator| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|sl| +|Size:|412.1 MB| + +## References + +https://huggingface.co/cjvt/SloBERTa-slo-word-spelling-annotator \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-temp_checkpoints_en.md b/docs/_posts/ahmedlone127/2024-09-02-temp_checkpoints_en.md new file mode 100644 index 00000000000000..431ea4979aa34b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-temp_checkpoints_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English temp_checkpoints DistilBertForSequenceClassification from FilippoLampa +author: John Snow Labs +name: temp_checkpoints +date: 2024-09-02 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`temp_checkpoints` is a English model originally trained by FilippoLampa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/temp_checkpoints_en_5.5.0_3.0_1725306297521.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/temp_checkpoints_en_5.5.0_3.0_1725306297521.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("temp_checkpoints","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("temp_checkpoints", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
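+
+The predicted label lives in the `class` column's `result` field. A quick way to read it off the transformed DataFrame (a sketch, assuming `pipelineDF` from above):
+
+```python
+# `class` is an array of annotations; `result` holds the predicted label.
+pipelineDF.selectExpr("text", "`class`.result as label").show(truncate=False)
+```
+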
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|temp_checkpoints| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/FilippoLampa/temp_checkpoints \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-tiny_random_clipmodel_hf_tiny_model_private_en.md b/docs/_posts/ahmedlone127/2024-09-02-tiny_random_clipmodel_hf_tiny_model_private_en.md new file mode 100644 index 00000000000000..b8ab8d2e6b997f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-tiny_random_clipmodel_hf_tiny_model_private_en.md @@ -0,0 +1,120 @@ +--- +layout: model +title: English tiny_random_clipmodel_hf_tiny_model_private CLIPForZeroShotClassification from hf-tiny-model-private +author: John Snow Labs +name: tiny_random_clipmodel_hf_tiny_model_private +date: 2024-09-02 +tags: [en, open_source, onnx, zero_shot, clip, image] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CLIPForZeroShotClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tiny_random_clipmodel_hf_tiny_model_private` is a English model originally trained by hf-tiny-model-private. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_random_clipmodel_hf_tiny_model_private_en_5.5.0_3.0_1725275825586.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_random_clipmodel_hf_tiny_model_private_en_5.5.0_3.0_1725275825586.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+imageDF = spark.read \
+    .format("image") \
+    .option("dropInvalid", value = True) \
+    .load("src/test/resources/image/")
+
+candidateLabels = [
+    "a photo of a bird",
+    "a photo of a cat",
+    "a photo of a dog",
+    "a photo of a hen",
+    "a photo of a hippo",
+    "a photo of a room",
+    "a photo of a tractor",
+    "a photo of an ostrich",
+    "a photo of an ox"]
+
+imageAssembler = ImageAssembler() \
+    .setInputCol("image") \
+    .setOutputCol("image_assembler")
+
+imageClassifier = CLIPForZeroShotClassification.pretrained("tiny_random_clipmodel_hf_tiny_model_private","en") \
+    .setInputCols(["image_assembler"]) \
+    .setOutputCol("label") \
+    .setCandidateLabels(candidateLabels)
+
+pipeline = Pipeline().setStages([imageAssembler, imageClassifier])
+pipelineModel = pipeline.fit(imageDF)
+pipelineDF = pipelineModel.transform(imageDF)
+
+```
+```scala
+
+val imageDF = ResourceHelper.spark.read
+    .format("image")
+    .option("dropInvalid", value = true)
+    .load("src/test/resources/image/")
+
+val candidateLabels = Array(
+    "a photo of a bird",
+    "a photo of a cat",
+    "a photo of a dog",
+    "a photo of a hen",
+    "a photo of a hippo",
+    "a photo of a room",
+    "a photo of a tractor",
+    "a photo of an ostrich",
+    "a photo of an ox")
+
+val imageAssembler = new ImageAssembler()
+    .setInputCol("image")
+    .setOutputCol("image_assembler")
+
+val imageClassifier = CLIPForZeroShotClassification.pretrained("tiny_random_clipmodel_hf_tiny_model_private","en")
+    .setInputCols(Array("image_assembler"))
+    .setOutputCol("label")
+    .setCandidateLabels(candidateLabels)
+
+val pipeline = new Pipeline().setStages(Array(imageAssembler, imageClassifier))
+val pipelineModel = pipeline.fit(imageDF)
+val pipelineDF = pipelineModel.transform(imageDF)
+
+```
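+
+For a quick sanity check of the zero-shot predictions, the label annotations can be shown next to the image paths. This is an illustrative sketch that assumes the `pipelineDF` produced above and the standard Spark image source schema.
+
+```python
+# `image.origin` carries the file path from Spark's image data source;
+# `label.result` carries the candidate label the CLIP model scored highest.
+pipelineDF.selectExpr("image.origin as image_path", "label.result as predicted_label") \
+    .show(truncate=False)
+```
+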
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_random_clipmodel_hf_tiny_model_private| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[label]| +|Language:|en| +|Size:|536.4 KB| + +## References + +https://huggingface.co/hf-tiny-model-private/tiny-random-CLIPModel \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-transformer_maltese_en.md b/docs/_posts/ahmedlone127/2024-09-02-transformer_maltese_en.md new file mode 100644 index 00000000000000..390a1813301840 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-transformer_maltese_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English transformer_maltese MarianTransformer from leenag +author: John Snow Labs +name: transformer_maltese +date: 2024-09-02 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`transformer_maltese` is a English model originally trained by leenag. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/transformer_maltese_en_5.5.0_3.0_1725303915053.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/transformer_maltese_en_5.5.0_3.0_1725303915053.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("transformer_maltese","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("transformer_maltese","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
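+
+With the columns wired as above, the translated sentences can be read from the `translation` column. A sketch, assuming the `pipelineDF` produced above:
+
+```python
+# One annotation per detected sentence; `result` holds the translated text.
+pipelineDF.selectExpr("explode(translation.result) as translated_text").show(truncate=False)
+```
+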
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|transformer_maltese| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|522.8 MB| + +## References + +https://huggingface.co/leenag/Transformer_MT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-turkish2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-02-turkish2_pipeline_en.md new file mode 100644 index 00000000000000..3a4277f4153c00 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-turkish2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English turkish2_pipeline pipeline MarianTransformer from PontifexMaximus +author: John Snow Labs +name: turkish2_pipeline +date: 2024-09-02 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`turkish2_pipeline` is a English model originally trained by PontifexMaximus. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/turkish2_pipeline_en_5.5.0_3.0_1725243503570.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/turkish2_pipeline_en_5.5.0_3.0_1725243503570.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("turkish2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("turkish2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|turkish2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|525.8 MB| + +## References + +https://huggingface.co/PontifexMaximus/Turkish2 + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-whisper_base_quran_ai_by_tarteel_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-02-whisper_base_quran_ai_by_tarteel_pipeline_en.md new file mode 100644 index 00000000000000..fadf67bd0fc267 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-whisper_base_quran_ai_by_tarteel_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_base_quran_ai_by_tarteel_pipeline pipeline WhisperForCTC from Democtic +author: John Snow Labs +name: whisper_base_quran_ai_by_tarteel_pipeline +date: 2024-09-02 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_base_quran_ai_by_tarteel_pipeline` is a English model originally trained by Democtic. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_base_quran_ai_by_tarteel_pipeline_en_5.5.0_3.0_1725289788539.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_base_quran_ai_by_tarteel_pipeline_en_5.5.0_3.0_1725289788539.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_base_quran_ai_by_tarteel_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_base_quran_ai_by_tarteel_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_base_quran_ai_by_tarteel_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|643.1 MB| + +## References + +https://huggingface.co/Democtic/whisper-base-quran-ai-by-tarteel + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-whisper_small_english_jenrish_en.md b/docs/_posts/ahmedlone127/2024-09-02-whisper_small_english_jenrish_en.md new file mode 100644 index 00000000000000..6b1cdda337db57 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-whisper_small_english_jenrish_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_small_english_jenrish WhisperForCTC from jenrish +author: John Snow Labs +name: whisper_small_english_jenrish +date: 2024-09-02 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_english_jenrish` is a English model originally trained by jenrish. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_english_jenrish_en_5.5.0_3.0_1725289595781.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_english_jenrish_en_5.5.0_3.0_1725289595781.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_english_jenrish","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+# `data` is assumed to be a DataFrame with an "audio_content" column of audio samples
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_english_jenrish", "en")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+// `data` is assumed to be a DataFrame with an "audio_content" column of audio samples
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
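+
+One way to build the `data` DataFrame referenced above is to decode a local audio file into an array of samples; the use of `librosa` and the file path below are illustrative assumptions, not part of this card.
+
+```python
+import librosa
+
+# Decode a local WAV file to 16 kHz float samples for the AudioAssembler.
+raw_floats, _ = librosa.load("path/to/sample.wav", sr=16000)
+data = spark.createDataFrame([[raw_floats.tolist()]]).toDF("audio_content")
+
+# The transcription ends up in the `text` column's `result` field.
+pipelineDF = pipeline.fit(data).transform(data)
+pipelineDF.selectExpr("explode(text.result) as transcription").show(truncate=False)
+```
+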
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_english_jenrish| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/jenrish/whisper-small-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-02-whisper_small_taiwanese_minnan_take2_en.md b/docs/_posts/ahmedlone127/2024-09-02-whisper_small_taiwanese_minnan_take2_en.md new file mode 100644 index 00000000000000..b1ff17513f2719 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-02-whisper_small_taiwanese_minnan_take2_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_small_taiwanese_minnan_take2 WhisperForCTC from caotun +author: John Snow Labs +name: whisper_small_taiwanese_minnan_take2 +date: 2024-09-02 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_taiwanese_minnan_take2` is a English model originally trained by caotun. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_taiwanese_minnan_take2_en_5.5.0_3.0_1725284854884.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_taiwanese_minnan_take2_en_5.5.0_3.0_1725284854884.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_taiwanese_minnan_take2","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+# `data` is assumed to be a DataFrame with an "audio_content" column of audio samples
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_taiwanese_minnan_take2", "en")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+// `data` is assumed to be a DataFrame with an "audio_content" column of audio samples
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_taiwanese_minnan_take2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/caotun/whisper-small-taiwanese-minnan-take2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-accu_0_en.md b/docs/_posts/ahmedlone127/2024-09-03-accu_0_en.md new file mode 100644 index 00000000000000..4f14888444a3b7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-accu_0_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English accu_0 RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: accu_0 +date: 2024-09-03 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`accu_0` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/accu_0_en_5.5.0_3.0_1725369385171.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/accu_0_en_5.5.0_3.0_1725369385171.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("accu_0","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("accu_0", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|accu_0| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/Accu_0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-aigc_detector_env1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-aigc_detector_env1_pipeline_en.md new file mode 100644 index 00000000000000..4eae8d2387be03 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-aigc_detector_env1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English aigc_detector_env1_pipeline pipeline RoBertaForSequenceClassification from yuchuantian +author: John Snow Labs +name: aigc_detector_env1_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`aigc_detector_env1_pipeline` is a English model originally trained by yuchuantian. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/aigc_detector_env1_pipeline_en_5.5.0_3.0_1725337367286.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/aigc_detector_env1_pipeline_en_5.5.0_3.0_1725337367286.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("aigc_detector_env1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("aigc_detector_env1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|aigc_detector_env1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|462.7 MB| + +## References + +https://huggingface.co/yuchuantian/AIGC_detector_env1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-al_roberta_base_en.md b/docs/_posts/ahmedlone127/2024-09-03-al_roberta_base_en.md new file mode 100644 index 00000000000000..39260e3423718c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-al_roberta_base_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English al_roberta_base RoBertaEmbeddings from macedonizer +author: John Snow Labs +name: al_roberta_base +date: 2024-09-03 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`al_roberta_base` is a English model originally trained by macedonizer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/al_roberta_base_en_5.5.0_3.0_1725375596318.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/al_roberta_base_en_5.5.0_3.0_1725375596318.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("al_roberta_base","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("al_roberta_base","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|al_roberta_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|311.7 MB| + +## References + +https://huggingface.co/macedonizer/al-roberta-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-albert_base_qa_2_k_fold_3_en.md b/docs/_posts/ahmedlone127/2024-09-03-albert_base_qa_2_k_fold_3_en.md new file mode 100644 index 00000000000000..cf21a4d9905ed7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-albert_base_qa_2_k_fold_3_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English albert_base_qa_2_k_fold_3 AlbertForQuestionAnswering from mateiaass +author: John Snow Labs +name: albert_base_qa_2_k_fold_3 +date: 2024-09-03 +tags: [en, open_source, onnx, question_answering, albert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_base_qa_2_k_fold_3` is a English model originally trained by mateiaass. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_qa_2_k_fold_3_en_5.5.0_3.0_1725341726552.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_qa_2_k_fold_3_en_5.5.0_3.0_1725341726552.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = AlbertForQuestionAnswering.pretrained("albert_base_qa_2_k_fold_3","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = AlbertForQuestionAnswering.pretrained("albert_base_qa_2_k_fold_3", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
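+
+To see the extracted spans next to their questions (a sketch, assuming the `pipelineDF` produced above):
+
+```python
+# `answer.result` holds the extracted span for each question/context pair.
+pipelineDF.selectExpr(
+    "document_question.result as question",
+    "answer.result as answer"
+).show(truncate=False)
+```
+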
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_qa_2_k_fold_3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|42.0 MB| + +## References + +https://huggingface.co/mateiaass/albert-base-qa-2-k-fold-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-all_mpnet_base_v2_eclass_gart_labor_en.md b/docs/_posts/ahmedlone127/2024-09-03-all_mpnet_base_v2_eclass_gart_labor_en.md new file mode 100644 index 00000000000000..5b7280a6f04b81 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-all_mpnet_base_v2_eclass_gart_labor_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English all_mpnet_base_v2_eclass_gart_labor MPNetEmbeddings from gart-labor +author: John Snow Labs +name: all_mpnet_base_v2_eclass_gart_labor +date: 2024-09-03 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`all_mpnet_base_v2_eclass_gart_labor` is a English model originally trained by gart-labor. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_eclass_gart_labor_en_5.5.0_3.0_1725350673717.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_eclass_gart_labor_en_5.5.0_3.0_1725350673717.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_v2_eclass_gart_labor","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_v2_eclass_gart_labor","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_base_v2_eclass_gart_labor| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/gart-labor/all-mpnet-base-v2-eclass \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-all_mpnet_base_v2_lr_2e_7_margin_1_epoch_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-all_mpnet_base_v2_lr_2e_7_margin_1_epoch_3_pipeline_en.md new file mode 100644 index 00000000000000..b084271c5d7848 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-all_mpnet_base_v2_lr_2e_7_margin_1_epoch_3_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English all_mpnet_base_v2_lr_2e_7_margin_1_epoch_3_pipeline pipeline MPNetEmbeddings from luiz-and-robert-thesis +author: John Snow Labs +name: all_mpnet_base_v2_lr_2e_7_margin_1_epoch_3_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`all_mpnet_base_v2_lr_2e_7_margin_1_epoch_3_pipeline` is a English model originally trained by luiz-and-robert-thesis. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_lr_2e_7_margin_1_epoch_3_pipeline_en_5.5.0_3.0_1725351044897.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_lr_2e_7_margin_1_epoch_3_pipeline_en_5.5.0_3.0_1725351044897.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("all_mpnet_base_v2_lr_2e_7_margin_1_epoch_3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("all_mpnet_base_v2_lr_2e_7_margin_1_epoch_3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_base_v2_lr_2e_7_margin_1_epoch_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/luiz-and-robert-thesis/all-mpnet-base-v2-lr-2e-7-margin-1-epoch-3 + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-all_mpnet_base_v2_southern_sotho_out_sim_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-all_mpnet_base_v2_southern_sotho_out_sim_pipeline_en.md new file mode 100644 index 00000000000000..ede5446bfc42c0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-all_mpnet_base_v2_southern_sotho_out_sim_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English all_mpnet_base_v2_southern_sotho_out_sim_pipeline pipeline MPNetEmbeddings from laiking +author: John Snow Labs +name: all_mpnet_base_v2_southern_sotho_out_sim_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`all_mpnet_base_v2_southern_sotho_out_sim_pipeline` is a English model originally trained by laiking. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_southern_sotho_out_sim_pipeline_en_5.5.0_3.0_1725350888082.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_southern_sotho_out_sim_pipeline_en_5.5.0_3.0_1725350888082.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("all_mpnet_base_v2_southern_sotho_out_sim_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("all_mpnet_base_v2_southern_sotho_out_sim_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_base_v2_southern_sotho_out_sim_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/laiking/all-mpnet-base-v2-st-out-sim + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-all_mpnet_base_v2_sts_juanignaciosolerno_en.md b/docs/_posts/ahmedlone127/2024-09-03-all_mpnet_base_v2_sts_juanignaciosolerno_en.md new file mode 100644 index 00000000000000..975654f4cfea05 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-all_mpnet_base_v2_sts_juanignaciosolerno_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English all_mpnet_base_v2_sts_juanignaciosolerno MPNetEmbeddings from JuanIgnacioSolerno +author: John Snow Labs +name: all_mpnet_base_v2_sts_juanignaciosolerno +date: 2024-09-03 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`all_mpnet_base_v2_sts_juanignaciosolerno` is a English model originally trained by JuanIgnacioSolerno. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_sts_juanignaciosolerno_en_5.5.0_3.0_1725350247407.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_sts_juanignaciosolerno_en_5.5.0_3.0_1725350247407.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_v2_sts_juanignaciosolerno","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_v2_sts_juanignaciosolerno","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
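+
+A common follow-up for a sentence-embedding model is a cosine-similarity check between two texts. The sketch below reuses the fitted `pipelineModel` from above and is illustrative only; the second sentence and the use of NumPy are assumptions.
+
+```python
+import numpy as np
+
+pairs = spark.createDataFrame(
+    [["I love spark-nlp"], ["Spark NLP is great for production NLP"]]
+).toDF("text")
+
+out = pipelineModel.transform(pairs)
+# One sentence-level vector per row, stored inside the annotation struct.
+vectors = [row.vec for row in out.selectExpr("explode(embeddings.embeddings) as vec").collect()]
+a, b = np.array(vectors[0]), np.array(vectors[1])
+print(float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b))))
+```
+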
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_base_v2_sts_juanignaciosolerno| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/JuanIgnacioSolerno/all-mpnet-base-v2-sts \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-autotrain_okr_iptal_3196789879_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-autotrain_okr_iptal_3196789879_pipeline_en.md new file mode 100644 index 00000000000000..d0e89e22b9ae26 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-autotrain_okr_iptal_3196789879_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English autotrain_okr_iptal_3196789879_pipeline pipeline XlmRoBertaForSequenceClassification from ekincanozcelik +author: John Snow Labs +name: autotrain_okr_iptal_3196789879_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`autotrain_okr_iptal_3196789879_pipeline` is a English model originally trained by ekincanozcelik. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/autotrain_okr_iptal_3196789879_pipeline_en_5.5.0_3.0_1725395814519.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/autotrain_okr_iptal_3196789879_pipeline_en_5.5.0_3.0_1725395814519.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("autotrain_okr_iptal_3196789879_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("autotrain_okr_iptal_3196789879_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|autotrain_okr_iptal_3196789879_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|777.0 MB| + +## References + +https://huggingface.co/ekincanozcelik/autotrain-okr_iptal-3196789879 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-baai_bge_large_english_v1_5_fine_tuned_en.md b/docs/_posts/ahmedlone127/2024-09-03-baai_bge_large_english_v1_5_fine_tuned_en.md new file mode 100644 index 00000000000000..135efe16854153 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-baai_bge_large_english_v1_5_fine_tuned_en.md @@ -0,0 +1,87 @@ +--- +layout: model +title: English baai_bge_large_english_v1_5_fine_tuned BGEEmbeddings from rjnClarke +author: John Snow Labs +name: baai_bge_large_english_v1_5_fine_tuned +date: 2024-09-03 +tags: [en, open_source, onnx, embeddings, bge] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BGEEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`baai_bge_large_english_v1_5_fine_tuned` is a English model originally trained by rjnClarke. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/baai_bge_large_english_v1_5_fine_tuned_en_5.5.0_3.0_1725357244870.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/baai_bge_large_english_v1_5_fine_tuned_en_5.5.0_3.0_1725357244870.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol("text") \
    .setOutputCol("document")

embeddings = BGEEmbeddings.pretrained("baai_bge_large_english_v1_5_fine_tuned","en") \
    .setInputCols(["document"]) \
    .setOutputCol("embeddings")

pipeline = Pipeline().setStages([documentAssembler, embeddings])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val embeddings = BGEEmbeddings.pretrained("baai_bge_large_english_v1_5_fine_tuned","en")
    .setInputCols(Array("document"))
    .setOutputCol("embeddings")

val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
</div>
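
Once transformed, the sentence vectors sit inside the `embeddings` annotation column. A minimal sketch for pulling them out, assuming the `pipelineDF` produced by the snippet above:

```python
# Each annotation carries its vector in the `embeddings` field; exploding it
# yields one array of floats per input document.
vectors = pipelineDF.selectExpr("explode(embeddings.embeddings) as vector")
vectors.show(truncate=False)
```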
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|baai_bge_large_english_v1_5_fine_tuned| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[bge]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/rjnClarke/BAAI-bge-large-en-v1.5-fine-tuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-babyberta_wikipedia_french1_25m_wikipedia1_1_25m_with_masking_seed3_finetuned_squad_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-babyberta_wikipedia_french1_25m_wikipedia1_1_25m_with_masking_seed3_finetuned_squad_pipeline_en.md new file mode 100644 index 00000000000000..3792ab6cd5fa0e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-babyberta_wikipedia_french1_25m_wikipedia1_1_25m_with_masking_seed3_finetuned_squad_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English babyberta_wikipedia_french1_25m_wikipedia1_1_25m_with_masking_seed3_finetuned_squad_pipeline pipeline RoBertaForQuestionAnswering from lielbin +author: John Snow Labs +name: babyberta_wikipedia_french1_25m_wikipedia1_1_25m_with_masking_seed3_finetuned_squad_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`babyberta_wikipedia_french1_25m_wikipedia1_1_25m_with_masking_seed3_finetuned_squad_pipeline` is a English model originally trained by lielbin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/babyberta_wikipedia_french1_25m_wikipedia1_1_25m_with_masking_seed3_finetuned_squad_pipeline_en_5.5.0_3.0_1725370411426.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/babyberta_wikipedia_french1_25m_wikipedia1_1_25m_with_masking_seed3_finetuned_squad_pipeline_en_5.5.0_3.0_1725370411426.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

# df: a DataFrame with the question/context text columns expected by the pipeline's document assembler
pipeline = PretrainedPipeline("babyberta_wikipedia_french1_25m_wikipedia1_1_25m_with_masking_seed3_finetuned_squad_pipeline", lang = "en")
annotations = pipeline.transform(df)

```
```scala

// df: a DataFrame with the question/context text columns expected by the pipeline's document assembler
val pipeline = new PretrainedPipeline("babyberta_wikipedia_french1_25m_wikipedia1_1_25m_with_masking_seed3_finetuned_squad_pipeline", lang = "en")
val annotations = pipeline.transform(df)

```
</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|babyberta_wikipedia_french1_25m_wikipedia1_1_25m_with_masking_seed3_finetuned_squad_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|32.0 MB| + +## References + +https://huggingface.co/lielbin/BabyBERTa-wikipedia_french1.25M_wikipedia1_1.25M-with-Masking-seed3-finetuned-SQuAD + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-bert_base_uncased_finetuned_squad_frozen_v1_en.md b/docs/_posts/ahmedlone127/2024-09-03-bert_base_uncased_finetuned_squad_frozen_v1_en.md new file mode 100644 index 00000000000000..01c18c65adac81 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-bert_base_uncased_finetuned_squad_frozen_v1_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_squad_frozen_v1 BertForQuestionAnswering from ericRosello +author: John Snow Labs +name: bert_base_uncased_finetuned_squad_frozen_v1 +date: 2024-09-03 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_uncased_finetuned_squad_frozen_v1` is a English model originally trained by ericRosello. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_squad_frozen_v1_en_5.5.0_3.0_1725352286648.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_squad_frozen_v1_en_5.5.0_3.0_1725352286648.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = MultiDocumentAssembler() \
    .setInputCols(["question", "context"]) \
    .setOutputCols(["document_question", "document_context"])

spanClassifier = BertForQuestionAnswering.pretrained("bert_base_uncased_finetuned_squad_frozen_v1","en") \
    .setInputCols(["document_question","document_context"]) \
    .setOutputCol("answer")

pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new MultiDocumentAssembler()
    .setInputCols(Array("question", "context"))
    .setOutputCols(Array("document_question", "document_context"))

val spanClassifier = BertForQuestionAnswering.pretrained("bert_base_uncased_finetuned_squad_frozen_v1", "en")
    .setInputCols(Array("document_question","document_context"))
    .setOutputCol("answer")

val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
</div>
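
The extracted span can be read back with plain DataFrame operations. A minimal sketch, assuming the `pipelineDF` from the snippet above:

```python
# `answer.result` holds the predicted answer text for each question/context pair.
pipelineDF.select("question", "answer.result").show(truncate=False)
```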
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_squad_frozen_v1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/ericRosello/bert-base-uncased-finetuned-squad-frozen-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-bert_categorizer_en.md b/docs/_posts/ahmedlone127/2024-09-03-bert_categorizer_en.md new file mode 100644 index 00000000000000..075009c5dcd55f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-bert_categorizer_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_categorizer DistilBertForSequenceClassification from DavinciTech +author: John Snow Labs +name: bert_categorizer +date: 2024-09-03 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_categorizer` is a English model originally trained by DavinciTech. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_categorizer_en_5.5.0_3.0_1725394252730.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_categorizer_en_5.5.0_3.0_1725394252730.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

sequenceClassifier = DistilBertForSequenceClassification.pretrained("bert_categorizer","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("class")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols(Array("document"))
    .setOutputCol("token")

val sequenceClassifier = DistilBertForSequenceClassification.pretrained("bert_categorizer", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("class")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
</div>
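
The predicted label ends up in the `class` annotation column. A minimal sketch for reading it back, assuming the `pipelineDF` from the snippet above:

```python
# `class.result` contains the predicted label for each input row.
pipelineDF.select("text", "class.result").show(truncate=False)
```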
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_categorizer| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/DavinciTech/BERT_Categorizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-bert_categorizer_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-bert_categorizer_pipeline_en.md new file mode 100644 index 00000000000000..8fca20fefe604a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-bert_categorizer_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_categorizer_pipeline pipeline DistilBertForSequenceClassification from DavinciTech +author: John Snow Labs +name: bert_categorizer_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_categorizer_pipeline` is a English model originally trained by DavinciTech. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_categorizer_pipeline_en_5.5.0_3.0_1725394267280.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_categorizer_pipeline_en_5.5.0_3.0_1725394267280.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipeline = PretrainedPipeline("bert_categorizer_pipeline", lang = "en")
annotations = pipeline.transform(df)

```
```scala

val df = Seq("I love spark-nlp").toDS.toDF("text")
val pipeline = new PretrainedPipeline("bert_categorizer_pipeline", lang = "en")
val annotations = pipeline.transform(df)

```
</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_categorizer_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/DavinciTech/BERT_Categorizer + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-bge_base_financial_matryoshka_dustyatx_en.md b/docs/_posts/ahmedlone127/2024-09-03-bge_base_financial_matryoshka_dustyatx_en.md new file mode 100644 index 00000000000000..6207791e9de67f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-bge_base_financial_matryoshka_dustyatx_en.md @@ -0,0 +1,87 @@ +--- +layout: model +title: English bge_base_financial_matryoshka_dustyatx BGEEmbeddings from dustyatx +author: John Snow Labs +name: bge_base_financial_matryoshka_dustyatx +date: 2024-09-03 +tags: [en, open_source, onnx, embeddings, bge] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BGEEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bge_base_financial_matryoshka_dustyatx` is a English model originally trained by dustyatx. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_base_financial_matryoshka_dustyatx_en_5.5.0_3.0_1725356384225.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_base_financial_matryoshka_dustyatx_en_5.5.0_3.0_1725356384225.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol("text") \
    .setOutputCol("document")

embeddings = BGEEmbeddings.pretrained("bge_base_financial_matryoshka_dustyatx","en") \
    .setInputCols(["document"]) \
    .setOutputCol("embeddings")

pipeline = Pipeline().setStages([documentAssembler, embeddings])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val embeddings = BGEEmbeddings.pretrained("bge_base_financial_matryoshka_dustyatx","en")
    .setInputCols(Array("document"))
    .setOutputCol("embeddings")

val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_base_financial_matryoshka_dustyatx| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[bge]| +|Language:|en| +|Size:|387.1 MB| + +## References + +https://huggingface.co/dustyatx/bge-base-financial-matryoshka \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-bge_small_english_dcpr_tuned_teachafy_en.md b/docs/_posts/ahmedlone127/2024-09-03-bge_small_english_dcpr_tuned_teachafy_en.md new file mode 100644 index 00000000000000..469f489ad8a033 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-bge_small_english_dcpr_tuned_teachafy_en.md @@ -0,0 +1,87 @@ +--- +layout: model +title: English bge_small_english_dcpr_tuned_teachafy BGEEmbeddings from Teachafy +author: John Snow Labs +name: bge_small_english_dcpr_tuned_teachafy +date: 2024-09-03 +tags: [en, open_source, onnx, embeddings, bge] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BGEEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bge_small_english_dcpr_tuned_teachafy` is a English model originally trained by Teachafy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_small_english_dcpr_tuned_teachafy_en_5.5.0_3.0_1725356355999.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_small_english_dcpr_tuned_teachafy_en_5.5.0_3.0_1725356355999.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol("text") \
    .setOutputCol("document")

embeddings = BGEEmbeddings.pretrained("bge_small_english_dcpr_tuned_teachafy","en") \
    .setInputCols(["document"]) \
    .setOutputCol("embeddings")

pipeline = Pipeline().setStages([documentAssembler, embeddings])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val embeddings = BGEEmbeddings.pretrained("bge_small_english_dcpr_tuned_teachafy","en")
    .setInputCols(Array("document"))
    .setOutputCol("embeddings")

val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_small_english_dcpr_tuned_teachafy| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[bge]| +|Language:|en| +|Size:|115.2 MB| + +## References + +https://huggingface.co/Teachafy/bge-small-en-dcpr-tuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-bge_small_english_v1_5_ft_orc_0813_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-bge_small_english_v1_5_ft_orc_0813_pipeline_en.md new file mode 100644 index 00000000000000..c7911857bae778 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-bge_small_english_v1_5_ft_orc_0813_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bge_small_english_v1_5_ft_orc_0813_pipeline pipeline BGEEmbeddings from magnifi +author: John Snow Labs +name: bge_small_english_v1_5_ft_orc_0813_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bge_small_english_v1_5_ft_orc_0813_pipeline` is a English model originally trained by magnifi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_small_english_v1_5_ft_orc_0813_pipeline_en_5.5.0_3.0_1725357218154.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_small_english_v1_5_ft_orc_0813_pipeline_en_5.5.0_3.0_1725357218154.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipeline = PretrainedPipeline("bge_small_english_v1_5_ft_orc_0813_pipeline", lang = "en")
annotations = pipeline.transform(df)

```
```scala

val df = Seq("I love spark-nlp").toDS.toDF("text")
val pipeline = new PretrainedPipeline("bge_small_english_v1_5_ft_orc_0813_pipeline", lang = "en")
val annotations = pipeline.transform(df)

```
</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_small_english_v1_5_ft_orc_0813_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|110.2 MB| + +## References + +https://huggingface.co/magnifi/bge-small-en-v1.5-ft-orc-0813 + +## Included Models + +- DocumentAssembler +- BGEEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-bislama_all_mpnet_base_v2_finetuned_webnlg2020_metric_average_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-bislama_all_mpnet_base_v2_finetuned_webnlg2020_metric_average_pipeline_en.md new file mode 100644 index 00000000000000..16ec26eab5aaac --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-bislama_all_mpnet_base_v2_finetuned_webnlg2020_metric_average_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bislama_all_mpnet_base_v2_finetuned_webnlg2020_metric_average_pipeline pipeline MPNetEmbeddings from teven +author: John Snow Labs +name: bislama_all_mpnet_base_v2_finetuned_webnlg2020_metric_average_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bislama_all_mpnet_base_v2_finetuned_webnlg2020_metric_average_pipeline` is a English model originally trained by teven. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bislama_all_mpnet_base_v2_finetuned_webnlg2020_metric_average_pipeline_en_5.5.0_3.0_1725350726997.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bislama_all_mpnet_base_v2_finetuned_webnlg2020_metric_average_pipeline_en_5.5.0_3.0_1725350726997.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipeline = PretrainedPipeline("bislama_all_mpnet_base_v2_finetuned_webnlg2020_metric_average_pipeline", lang = "en")
annotations = pipeline.transform(df)

```
```scala

val df = Seq("I love spark-nlp").toDS.toDF("text")
val pipeline = new PretrainedPipeline("bislama_all_mpnet_base_v2_finetuned_webnlg2020_metric_average_pipeline", lang = "en")
val annotations = pipeline.transform(df)

```
</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bislama_all_mpnet_base_v2_finetuned_webnlg2020_metric_average_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/teven/bi_all-mpnet-base-v2_finetuned_WebNLG2020_metric_average + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-bm_french_pipeline_bm.md b/docs/_posts/ahmedlone127/2024-09-03-bm_french_pipeline_bm.md new file mode 100644 index 00000000000000..56528b42bd436e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-bm_french_pipeline_bm.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Bambara bm_french_pipeline pipeline MarianTransformer from Ife +author: John Snow Labs +name: bm_french_pipeline +date: 2024-09-03 +tags: [bm, open_source, pipeline, onnx] +task: Translation +language: bm +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bm_french_pipeline` is a Bambara model originally trained by Ife. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bm_french_pipeline_bm_5.5.0_3.0_1725346825499.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bm_french_pipeline_bm_5.5.0_3.0_1725346825499.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipeline = PretrainedPipeline("bm_french_pipeline", lang = "bm")
annotations = pipeline.transform(df)

```
```scala

val df = Seq("I love spark-nlp").toDS.toDF("text")
val pipeline = new PretrainedPipeline("bm_french_pipeline", lang = "bm")
val annotations = pipeline.transform(df)

```
</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bm_french_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|bm| +|Size:|508.3 MB| + +## References + +https://huggingface.co/Ife/BM-FR + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-brahmai_clip_v0_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-brahmai_clip_v0_1_pipeline_en.md new file mode 100644 index 00000000000000..ad1b377e7a0bda --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-brahmai_clip_v0_1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English brahmai_clip_v0_1_pipeline pipeline CLIPForZeroShotClassification from brahmairesearch +author: John Snow Labs +name: brahmai_clip_v0_1_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`brahmai_clip_v0_1_pipeline` is a English model originally trained by brahmairesearch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/brahmai_clip_v0_1_pipeline_en_5.5.0_3.0_1725339485258.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/brahmai_clip_v0_1_pipeline_en_5.5.0_3.0_1725339485258.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

df = spark.read.format("image").option("dropInvalid", value = True).load("src/test/resources/image/")
pipeline = PretrainedPipeline("brahmai_clip_v0_1_pipeline", lang = "en")
annotations = pipeline.transform(df)

```
```scala

val df = spark.read.format("image").option("dropInvalid", value = true).load("src/test/resources/image/")
val pipeline = new PretrainedPipeline("brahmai_clip_v0_1_pipeline", lang = "en")
val annotations = pipeline.transform(df)

```
</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|brahmai_clip_v0_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.1 GB| + +## References + +https://huggingface.co/brahmairesearch/brahmai-clip-v0.1 + +## Included Models + +- ImageAssembler +- CLIPForZeroShotClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-burmese_awesome_model_asmiishripad18_en.md b/docs/_posts/ahmedlone127/2024-09-03-burmese_awesome_model_asmiishripad18_en.md new file mode 100644 index 00000000000000..bf393e3eb6ec6a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-burmese_awesome_model_asmiishripad18_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_model_asmiishripad18 DistilBertForSequenceClassification from asmiishripad18 +author: John Snow Labs +name: burmese_awesome_model_asmiishripad18 +date: 2024-09-03 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_model_asmiishripad18` is a English model originally trained by asmiishripad18. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_model_asmiishripad18_en_5.5.0_3.0_1725394246137.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_model_asmiishripad18_en_5.5.0_3.0_1725394246137.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

sequenceClassifier = DistilBertForSequenceClassification.pretrained("burmese_awesome_model_asmiishripad18","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("class")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols(Array("document"))
    .setOutputCol("token")

val sequenceClassifier = DistilBertForSequenceClassification.pretrained("burmese_awesome_model_asmiishripad18", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("class")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_model_asmiishripad18| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/asmiishripad18/my_awesome_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-burmese_fine_tuned_distilbert_en.md b/docs/_posts/ahmedlone127/2024-09-03-burmese_fine_tuned_distilbert_en.md new file mode 100644 index 00000000000000..f2e3cd14c8b822 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-burmese_fine_tuned_distilbert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_fine_tuned_distilbert DistilBertForSequenceClassification from Benuehlinger +author: John Snow Labs +name: burmese_fine_tuned_distilbert +date: 2024-09-03 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_fine_tuned_distilbert` is a English model originally trained by Benuehlinger. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_fine_tuned_distilbert_en_5.5.0_3.0_1725329642861.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_fine_tuned_distilbert_en_5.5.0_3.0_1725329642861.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

sequenceClassifier = DistilBertForSequenceClassification.pretrained("burmese_fine_tuned_distilbert","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("class")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols(Array("document"))
    .setOutputCol("token")

val sequenceClassifier = DistilBertForSequenceClassification.pretrained("burmese_fine_tuned_distilbert", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("class")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_fine_tuned_distilbert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Benuehlinger/my-fine-tuned-distilbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-classificateur_intention_camembert_en.md b/docs/_posts/ahmedlone127/2024-09-03-classificateur_intention_camembert_en.md new file mode 100644 index 00000000000000..aa2bdef6ccab28 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-classificateur_intention_camembert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English classificateur_intention_camembert CamemBertForSequenceClassification from DioulaD +author: John Snow Labs +name: classificateur_intention_camembert +date: 2024-09-03 +tags: [en, open_source, onnx, sequence_classification, camembert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`classificateur_intention_camembert` is a English model originally trained by DioulaD. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/classificateur_intention_camembert_en_5.5.0_3.0_1725378382264.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/classificateur_intention_camembert_en_5.5.0_3.0_1725378382264.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

sequenceClassifier = CamemBertForSequenceClassification.pretrained("classificateur_intention_camembert","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("class")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols(Array("document"))
    .setOutputCol("token")

val sequenceClassifier = CamemBertForSequenceClassification.pretrained("classificateur_intention_camembert", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("class")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|classificateur_intention_camembert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|388.1 MB| + +## References + +https://huggingface.co/DioulaD/classificateur-intention_camembert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-cleaned_bert_base_cased_500_620e5b_en.md b/docs/_posts/ahmedlone127/2024-09-03-cleaned_bert_base_cased_500_620e5b_en.md new file mode 100644 index 00000000000000..025e3d7ea480af --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-cleaned_bert_base_cased_500_620e5b_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English cleaned_bert_base_cased_500_620e5b E5Embeddings from rithwik-db +author: John Snow Labs +name: cleaned_bert_base_cased_500_620e5b +date: 2024-09-03 +tags: [en, open_source, onnx, embeddings, e5] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: E5Embeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained E5Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cleaned_bert_base_cased_500_620e5b` is a English model originally trained by rithwik-db. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cleaned_bert_base_cased_500_620e5b_en_5.5.0_3.0_1725332515989.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cleaned_bert_base_cased_500_620e5b_en_5.5.0_3.0_1725332515989.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = E5Embeddings.pretrained("cleaned_bert_base_cased_500_620e5b","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = E5Embeddings.pretrained("cleaned_bert_base_cased_500_620e5b","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cleaned_bert_base_cased_500_620e5b| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[E5]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/rithwik-db/cleaned-bert-base-cased-500-620e5b \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-clip_vit_large_patch14_baseplate_en.md b/docs/_posts/ahmedlone127/2024-09-03-clip_vit_large_patch14_baseplate_en.md new file mode 100644 index 00000000000000..8ac71b44c184b5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-clip_vit_large_patch14_baseplate_en.md @@ -0,0 +1,120 @@ +--- +layout: model +title: English clip_vit_large_patch14_baseplate CLIPForZeroShotClassification from baseplate +author: John Snow Labs +name: clip_vit_large_patch14_baseplate +date: 2024-09-03 +tags: [en, open_source, onnx, zero_shot, clip, image] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CLIPForZeroShotClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clip_vit_large_patch14_baseplate` is a English model originally trained by baseplate. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clip_vit_large_patch14_baseplate_en_5.5.0_3.0_1725338922117.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clip_vit_large_patch14_baseplate_en_5.5.0_3.0_1725338922117.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

imageDF = spark.read \
    .format("image") \
    .option("dropInvalid", value = True) \
    .load("src/test/resources/image/")

candidateLabels = [
    "a photo of a bird",
    "a photo of a cat",
    "a photo of a dog",
    "a photo of a hen",
    "a photo of a hippo",
    "a photo of a room",
    "a photo of a tractor",
    "a photo of an ostrich",
    "a photo of an ox"]

imageAssembler = ImageAssembler() \
    .setInputCol("image") \
    .setOutputCol("image_assembler")

imageClassifier = CLIPForZeroShotClassification.pretrained("clip_vit_large_patch14_baseplate","en") \
    .setInputCols(["image_assembler"]) \
    .setOutputCol("label") \
    .setCandidateLabels(candidateLabels)

pipeline = Pipeline().setStages([imageAssembler, imageClassifier])
pipelineModel = pipeline.fit(imageDF)
pipelineDF = pipelineModel.transform(imageDF)

```
```scala

val imageDF = ResourceHelper.spark.read
  .format("image")
  .option("dropInvalid", value = true)
  .load("src/test/resources/image/")

val candidateLabels = Array(
  "a photo of a bird",
  "a photo of a cat",
  "a photo of a dog",
  "a photo of a hen",
  "a photo of a hippo",
  "a photo of a room",
  "a photo of a tractor",
  "a photo of an ostrich",
  "a photo of an ox")

val imageAssembler = new ImageAssembler()
  .setInputCol("image")
  .setOutputCol("image_assembler")

val imageClassifier = CLIPForZeroShotClassification.pretrained("clip_vit_large_patch14_baseplate","en")
  .setInputCols(Array("image_assembler"))
  .setOutputCol("label")
  .setCandidateLabels(candidateLabels)

val pipeline = new Pipeline().setStages(Array(imageAssembler, imageClassifier))
val pipelineModel = pipeline.fit(imageDF)
val pipelineDF = pipelineModel.transform(imageDF)

```
</div>
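
The chosen candidate label for every image lands in the `label` annotation column. A minimal sketch for viewing predictions next to the source file, assuming the `pipelineDF` from the snippet above:

```python
# `image_assembler.origin` keeps the image path; `label.result` is the predicted candidate label.
pipelineDF.selectExpr("image_assembler.origin as image", "label.result as prediction").show(truncate=False)
```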
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clip_vit_large_patch14_baseplate| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[label]| +|Language:|en| +|Size:|1.1 GB| + +## References + +https://huggingface.co/baseplate/clip-vit-large-patch14 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-coptic_english_translator_en.md b/docs/_posts/ahmedlone127/2024-09-03-coptic_english_translator_en.md new file mode 100644 index 00000000000000..82017afee10703 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-coptic_english_translator_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English coptic_english_translator MarianTransformer from megalaa +author: John Snow Labs +name: coptic_english_translator +date: 2024-09-03 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`coptic_english_translator` is a English model originally trained by megalaa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/coptic_english_translator_en_5.5.0_3.0_1725346252940.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/coptic_english_translator_en_5.5.0_3.0_1725346252940.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol("text") \
    .setOutputCol("document")

sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
    .setInputCols(["document"]) \
    .setOutputCol("sentence")

marian = MarianTransformer.pretrained("coptic_english_translator","en") \
    .setInputCols(["sentence"]) \
    .setOutputCol("translation")

pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
    .setInputCols(Array("document"))
    .setOutputCol("sentence")

val marian = MarianTransformer.pretrained("coptic_english_translator","en")
    .setInputCols(Array("sentence"))
    .setOutputCol("translation")

val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
val data = Seq("I love spark-nlp").toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
</div>
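
The translated sentences come back in the `translation` annotation column. A minimal sketch for collecting them, assuming the `pipelineDF` from the snippet above:

```python
# One translation annotation per detected sentence; `result` is the translated text.
pipelineDF.select("translation.result").show(truncate=False)
```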
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|coptic_english_translator| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|532.6 MB| + +## References + +https://huggingface.co/megalaa/coptic-english-translator \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-crossencoder_camembert_base_mmarcofr_pipeline_fr.md b/docs/_posts/ahmedlone127/2024-09-03-crossencoder_camembert_base_mmarcofr_pipeline_fr.md new file mode 100644 index 00000000000000..0785391d78ef73 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-crossencoder_camembert_base_mmarcofr_pipeline_fr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: French crossencoder_camembert_base_mmarcofr_pipeline pipeline CamemBertForSequenceClassification from antoinelouis +author: John Snow Labs +name: crossencoder_camembert_base_mmarcofr_pipeline +date: 2024-09-03 +tags: [fr, open_source, pipeline, onnx] +task: Text Classification +language: fr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`crossencoder_camembert_base_mmarcofr_pipeline` is a French model originally trained by antoinelouis. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/crossencoder_camembert_base_mmarcofr_pipeline_fr_5.5.0_3.0_1725378187379.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/crossencoder_camembert_base_mmarcofr_pipeline_fr_5.5.0_3.0_1725378187379.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipeline = PretrainedPipeline("crossencoder_camembert_base_mmarcofr_pipeline", lang = "fr")
annotations = pipeline.transform(df)

```
```scala

val df = Seq("I love spark-nlp").toDS.toDF("text")
val pipeline = new PretrainedPipeline("crossencoder_camembert_base_mmarcofr_pipeline", lang = "fr")
val annotations = pipeline.transform(df)

```
</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|crossencoder_camembert_base_mmarcofr_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fr| +|Size:|414.8 MB| + +## References + +https://huggingface.co/antoinelouis/crossencoder-camembert-base-mmarcoFR + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-crossencoder_camembert_l4_mmarcofr_pipeline_fr.md b/docs/_posts/ahmedlone127/2024-09-03-crossencoder_camembert_l4_mmarcofr_pipeline_fr.md new file mode 100644 index 00000000000000..5a8de20c95ce04 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-crossencoder_camembert_l4_mmarcofr_pipeline_fr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: French crossencoder_camembert_l4_mmarcofr_pipeline pipeline CamemBertForSequenceClassification from antoinelouis +author: John Snow Labs +name: crossencoder_camembert_l4_mmarcofr_pipeline +date: 2024-09-03 +tags: [fr, open_source, pipeline, onnx] +task: Text Classification +language: fr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`crossencoder_camembert_l4_mmarcofr_pipeline` is a French model originally trained by antoinelouis. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/crossencoder_camembert_l4_mmarcofr_pipeline_fr_5.5.0_3.0_1725378348947.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/crossencoder_camembert_l4_mmarcofr_pipeline_fr_5.5.0_3.0_1725378348947.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipeline = PretrainedPipeline("crossencoder_camembert_l4_mmarcofr_pipeline", lang = "fr")
annotations = pipeline.transform(df)

```
```scala

val df = Seq("I love spark-nlp").toDS.toDF("text")
val pipeline = new PretrainedPipeline("crossencoder_camembert_l4_mmarcofr_pipeline", lang = "fr")
val annotations = pipeline.transform(df)

```
</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|crossencoder_camembert_l4_mmarcofr_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fr| +|Size:|202.2 MB| + +## References + +https://huggingface.co/antoinelouis/crossencoder-camembert-L4-mmarcoFR + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-crossencoder_xlm_roberta_base_mmarcofr_pipeline_fr.md b/docs/_posts/ahmedlone127/2024-09-03-crossencoder_xlm_roberta_base_mmarcofr_pipeline_fr.md new file mode 100644 index 00000000000000..eca4344f66888a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-crossencoder_xlm_roberta_base_mmarcofr_pipeline_fr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: French crossencoder_xlm_roberta_base_mmarcofr_pipeline pipeline XlmRoBertaForSequenceClassification from antoinelouis +author: John Snow Labs +name: crossencoder_xlm_roberta_base_mmarcofr_pipeline +date: 2024-09-03 +tags: [fr, open_source, pipeline, onnx] +task: Text Classification +language: fr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`crossencoder_xlm_roberta_base_mmarcofr_pipeline` is a French model originally trained by antoinelouis. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/crossencoder_xlm_roberta_base_mmarcofr_pipeline_fr_5.5.0_3.0_1725396167346.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/crossencoder_xlm_roberta_base_mmarcofr_pipeline_fr_5.5.0_3.0_1725396167346.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipeline = PretrainedPipeline("crossencoder_xlm_roberta_base_mmarcofr_pipeline", lang = "fr")
annotations = pipeline.transform(df)

```
```scala

val df = Seq("I love spark-nlp").toDS.toDF("text")
val pipeline = new PretrainedPipeline("crossencoder_xlm_roberta_base_mmarcofr_pipeline", lang = "fr")
val annotations = pipeline.transform(df)

```
</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|crossencoder_xlm_roberta_base_mmarcofr_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fr| +|Size:|853.6 MB| + +## References + +https://huggingface.co/antoinelouis/crossencoder-xlm-roberta-base-mmarcoFR + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-dataequity_opus_maltese_german_english_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-dataequity_opus_maltese_german_english_pipeline_en.md new file mode 100644 index 00000000000000..2510de2f297bb6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-dataequity_opus_maltese_german_english_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dataequity_opus_maltese_german_english_pipeline pipeline MarianTransformer from dataequity +author: John Snow Labs +name: dataequity_opus_maltese_german_english_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dataequity_opus_maltese_german_english_pipeline` is a English model originally trained by dataequity. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dataequity_opus_maltese_german_english_pipeline_en_5.5.0_3.0_1725345875891.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dataequity_opus_maltese_german_english_pipeline_en_5.5.0_3.0_1725345875891.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dataequity_opus_maltese_german_english_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dataequity_opus_maltese_german_english_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
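+
+For quick checks without building a DataFrame, the pipeline's `annotate()` helper can be called directly on a string; the German sentence below and the exact output key holding the translation are illustrative assumptions, not part of the packaged pipeline's documentation.
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("dataequity_opus_maltese_german_english_pipeline", lang = "en")
+
+# annotate() runs the whole pipeline on a plain string and returns a dict of
+# output-column name -> list of results; the translated text appears under the
+# MarianTransformer's output column (the key name depends on the pipeline's stages)
+result = pipeline.annotate("Maschinelles Lernen verändert, wie wir Software entwickeln.")
+print(result)
+```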
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dataequity_opus_maltese_german_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|500.0 MB| + +## References + +https://huggingface.co/dataequity/dataequity-opus-mt-de-en + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-deberta_v3_base_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-deberta_v3_base_pipeline_en.md new file mode 100644 index 00000000000000..e1439273045c1c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-deberta_v3_base_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_base_pipeline pipeline DeBertaForTokenClassification from EverDarling +author: John Snow Labs +name: deberta_v3_base_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_pipeline` is a English model originally trained by EverDarling. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_pipeline_en_5.5.0_3.0_1725400435584.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_pipeline_en_5.5.0_3.0_1725400435584.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("deberta_v3_base_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("deberta_v3_base_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
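+
+When character offsets are needed, for example to highlight recognized entities, `fullAnnotate()` keeps begin/end positions and metadata; the sample sentence below is only an illustration, and the output keys depend on the stages listed under Included Models.
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("deberta_v3_base_pipeline", lang = "en")
+
+# fullAnnotate() returns one dict per input text, mapping each output column
+# to a list of Annotation objects carrying result, begin, end and metadata
+for annotated in pipeline.fullAnnotate("John Snow Labs is based in Delaware."):
+    for column, annotations in annotated.items():
+        print(column, [(a.result, a.begin, a.end) for a in annotations])
+```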
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|687.7 MB| + +## References + +https://huggingface.co/EverDarling/deberta-v3-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-deberta_v3_large_finetuned_mlm_accelerate_v3_02_xp_1_en.md b/docs/_posts/ahmedlone127/2024-09-03-deberta_v3_large_finetuned_mlm_accelerate_v3_02_xp_1_en.md new file mode 100644 index 00000000000000..17ac94834ff67a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-deberta_v3_large_finetuned_mlm_accelerate_v3_02_xp_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_large_finetuned_mlm_accelerate_v3_02_xp_1 DeBertaEmbeddings from quastrinos +author: John Snow Labs +name: deberta_v3_large_finetuned_mlm_accelerate_v3_02_xp_1 +date: 2024-09-03 +tags: [en, open_source, onnx, embeddings, deberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large_finetuned_mlm_accelerate_v3_02_xp_1` is a English model originally trained by quastrinos. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_finetuned_mlm_accelerate_v3_02_xp_1_en_5.5.0_3.0_1725376521475.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_finetuned_mlm_accelerate_v3_02_xp_1_en_5.5.0_3.0_1725376521475.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DeBertaEmbeddings.pretrained("deberta_v3_large_finetuned_mlm_accelerate_v3_02_xp_1","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DeBertaEmbeddings.pretrained("deberta_v3_large_finetuned_mlm_accelerate_v3_02_xp_1","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
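+
+Continuing from the `pipelineDF` produced above, one way to inspect the token vectors is to explode the annotation column; the field names used here follow the standard Spark NLP annotation schema and are a sketch rather than part of this model's documentation.
+
+```python
+from pyspark.sql.functions import col, explode
+
+# Each row of "embeddings" holds one annotation per token; the vector itself
+# is stored in the annotation's "embeddings" field
+tokens = pipelineDF.select(explode(col("embeddings")).alias("emb")) \
+    .select(col("emb.result").alias("token"), col("emb.embeddings").alias("vector"))
+tokens.show(truncate = 80)
+```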
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_finetuned_mlm_accelerate_v3_02_xp_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[deberta]| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/quastrinos/deberta-v3-large-finetuned-mlm-accelerate-v3-02-xp-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-deberta_v3_large_hf_llm_en.md b/docs/_posts/ahmedlone127/2024-09-03-deberta_v3_large_hf_llm_en.md new file mode 100644 index 00000000000000..d60852c8eb148f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-deberta_v3_large_hf_llm_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_large_hf_llm DeBertaEmbeddings from nagupv +author: John Snow Labs +name: deberta_v3_large_hf_llm +date: 2024-09-03 +tags: [en, open_source, onnx, embeddings, deberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large_hf_llm` is a English model originally trained by nagupv. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_hf_llm_en_5.5.0_3.0_1725377592523.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_hf_llm_en_5.5.0_3.0_1725377592523.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DeBertaEmbeddings.pretrained("deberta_v3_large_hf_llm","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DeBertaEmbeddings.pretrained("deberta_v3_large_hf_llm","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_hf_llm| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[deberta]| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/nagupv/deberta-v3-large-hf-llm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-detect_femicide_news_xlmr_dutch_fft_freeze2_en.md b/docs/_posts/ahmedlone127/2024-09-03-detect_femicide_news_xlmr_dutch_fft_freeze2_en.md new file mode 100644 index 00000000000000..2cbaf484118cce --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-detect_femicide_news_xlmr_dutch_fft_freeze2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English detect_femicide_news_xlmr_dutch_fft_freeze2 XlmRoBertaForSequenceClassification from gossminn +author: John Snow Labs +name: detect_femicide_news_xlmr_dutch_fft_freeze2 +date: 2024-09-03 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`detect_femicide_news_xlmr_dutch_fft_freeze2` is a English model originally trained by gossminn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/detect_femicide_news_xlmr_dutch_fft_freeze2_en_5.5.0_3.0_1725395228458.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/detect_femicide_news_xlmr_dutch_fft_freeze2_en_5.5.0_3.0_1725395228458.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("detect_femicide_news_xlmr_dutch_fft_freeze2","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("detect_femicide_news_xlmr_dutch_fft_freeze2", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
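+
+Once `pipelineDF` has been computed as above, the predicted label can be read from the classifier's output column; the column names below match this example, and the metadata field is assumed to hold per-class scores when the annotator exposes them.
+
+```python
+from pyspark.sql.functions import col
+
+# "class" is an array of annotations; the label is in the "result" field and
+# any confidence scores are kept in the annotation metadata
+predictions = pipelineDF.select(
+    col("text"),
+    col("class.result").getItem(0).alias("prediction"),
+    col("class.metadata").getItem(0).alias("scores")
+)
+predictions.show(truncate = False)
+```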
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|detect_femicide_news_xlmr_dutch_fft_freeze2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|863.0 MB| + +## References + +https://huggingface.co/gossminn/detect-femicide-news-xlmr-nl-fft-freeze2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_multilingual_cased_finetuned_bible_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_multilingual_cased_finetuned_bible_pipeline_xx.md new file mode 100644 index 00000000000000..2d1fed8edf0e5b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_multilingual_cased_finetuned_bible_pipeline_xx.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Multilingual distilbert_base_multilingual_cased_finetuned_bible_pipeline pipeline DistilBertEmbeddings from Pragash-Mohanarajah +author: John Snow Labs +name: distilbert_base_multilingual_cased_finetuned_bible_pipeline +date: 2024-09-03 +tags: [xx, open_source, pipeline, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_multilingual_cased_finetuned_bible_pipeline` is a Multilingual model originally trained by Pragash-Mohanarajah. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_multilingual_cased_finetuned_bible_pipeline_xx_5.5.0_3.0_1725384669011.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_multilingual_cased_finetuned_bible_pipeline_xx_5.5.0_3.0_1725384669011.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_multilingual_cased_finetuned_bible_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_multilingual_cased_finetuned_bible_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_multilingual_cased_finetuned_bible_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|505.4 MB| + +## References + +https://huggingface.co/Pragash-Mohanarajah/distilbert-base-multilingual-cased-finetuned-bible + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_multilingual_cased_finetuned_bible_xx.md b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_multilingual_cased_finetuned_bible_xx.md new file mode 100644 index 00000000000000..63d7171b8ad043 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_multilingual_cased_finetuned_bible_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual distilbert_base_multilingual_cased_finetuned_bible DistilBertEmbeddings from Pragash-Mohanarajah +author: John Snow Labs +name: distilbert_base_multilingual_cased_finetuned_bible +date: 2024-09-03 +tags: [xx, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_multilingual_cased_finetuned_bible` is a Multilingual model originally trained by Pragash-Mohanarajah. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_multilingual_cased_finetuned_bible_xx_5.5.0_3.0_1725384642281.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_multilingual_cased_finetuned_bible_xx_5.5.0_3.0_1725384642281.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_multilingual_cased_finetuned_bible","xx") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_multilingual_cased_finetuned_bible","xx") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_multilingual_cased_finetuned_bible| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|xx| +|Size:|505.4 MB| + +## References + +https://huggingface.co/Pragash-Mohanarajah/distilbert-base-multilingual-cased-finetuned-bible \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_deletion_multiclass_complete_final_v2_en.md b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_deletion_multiclass_complete_final_v2_en.md new file mode 100644 index 00000000000000..14888123ce1f40 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_deletion_multiclass_complete_final_v2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_deletion_multiclass_complete_final_v2 DistilBertForSequenceClassification from research-dump +author: John Snow Labs +name: distilbert_base_uncased_deletion_multiclass_complete_final_v2 +date: 2024-09-03 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_deletion_multiclass_complete_final_v2` is a English model originally trained by research-dump. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_deletion_multiclass_complete_final_v2_en_5.5.0_3.0_1725394478548.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_deletion_multiclass_complete_final_v2_en_5.5.0_3.0_1725394478548.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_deletion_multiclass_complete_final_v2","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_deletion_multiclass_complete_final_v2", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_deletion_multiclass_complete_final_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/research-dump/distilbert-base-uncased_deletion_multiclass_complete_final_v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_edu_classifier_en.md b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_edu_classifier_en.md new file mode 100644 index 00000000000000..ab21b6898b7369 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_edu_classifier_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_edu_classifier DistilBertForSequenceClassification from pszemraj +author: John Snow Labs +name: distilbert_base_uncased_edu_classifier +date: 2024-09-03 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_edu_classifier` is a English model originally trained by pszemraj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_edu_classifier_en_5.5.0_3.0_1725394067344.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_edu_classifier_en_5.5.0_3.0_1725394067344.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_edu_classifier","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_edu_classifier", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
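+
+For ad-hoc scoring of single strings, the fitted model can also be wrapped in a `LightPipeline`, which avoids creating a DataFrame per request; the sample sentence below is illustrative only, and `pipelineModel` is the fitted pipeline from the example above.
+
+```python
+from sparknlp.base import LightPipeline
+
+# LightPipeline runs the fitted Spark NLP pipeline on plain Python strings
+light = LightPipeline(pipelineModel)
+result = light.annotate("This lecture introduces the basics of photosynthesis.")
+print(result["class"])  # predicted label(s) from the classifier's output column
+```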
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_edu_classifier| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/pszemraj/distilbert-base-uncased-edu-classifier \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_ag_news_v5_en.md b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_ag_news_v5_en.md new file mode 100644 index 00000000000000..f364121abf6acd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_ag_news_v5_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ag_news_v5 DistilBertEmbeddings from miggwp +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ag_news_v5 +date: 2024-09-03 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ag_news_v5` is a English model originally trained by miggwp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ag_news_v5_en_5.5.0_3.0_1725384862665.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ag_news_v5_en_5.5.0_3.0_1725384862665.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_ag_news_v5","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_ag_news_v5","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ag_news_v5| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/miggwp/distilbert-base-uncased-finetuned-ag-news-v5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_emotion_aliramikh_en.md b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_emotion_aliramikh_en.md new file mode 100644 index 00000000000000..e7b8a275903bdf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_emotion_aliramikh_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_emotion_aliramikh DistilBertForSequenceClassification from aliramikh +author: John Snow Labs +name: distilbert_base_uncased_finetuned_emotion_aliramikh +date: 2024-09-03 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_emotion_aliramikh` is a English model originally trained by aliramikh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_aliramikh_en_5.5.0_3.0_1725394176503.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_aliramikh_en_5.5.0_3.0_1725394176503.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_emotion_aliramikh","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_emotion_aliramikh", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_emotion_aliramikh| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/aliramikh/distilbert-base-uncased-finetuned-emotion \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_emotion_aliramikh_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_emotion_aliramikh_pipeline_en.md new file mode 100644 index 00000000000000..e1c35668945e2b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_emotion_aliramikh_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_emotion_aliramikh_pipeline pipeline DistilBertForSequenceClassification from aliramikh +author: John Snow Labs +name: distilbert_base_uncased_finetuned_emotion_aliramikh_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_emotion_aliramikh_pipeline` is a English model originally trained by aliramikh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_aliramikh_pipeline_en_5.5.0_3.0_1725394190852.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_aliramikh_pipeline_en_5.5.0_3.0_1725394190852.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_emotion_aliramikh_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_emotion_aliramikh_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
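+
+`annotate()` also accepts a list of strings, which is a convenient way to score a handful of texts at once; the example sentences and the `class` output key below are assumptions based on the stages listed under Included Models.
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_emotion_aliramikh_pipeline", lang = "en")
+
+texts = ["I can't stop smiling today!", "This delay is really frustrating."]
+# One result dict is returned per input text
+for text, result in zip(texts, pipeline.annotate(texts)):
+    print(text, "->", result.get("class"))
+```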
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_emotion_aliramikh_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/aliramikh/distilbert-base-uncased-finetuned-emotion + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_emotion_iamsubrata_en.md b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_emotion_iamsubrata_en.md new file mode 100644 index 00000000000000..8fc96ec849c09c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_emotion_iamsubrata_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_emotion_iamsubrata DistilBertForSequenceClassification from iamsubrata +author: John Snow Labs +name: distilbert_base_uncased_finetuned_emotion_iamsubrata +date: 2024-09-03 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_emotion_iamsubrata` is a English model originally trained by iamsubrata. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_iamsubrata_en_5.5.0_3.0_1725394680896.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_iamsubrata_en_5.5.0_3.0_1725394680896.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_emotion_iamsubrata","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_emotion_iamsubrata", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_emotion_iamsubrata| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/iamsubrata/distilbert-base-uncased-finetuned-emotion \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_emotion_pulpilisory_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_emotion_pulpilisory_pipeline_en.md new file mode 100644 index 00000000000000..5376a349b17784 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_emotion_pulpilisory_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_emotion_pulpilisory_pipeline pipeline DistilBertForSequenceClassification from pulpilisory +author: John Snow Labs +name: distilbert_base_uncased_finetuned_emotion_pulpilisory_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_emotion_pulpilisory_pipeline` is a English model originally trained by pulpilisory. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_pulpilisory_pipeline_en_5.5.0_3.0_1725329825315.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_pulpilisory_pipeline_en_5.5.0_3.0_1725329825315.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_emotion_pulpilisory_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_emotion_pulpilisory_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_emotion_pulpilisory_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/pulpilisory/distilbert-base-uncased-finetuned-emotion + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_accelerate_cxbn12_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_accelerate_cxbn12_pipeline_en.md new file mode 100644 index 00000000000000..eacc1323a79ce1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_accelerate_cxbn12_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_cxbn12_pipeline pipeline DistilBertEmbeddings from cxbn12 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_cxbn12_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_accelerate_cxbn12_pipeline` is a English model originally trained by cxbn12. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_cxbn12_pipeline_en_5.5.0_3.0_1725389365524.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_cxbn12_pipeline_en_5.5.0_3.0_1725389365524.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_accelerate_cxbn12_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_accelerate_cxbn12_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_cxbn12_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/cxbn12/distilbert-base-uncased-finetuned-imdb-accelerate + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_accelerate_jhhan_en.md b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_accelerate_jhhan_en.md new file mode 100644 index 00000000000000..5fd7157aabb339 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_accelerate_jhhan_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_jhhan DistilBertEmbeddings from JHhan +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_jhhan +date: 2024-09-03 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_accelerate_jhhan` is a English model originally trained by JHhan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_jhhan_en_5.5.0_3.0_1725385333902.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_jhhan_en_5.5.0_3.0_1725385333902.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_jhhan","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_jhhan","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_jhhan| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/JHhan/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_ddn0116_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_ddn0116_pipeline_en.md new file mode 100644 index 00000000000000..6b9f754847a3dd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_ddn0116_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_ddn0116_pipeline pipeline DistilBertEmbeddings from ddn0116 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_ddn0116_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_ddn0116_pipeline` is a English model originally trained by ddn0116. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_ddn0116_pipeline_en_5.5.0_3.0_1725389365590.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_ddn0116_pipeline_en_5.5.0_3.0_1725389365590.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_ddn0116_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_ddn0116_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_ddn0116_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ddn0116/distilbert-base-uncased-finetuned-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_gertjanvanderwel_en.md b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_gertjanvanderwel_en.md new file mode 100644 index 00000000000000..c7dd3784202fc4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_gertjanvanderwel_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_gertjanvanderwel DistilBertEmbeddings from gertjanvanderwel +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_gertjanvanderwel +date: 2024-09-03 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_gertjanvanderwel` is a English model originally trained by gertjanvanderwel. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_gertjanvanderwel_en_5.5.0_3.0_1725385002385.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_gertjanvanderwel_en_5.5.0_3.0_1725385002385.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_gertjanvanderwel","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_gertjanvanderwel","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_gertjanvanderwel| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/gertjanvanderwel/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_mie_zhz_en.md b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_mie_zhz_en.md new file mode 100644 index 00000000000000..4b9154f23fda06 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_mie_zhz_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_mie_zhz DistilBertEmbeddings from mie-zhz +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_mie_zhz +date: 2024-09-03 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_mie_zhz` is a English model originally trained by mie-zhz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_mie_zhz_en_5.5.0_3.0_1725389653070.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_mie_zhz_en_5.5.0_3.0_1725389653070.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_mie_zhz","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_mie_zhz","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_mie_zhz| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/mie-zhz/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_mie_zhz_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_mie_zhz_pipeline_en.md new file mode 100644 index 00000000000000..82932cdeed0bc3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_mie_zhz_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_mie_zhz_pipeline pipeline DistilBertEmbeddings from mie-zhz +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_mie_zhz_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_mie_zhz_pipeline` is a English model originally trained by mie-zhz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_mie_zhz_pipeline_en_5.5.0_3.0_1725389666185.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_mie_zhz_pipeline_en_5.5.0_3.0_1725389666185.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_mie_zhz_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_mie_zhz_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
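+
+The `df` referenced above is not created in the snippet; a minimal sketch of preparing it, assuming the pipeline's first stage reads a column named `text`, is shown below.
+
+```python
+# Hypothetical input preparation; the column name "text" is an assumption
+# about what the pipeline's DocumentAssembler stage expects.
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+annotations.show(truncate=False)
+```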
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_mie_zhz_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/mie-zhz/distilbert-base-uncased-finetuned-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_mongdiutindei_en.md b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_mongdiutindei_en.md new file mode 100644 index 00000000000000..42d0483db792f4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_mongdiutindei_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_mongdiutindei DistilBertEmbeddings from mongdiutindei +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_mongdiutindei +date: 2024-09-03 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_mongdiutindei` is a English model originally trained by mongdiutindei. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_mongdiutindei_en_5.5.0_3.0_1725384973851.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_mongdiutindei_en_5.5.0_3.0_1725384973851.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_mongdiutindei","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_mongdiutindei","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_mongdiutindei| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/mongdiutindei/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_qiyuan123_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_qiyuan123_pipeline_en.md new file mode 100644 index 00000000000000..1e64c5a95c79b5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_qiyuan123_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_qiyuan123_pipeline pipeline DistilBertEmbeddings from qiyuan123 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_qiyuan123_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_qiyuan123_pipeline` is a English model originally trained by qiyuan123. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_qiyuan123_pipeline_en_5.5.0_3.0_1725389356257.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_qiyuan123_pipeline_en_5.5.0_3.0_1725389356257.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_qiyuan123_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_qiyuan123_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_qiyuan123_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/qiyuan123/distilbert-base-uncased-finetuned-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_rohit5895_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_rohit5895_pipeline_en.md new file mode 100644 index 00000000000000..6c63612b1cb484 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_rohit5895_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_rohit5895_pipeline pipeline DistilBertEmbeddings from rohit5895 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_rohit5895_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_rohit5895_pipeline` is a English model originally trained by rohit5895. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_rohit5895_pipeline_en_5.5.0_3.0_1725389411497.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_rohit5895_pipeline_en_5.5.0_3.0_1725389411497.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_rohit5895_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_rohit5895_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_rohit5895_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/rohit5895/distilbert-base-uncased-finetuned-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_sdinger_en.md b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_sdinger_en.md new file mode 100644 index 00000000000000..6b80db0a35a26a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_sdinger_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_sdinger DistilBertEmbeddings from sdinger +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_sdinger +date: 2024-09-03 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_sdinger` is a English model originally trained by sdinger. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_sdinger_en_5.5.0_3.0_1725389470451.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_sdinger_en_5.5.0_3.0_1725389470451.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_sdinger","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_sdinger","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
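+
+For quick single-sentence checks, a LightPipeline can be wrapped around the fitted `pipelineModel` from the example above; this is a minimal sketch, assuming the same output column names as in the snippet.
+
+```python
+from sparknlp.base import LightPipeline
+
+# Runs the same stages on a plain string, without launching a distributed job.
+light = LightPipeline(pipelineModel)
+result = light.fullAnnotate("I love spark-nlp")[0]
+print([t.result for t in result["token"]])
+```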
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_sdinger| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/sdinger/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_shenberg1_en.md b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_shenberg1_en.md new file mode 100644 index 00000000000000..ad81b712a38321 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_imdb_shenberg1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_shenberg1 DistilBertEmbeddings from shenberg1 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_shenberg1 +date: 2024-09-03 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_shenberg1` is a English model originally trained by shenberg1. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_shenberg1_en_5.5.0_3.0_1725389833709.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_shenberg1_en_5.5.0_3.0_1725389833709.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_shenberg1","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_shenberg1","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_shenberg1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/shenberg1/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_react_content_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_react_content_pipeline_en.md new file mode 100644 index 00000000000000..51a8f54d6615a5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_react_content_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_react_content_pipeline pipeline DistilBertEmbeddings from mjalg +author: John Snow Labs +name: distilbert_base_uncased_finetuned_react_content_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_react_content_pipeline` is a English model originally trained by mjalg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_react_content_pipeline_en_5.5.0_3.0_1725389896408.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_react_content_pipeline_en_5.5.0_3.0_1725389896408.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_react_content_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_react_content_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
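+
+PretrainedPipeline also exposes `annotate` for plain-string input; the sketch below assumes the pipeline has been loaded as in the example above.
+
+```python
+# Lightweight alternative to transform() for a single string;
+# the returned dict is keyed by the pipeline's output columns.
+result = pipeline.annotate("I love spark-nlp")
+print(result.keys())
+```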
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_react_content_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/mjalg/distilbert-base-uncased-finetuned-react-content + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_synthetic_finetuned_synthetic_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_synthetic_finetuned_synthetic_pipeline_en.md new file mode 100644 index 00000000000000..253309d20ad57e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-distilbert_base_uncased_finetuned_synthetic_finetuned_synthetic_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_synthetic_finetuned_synthetic_pipeline pipeline DistilBertEmbeddings from Chrisantha +author: John Snow Labs +name: distilbert_base_uncased_finetuned_synthetic_finetuned_synthetic_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_synthetic_finetuned_synthetic_pipeline` is a English model originally trained by Chrisantha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_synthetic_finetuned_synthetic_pipeline_en_5.5.0_3.0_1725389935682.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_synthetic_finetuned_synthetic_pipeline_en_5.5.0_3.0_1725389935682.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_synthetic_finetuned_synthetic_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_synthetic_finetuned_synthetic_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_synthetic_finetuned_synthetic_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Chrisantha/distilbert-base-uncased-finetuned-synthetic-finetuned-synthetic + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-distilbert_finetuned_imdb_indah1_en.md b/docs/_posts/ahmedlone127/2024-09-03-distilbert_finetuned_imdb_indah1_en.md new file mode 100644 index 00000000000000..f207b50316b71d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-distilbert_finetuned_imdb_indah1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_finetuned_imdb_indah1 DistilBertEmbeddings from Indah1 +author: John Snow Labs +name: distilbert_finetuned_imdb_indah1 +date: 2024-09-03 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_imdb_indah1` is a English model originally trained by Indah1. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_imdb_indah1_en_5.5.0_3.0_1725384563576.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_imdb_indah1_en_5.5.0_3.0_1725384563576.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_finetuned_imdb_indah1","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_finetuned_imdb_indah1","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
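+
+A quick way to confirm the embedding dimension, assuming the `pipelineDF` DataFrame produced by the Python snippet above:
+
+```python
+# DistilBERT base models typically produce 768-dimensional token vectors.
+first = pipelineDF.selectExpr("explode(embeddings) as emb").first()
+print(len(first["emb"]["embeddings"]))
+```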
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_imdb_indah1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Indah1/distilbert-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-distilbert_finetuned_imdb_prateekag159_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-distilbert_finetuned_imdb_prateekag159_pipeline_en.md new file mode 100644 index 00000000000000..04c54497fec447 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-distilbert_finetuned_imdb_prateekag159_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_finetuned_imdb_prateekag159_pipeline pipeline DistilBertEmbeddings from prateekag159 +author: John Snow Labs +name: distilbert_finetuned_imdb_prateekag159_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_imdb_prateekag159_pipeline` is a English model originally trained by prateekag159. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_imdb_prateekag159_pipeline_en_5.5.0_3.0_1725389560515.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_imdb_prateekag159_pipeline_en_5.5.0_3.0_1725389560515.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_finetuned_imdb_prateekag159_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_finetuned_imdb_prateekag159_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_imdb_prateekag159_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/prateekag159/distilbert-finetuned-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-distilbert_persian_farsi_zwnj_base_fa.md b/docs/_posts/ahmedlone127/2024-09-03-distilbert_persian_farsi_zwnj_base_fa.md new file mode 100644 index 00000000000000..2673e96d340b23 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-distilbert_persian_farsi_zwnj_base_fa.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Persian distilbert_persian_farsi_zwnj_base DistilBertEmbeddings from HooshvareLab +author: John Snow Labs +name: distilbert_persian_farsi_zwnj_base +date: 2024-09-03 +tags: [fa, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: fa +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_persian_farsi_zwnj_base` is a Persian model originally trained by HooshvareLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_persian_farsi_zwnj_base_fa_5.5.0_3.0_1725384779192.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_persian_farsi_zwnj_base_fa_5.5.0_3.0_1725384779192.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_persian_farsi_zwnj_base","fa") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_persian_farsi_zwnj_base","fa") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
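+
+Since this is a Persian model, the English sample sentence above is only a placeholder; a minimal sketch with Persian input (the sentence below is illustrative, not from the card) could look like this.
+
+```python
+# Any Persian text can be used; this example sentence means "This is a test sentence".
+fa_data = spark.createDataFrame([["این یک جمله آزمایشی است"]]).toDF("text")
+fa_result = pipelineModel.transform(fa_data)
+fa_result.select("embeddings.result").show(truncate=False)
+```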
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_persian_farsi_zwnj_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|fa| +|Size:|282.3 MB| + +## References + +https://huggingface.co/HooshvareLab/distilbert-fa-zwnj-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-distilbert_persian_farsi_zwnj_base_pipeline_fa.md b/docs/_posts/ahmedlone127/2024-09-03-distilbert_persian_farsi_zwnj_base_pipeline_fa.md new file mode 100644 index 00000000000000..d7b289922331ec --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-distilbert_persian_farsi_zwnj_base_pipeline_fa.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Persian distilbert_persian_farsi_zwnj_base_pipeline pipeline DistilBertEmbeddings from HooshvareLab +author: John Snow Labs +name: distilbert_persian_farsi_zwnj_base_pipeline +date: 2024-09-03 +tags: [fa, open_source, pipeline, onnx] +task: Embeddings +language: fa +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_persian_farsi_zwnj_base_pipeline` is a Persian model originally trained by HooshvareLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_persian_farsi_zwnj_base_pipeline_fa_5.5.0_3.0_1725384794631.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_persian_farsi_zwnj_base_pipeline_fa_5.5.0_3.0_1725384794631.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_persian_farsi_zwnj_base_pipeline", lang = "fa") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_persian_farsi_zwnj_base_pipeline", lang = "fa") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_persian_farsi_zwnj_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fa| +|Size:|282.3 MB| + +## References + +https://huggingface.co/HooshvareLab/distilbert-fa-zwnj-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-distilbert_tokenizer_256k_mlm_best_en.md b/docs/_posts/ahmedlone127/2024-09-03-distilbert_tokenizer_256k_mlm_best_en.md new file mode 100644 index 00000000000000..4238655e8cbd06 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-distilbert_tokenizer_256k_mlm_best_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_tokenizer_256k_mlm_best DistilBertEmbeddings from vocab-transformers +author: John Snow Labs +name: distilbert_tokenizer_256k_mlm_best +date: 2024-09-03 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_tokenizer_256k_mlm_best` is a English model originally trained by vocab-transformers. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_tokenizer_256k_mlm_best_en_5.5.0_3.0_1725384829417.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_tokenizer_256k_mlm_best_en_5.5.0_3.0_1725384829417.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_tokenizer_256k_mlm_best","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_tokenizer_256k_mlm_best","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_tokenizer_256k_mlm_best| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|878.7 MB| + +## References + +https://huggingface.co/vocab-transformers/distilbert-tokenizer_256k-MLM_best \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-distilbert_yelp_sentiment_analysis_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-distilbert_yelp_sentiment_analysis_pipeline_en.md new file mode 100644 index 00000000000000..e3edabc06627f1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-distilbert_yelp_sentiment_analysis_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_yelp_sentiment_analysis_pipeline pipeline DistilBertForSequenceClassification from noahnsimbe +author: John Snow Labs +name: distilbert_yelp_sentiment_analysis_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_yelp_sentiment_analysis_pipeline` is a English model originally trained by noahnsimbe. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_yelp_sentiment_analysis_pipeline_en_5.5.0_3.0_1725394576059.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_yelp_sentiment_analysis_pipeline_en_5.5.0_3.0_1725394576059.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_yelp_sentiment_analysis_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_yelp_sentiment_analysis_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
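+
+A minimal end-to-end sketch for this sentiment pipeline, assuming the input column is named `text` and the classifier stage writes its labels to a column named `class` (both assumptions based on the included stages):
+
+```python
+df = spark.createDataFrame([["The food was great and the staff were friendly"]]).toDF("text")
+result = pipeline.transform(df)
+# The DistilBertForSequenceClassification stage is assumed to write labels to "class".
+result.select("text", "class.result").show(truncate=False)
+```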
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_yelp_sentiment_analysis_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/noahnsimbe/DistilBERT-yelp-sentiment-analysis + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-distilbertfinal_ctxsentence_train_all_test_french_second_train_set_french_false_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-distilbertfinal_ctxsentence_train_all_test_french_second_train_set_french_false_pipeline_en.md new file mode 100644 index 00000000000000..fb00c726a4eb01 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-distilbertfinal_ctxsentence_train_all_test_french_second_train_set_french_false_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbertfinal_ctxsentence_train_all_test_french_second_train_set_french_false_pipeline pipeline RoBertaForSequenceClassification from ali2066 +author: John Snow Labs +name: distilbertfinal_ctxsentence_train_all_test_french_second_train_set_french_false_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbertfinal_ctxsentence_train_all_test_french_second_train_set_french_false_pipeline` is a English model originally trained by ali2066. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbertfinal_ctxsentence_train_all_test_french_second_train_set_french_false_pipeline_en_5.5.0_3.0_1725403293648.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbertfinal_ctxsentence_train_all_test_french_second_train_set_french_false_pipeline_en_5.5.0_3.0_1725403293648.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbertfinal_ctxsentence_train_all_test_french_second_train_set_french_false_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbertfinal_ctxsentence_train_all_test_french_second_train_set_french_false_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbertfinal_ctxsentence_train_all_test_french_second_train_set_french_false_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|468.2 MB| + +## References + +https://huggingface.co/ali2066/DistilBERTFINAL_ctxSentence_TRAIN_all_TEST_french_second_train_set_french_False + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-distilgreek_bert_el.md b/docs/_posts/ahmedlone127/2024-09-03-distilgreek_bert_el.md new file mode 100644 index 00000000000000..7b7ce7832d7ff1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-distilgreek_bert_el.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Modern Greek (1453-) distilgreek_bert DistilBertEmbeddings from EftychiaKarav +author: John Snow Labs +name: distilgreek_bert +date: 2024-09-03 +tags: [el, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: el +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilgreek_bert` is a Modern Greek (1453-) model originally trained by EftychiaKarav. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilgreek_bert_el_5.5.0_3.0_1725385238895.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilgreek_bert_el_5.5.0_3.0_1725385238895.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilgreek_bert","el") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilgreek_bert","el") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilgreek_bert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|el| +|Size:|262.2 MB| + +## References + +https://huggingface.co/EftychiaKarav/DistilGREEK-BERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-distilkobert_ft_en.md b/docs/_posts/ahmedlone127/2024-09-03-distilkobert_ft_en.md new file mode 100644 index 00000000000000..ba5d8cae437f93 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-distilkobert_ft_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilkobert_ft DistilBertForSequenceClassification from yeye776 +author: John Snow Labs +name: distilkobert_ft +date: 2024-09-03 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilkobert_ft` is a English model originally trained by yeye776. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilkobert_ft_en_5.5.0_3.0_1725394552048.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilkobert_ft_en_5.5.0_3.0_1725394552048.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilkobert_ft","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilkobert_ft", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
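+
+To read the predicted labels from the `pipelineDF` produced above, a minimal sketch could be:
+
+```python
+# "class" is the output column set on the classifier above;
+# each row holds the predicted label(s) for its input text.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```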
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilkobert_ft| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|106.5 MB| + +## References + +https://huggingface.co/yeye776/DistilKoBERT-ft \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-distilroberta_sst2_en.md b/docs/_posts/ahmedlone127/2024-09-03-distilroberta_sst2_en.md new file mode 100644 index 00000000000000..aab1b0533f52c8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-distilroberta_sst2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilroberta_sst2 RoBertaForSequenceClassification from gokuls +author: John Snow Labs +name: distilroberta_sst2 +date: 2024-09-03 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilroberta_sst2` is a English model originally trained by gokuls. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilroberta_sst2_en_5.5.0_3.0_1725369499053.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilroberta_sst2_en_5.5.0_3.0_1725369499053.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("distilroberta_sst2","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("distilroberta_sst2", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
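+
+A quick label distribution over the transformed data, assuming the `pipelineDF` produced by the Python snippet above:
+
+```python
+# Explode the "class" output column set above and count each predicted label.
+pipelineDF.selectExpr("explode(`class`.result) as label") \
+    .groupBy("label").count().show()
+```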
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilroberta_sst2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|308.6 MB| + +## References + +https://huggingface.co/gokuls/distilroberta-sst2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-dlfbert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-dlfbert_pipeline_en.md new file mode 100644 index 00000000000000..f64d5d04cf2521 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-dlfbert_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dlfbert_pipeline pipeline RoBertaForSequenceClassification from PubChimps +author: John Snow Labs +name: dlfbert_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dlfbert_pipeline` is a English model originally trained by PubChimps. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dlfbert_pipeline_en_5.5.0_3.0_1725368800194.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dlfbert_pipeline_en_5.5.0_3.0_1725368800194.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dlfbert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dlfbert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
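+
+`fullAnnotate` can be used instead of `transform` when working with plain strings; in the sketch below, the `class` key is an assumption about the classifier stage's output column.
+
+```python
+# Returns a list of dicts keyed by the pipeline's output columns.
+result = pipeline.fullAnnotate("This is a sample sentence")[0]
+for ann in result.get("class", []):
+    print(ann.result, ann.metadata)
+```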
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dlfbert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|314.0 MB| + +## References + +https://huggingface.co/PubChimps/dlfBERT + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-dummy_model_edge2992_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-dummy_model_edge2992_pipeline_en.md new file mode 100644 index 00000000000000..315b3c02ec4777 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-dummy_model_edge2992_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_edge2992_pipeline pipeline CamemBertEmbeddings from edge2992 +author: John Snow Labs +name: dummy_model_edge2992_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_edge2992_pipeline` is a English model originally trained by edge2992. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_edge2992_pipeline_en_5.5.0_3.0_1725407762631.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_edge2992_pipeline_en_5.5.0_3.0_1725407762631.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_edge2992_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_edge2992_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_edge2992_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/edge2992/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-dummy_model_mhrecaldeb_en.md b/docs/_posts/ahmedlone127/2024-09-03-dummy_model_mhrecaldeb_en.md new file mode 100644 index 00000000000000..33b282133c9abd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-dummy_model_mhrecaldeb_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_mhrecaldeb CamemBertEmbeddings from mhrecaldeb +author: John Snow Labs +name: dummy_model_mhrecaldeb +date: 2024-09-03 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_mhrecaldeb` is a English model originally trained by mhrecaldeb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_mhrecaldeb_en_5.5.0_3.0_1725407679065.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_mhrecaldeb_en_5.5.0_3.0_1725407679065.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_mhrecaldeb","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_mhrecaldeb","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
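+
+The fitted pipeline from the example above can be persisted and reloaded like any Spark ML model; the path below is purely illustrative.
+
+```python
+from pyspark.ml import PipelineModel
+
+# Save the fitted pipeline and load it back for reuse.
+pipelineModel.write().overwrite().save("/tmp/dummy_model_mhrecaldeb_pipeline")
+reloaded = PipelineModel.load("/tmp/dummy_model_mhrecaldeb_pipeline")
+reloaded.transform(data).select("embeddings.result").show(truncate=False)
+```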
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_mhrecaldeb| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/mhrecaldeb/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-e5_large_v2_vectoriseai_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-e5_large_v2_vectoriseai_pipeline_en.md new file mode 100644 index 00000000000000..522cfcebcc3890 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-e5_large_v2_vectoriseai_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English e5_large_v2_vectoriseai_pipeline pipeline E5Embeddings from vectoriseai +author: John Snow Labs +name: e5_large_v2_vectoriseai_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained E5Embeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`e5_large_v2_vectoriseai_pipeline` is a English model originally trained by vectoriseai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/e5_large_v2_vectoriseai_pipeline_en_5.5.0_3.0_1725345069301.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/e5_large_v2_vectoriseai_pipeline_en_5.5.0_3.0_1725345069301.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("e5_large_v2_vectoriseai_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("e5_large_v2_vectoriseai_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
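+
+For quick checks on a single string, `PretrainedPipeline` also exposes `annotate()` in addition to `transform()`; a short sketch (the query text is illustrative, and the returned keys depend on the pipeline's output columns):
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("e5_large_v2_vectoriseai_pipeline", lang="en")
+
+# annotate() runs the full pipeline on one string and returns a dict of results.
+result = pipeline.annotate("query: how much protein should a female eat")
+print(result.keys())
+```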
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|e5_large_v2_vectoriseai_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|796.2 MB| + +## References + +https://huggingface.co/vectoriseai/e5-large-v2 + +## Included Models + +- DocumentAssembler +- E5Embeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-embedded_e5_base_500_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-embedded_e5_base_500_pipeline_en.md new file mode 100644 index 00000000000000..3732683a88656e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-embedded_e5_base_500_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English embedded_e5_base_500_pipeline pipeline E5Embeddings from rithwik-db +author: John Snow Labs +name: embedded_e5_base_500_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained E5Embeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`embedded_e5_base_500_pipeline` is a English model originally trained by rithwik-db. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/embedded_e5_base_500_pipeline_en_5.5.0_3.0_1725344492675.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/embedded_e5_base_500_pipeline_en_5.5.0_3.0_1725344492675.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("embedded_e5_base_500_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("embedded_e5_base_500_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|embedded_e5_base_500_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|387.6 MB| + +## References + +https://huggingface.co/rithwik-db/embedded-e5-base-500 + +## Included Models + +- DocumentAssembler +- E5Embeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-emotion_amaniabuzaid_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-emotion_amaniabuzaid_pipeline_en.md new file mode 100644 index 00000000000000..fbd22dc16a03cc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-emotion_amaniabuzaid_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English emotion_amaniabuzaid_pipeline pipeline DistilBertForSequenceClassification from amaniabuzaid +author: John Snow Labs +name: emotion_amaniabuzaid_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`emotion_amaniabuzaid_pipeline` is a English model originally trained by amaniabuzaid. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/emotion_amaniabuzaid_pipeline_en_5.5.0_3.0_1725394607564.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/emotion_amaniabuzaid_pipeline_en_5.5.0_3.0_1725394607564.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("emotion_amaniabuzaid_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("emotion_amaniabuzaid_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|emotion_amaniabuzaid_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/amaniabuzaid/emotion + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-english_tamil_translator_en.md b/docs/_posts/ahmedlone127/2024-09-03-english_tamil_translator_en.md new file mode 100644 index 00000000000000..ba0aaa727e361b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-english_tamil_translator_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English english_tamil_translator MarianTransformer from Vasanth +author: John Snow Labs +name: english_tamil_translator +date: 2024-09-03 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`english_tamil_translator` is a English model originally trained by Vasanth. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/english_tamil_translator_en_5.5.0_3.0_1725345676449.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/english_tamil_translator_en_5.5.0_3.0_1725345676449.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# document -> sentence -> translation: the sentence detector feeds the Marian model
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("english_tamil_translator","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("english_tamil_translator","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
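+
+Once the pipeline above has been fitted and applied, the translated text can be read from the `translation` column it produces; a short sketch reusing `pipelineDF` from the snippet:
+
+```python
+# Each detected sentence yields one annotation whose `result` field holds the translation.
+pipelineDF.selectExpr("explode(translation.result) as translated_text").show(truncate=False)
+```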
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|english_tamil_translator| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|530.2 MB| + +## References + +https://huggingface.co/Vasanth/en-ta-translator \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-english_tonga_tonga_islands_ganda_nllb_en.md b/docs/_posts/ahmedlone127/2024-09-03-english_tonga_tonga_islands_ganda_nllb_en.md new file mode 100644 index 00000000000000..153cc58c82db88 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-english_tonga_tonga_islands_ganda_nllb_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English english_tonga_tonga_islands_ganda_nllb MarianTransformer from EricPeter +author: John Snow Labs +name: english_tonga_tonga_islands_ganda_nllb +date: 2024-09-03 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`english_tonga_tonga_islands_ganda_nllb` is a English model originally trained by EricPeter. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/english_tonga_tonga_islands_ganda_nllb_en_5.5.0_3.0_1725404143289.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/english_tonga_tonga_islands_ganda_nllb_en_5.5.0_3.0_1725404143289.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# document -> sentence -> translation: the sentence detector feeds the Marian model
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("english_tonga_tonga_islands_ganda_nllb","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("english_tonga_tonga_islands_ganda_nllb","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|english_tonga_tonga_islands_ganda_nllb| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|529.9 MB| + +## References + +https://huggingface.co/EricPeter/en-to-lg-nllb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-english_tonga_tonga_islands_turkish_finetuned_model_en.md b/docs/_posts/ahmedlone127/2024-09-03-english_tonga_tonga_islands_turkish_finetuned_model_en.md new file mode 100644 index 00000000000000..b5a38c7a25ca90 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-english_tonga_tonga_islands_turkish_finetuned_model_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English english_tonga_tonga_islands_turkish_finetuned_model MarianTransformer from ckartal +author: John Snow Labs +name: english_tonga_tonga_islands_turkish_finetuned_model +date: 2024-09-03 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`english_tonga_tonga_islands_turkish_finetuned_model` is a English model originally trained by ckartal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/english_tonga_tonga_islands_turkish_finetuned_model_en_5.5.0_3.0_1725404023634.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/english_tonga_tonga_islands_turkish_finetuned_model_en_5.5.0_3.0_1725404023634.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# document -> sentence -> translation: the sentence detector feeds the Marian model
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("english_tonga_tonga_islands_turkish_finetuned_model","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("english_tonga_tonga_islands_turkish_finetuned_model","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|english_tonga_tonga_islands_turkish_finetuned_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|509.8 MB| + +## References + +https://huggingface.co/ckartal/english-to-turkish-finetuned-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-fine_tuned_twitter_roberta_base_sentiment_latest_en.md b/docs/_posts/ahmedlone127/2024-09-03-fine_tuned_twitter_roberta_base_sentiment_latest_en.md new file mode 100644 index 00000000000000..918c9ecf307a53 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-fine_tuned_twitter_roberta_base_sentiment_latest_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English fine_tuned_twitter_roberta_base_sentiment_latest RoBertaForSequenceClassification from dilkasithari-IT +author: John Snow Labs +name: fine_tuned_twitter_roberta_base_sentiment_latest +date: 2024-09-03 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fine_tuned_twitter_roberta_base_sentiment_latest` is a English model originally trained by dilkasithari-IT. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tuned_twitter_roberta_base_sentiment_latest_en_5.5.0_3.0_1725402849457.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tuned_twitter_roberta_base_sentiment_latest_en_5.5.0_3.0_1725402849457.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# document -> token -> class: the classifier consumes the assembled document and its tokens
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("fine_tuned_twitter_roberta_base_sentiment_latest","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("fine_tuned_twitter_roberta_base_sentiment_latest", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
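+
+The predicted sentiment label for each row ends up in the `class` column's `result` field; reusing `pipelineDF` from the snippet above:
+
+```python
+# One predicted label per input row.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```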
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tuned_twitter_roberta_base_sentiment_latest| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|468.3 MB| + +## References + +https://huggingface.co/dilkasithari-IT/fine-tuned-twitter-roberta-base-sentiment-latest \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-fine_tuned_twitter_roberta_base_sentiment_latest_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-fine_tuned_twitter_roberta_base_sentiment_latest_pipeline_en.md new file mode 100644 index 00000000000000..22ba3dbc3fb18c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-fine_tuned_twitter_roberta_base_sentiment_latest_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English fine_tuned_twitter_roberta_base_sentiment_latest_pipeline pipeline RoBertaForSequenceClassification from dilkasithari-IT +author: John Snow Labs +name: fine_tuned_twitter_roberta_base_sentiment_latest_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fine_tuned_twitter_roberta_base_sentiment_latest_pipeline` is a English model originally trained by dilkasithari-IT. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tuned_twitter_roberta_base_sentiment_latest_pipeline_en_5.5.0_3.0_1725402875423.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tuned_twitter_roberta_base_sentiment_latest_pipeline_en_5.5.0_3.0_1725402875423.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("fine_tuned_twitter_roberta_base_sentiment_latest_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("fine_tuned_twitter_roberta_base_sentiment_latest_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tuned_twitter_roberta_base_sentiment_latest_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|468.3 MB| + +## References + +https://huggingface.co/dilkasithari-IT/fine-tuned-twitter-roberta-base-sentiment-latest + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-finer_ord_transformers_2_en.md b/docs/_posts/ahmedlone127/2024-09-03-finer_ord_transformers_2_en.md new file mode 100644 index 00000000000000..ebe153e2a40cf9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-finer_ord_transformers_2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English finer_ord_transformers_2 XlmRoBertaForTokenClassification from elshehawy +author: John Snow Labs +name: finer_ord_transformers_2 +date: 2024-09-03 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finer_ord_transformers_2` is a English model originally trained by elshehawy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finer_ord_transformers_2_en_5.5.0_3.0_1725373687016.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finer_ord_transformers_2_en_5.5.0_3.0_1725373687016.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# document -> token -> ner: the token classifier tags each token in the document
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("finer_ord_transformers_2","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("finer_ord_transformers_2", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
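+
+Token-level tags from the `ner` column can be grouped into entity chunks with Spark NLP's `NerConverter`, which is not part of this model but is commonly appended after a token classifier; a sketch reusing the stages defined above:
+
+```python
+from sparknlp.annotator import NerConverter
+
+# Collapses B-/I- token tags into whole entity chunks.
+nerConverter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+chunkPipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier, nerConverter])
+chunks = chunkPipeline.fit(data).transform(data)
+chunks.selectExpr("explode(ner_chunk) as chunk") \
+    .selectExpr("chunk.result", "chunk.metadata['entity'] as entity").show(truncate=False)
+```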
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finer_ord_transformers_2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|782.3 MB| + +## References + +https://huggingface.co/elshehawy/finer-ord-transformers-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-finetuned_polish_tonga_tonga_islands_szl_siling_corrected_aligned_20e_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-finetuned_polish_tonga_tonga_islands_szl_siling_corrected_aligned_20e_pipeline_en.md new file mode 100644 index 00000000000000..b561f9fe9e3321 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-finetuned_polish_tonga_tonga_islands_szl_siling_corrected_aligned_20e_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English finetuned_polish_tonga_tonga_islands_szl_siling_corrected_aligned_20e_pipeline pipeline MarianTransformer from brjezierski +author: John Snow Labs +name: finetuned_polish_tonga_tonga_islands_szl_siling_corrected_aligned_20e_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_polish_tonga_tonga_islands_szl_siling_corrected_aligned_20e_pipeline` is a English model originally trained by brjezierski. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_polish_tonga_tonga_islands_szl_siling_corrected_aligned_20e_pipeline_en_5.5.0_3.0_1725345694881.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_polish_tonga_tonga_islands_szl_siling_corrected_aligned_20e_pipeline_en_5.5.0_3.0_1725345694881.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("finetuned_polish_tonga_tonga_islands_szl_siling_corrected_aligned_20e_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("finetuned_polish_tonga_tonga_islands_szl_siling_corrected_aligned_20e_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_polish_tonga_tonga_islands_szl_siling_corrected_aligned_20e_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|386.4 MB| + +## References + +https://huggingface.co/brjezierski/finetuned-pl-to-szl-siling-corrected-aligned-20e + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-finetuned_sentence_transformers_multi_qa_mpnet_base_dot_v1_en.md b/docs/_posts/ahmedlone127/2024-09-03-finetuned_sentence_transformers_multi_qa_mpnet_base_dot_v1_en.md new file mode 100644 index 00000000000000..5b7705ccb742f5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-finetuned_sentence_transformers_multi_qa_mpnet_base_dot_v1_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English finetuned_sentence_transformers_multi_qa_mpnet_base_dot_v1 MPNetEmbeddings from Deehan1866 +author: John Snow Labs +name: finetuned_sentence_transformers_multi_qa_mpnet_base_dot_v1 +date: 2024-09-03 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_sentence_transformers_multi_qa_mpnet_base_dot_v1` is a English model originally trained by Deehan1866. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_sentence_transformers_multi_qa_mpnet_base_dot_v1_en_5.5.0_3.0_1725350543526.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_sentence_transformers_multi_qa_mpnet_base_dot_v1_en_5.5.0_3.0_1725350543526.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("finetuned_sentence_transformers_multi_qa_mpnet_base_dot_v1","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("finetuned_sentence_transformers_multi_qa_mpnet_base_dot_v1","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_sentence_transformers_multi_qa_mpnet_base_dot_v1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/Deehan1866/finetuned-sentence-transformers-multi-qa-mpnet-base-dot-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-finetuned_twitter_profane_roberta_en.md b/docs/_posts/ahmedlone127/2024-09-03-finetuned_twitter_profane_roberta_en.md new file mode 100644 index 00000000000000..c7c918156055db --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-finetuned_twitter_profane_roberta_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English finetuned_twitter_profane_roberta XlmRoBertaForSequenceClassification from coderSounak +author: John Snow Labs +name: finetuned_twitter_profane_roberta +date: 2024-09-03 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_twitter_profane_roberta` is a English model originally trained by coderSounak. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_twitter_profane_roberta_en_5.5.0_3.0_1725395227114.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_twitter_profane_roberta_en_5.5.0_3.0_1725395227114.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# document -> token -> class: the classifier consumes the assembled document and its tokens
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("finetuned_twitter_profane_roberta","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("finetuned_twitter_profane_roberta", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_twitter_profane_roberta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/coderSounak/finetuned_twitter_profane_roberta \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-finetuning_sentiment_model_3000_samples_parth05_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-finetuning_sentiment_model_3000_samples_parth05_pipeline_en.md new file mode 100644 index 00000000000000..55fe4856ff6ef1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-finetuning_sentiment_model_3000_samples_parth05_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English finetuning_sentiment_model_3000_samples_parth05_pipeline pipeline DistilBertForSequenceClassification from Parth05 +author: John Snow Labs +name: finetuning_sentiment_model_3000_samples_parth05_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuning_sentiment_model_3000_samples_parth05_pipeline` is a English model originally trained by Parth05. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuning_sentiment_model_3000_samples_parth05_pipeline_en_5.5.0_3.0_1725394357113.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuning_sentiment_model_3000_samples_parth05_pipeline_en_5.5.0_3.0_1725394357113.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("finetuning_sentiment_model_3000_samples_parth05_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("finetuning_sentiment_model_3000_samples_parth05_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuning_sentiment_model_3000_samples_parth05_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Parth05/finetuning-sentiment-model-3000-samples + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-ft_clone_en.md b/docs/_posts/ahmedlone127/2024-09-03-ft_clone_en.md new file mode 100644 index 00000000000000..94041f29cd566d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-ft_clone_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ft_clone MarianTransformer from Zeen0 +author: John Snow Labs +name: ft_clone +date: 2024-09-03 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ft_clone` is a English model originally trained by Zeen0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ft_clone_en_5.5.0_3.0_1725345689473.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ft_clone_en_5.5.0_3.0_1725345689473.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# document -> sentence -> translation: the sentence detector feeds the Marian model
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("ft_clone","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("ft_clone","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ft_clone| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.1 MB| + +## References + +https://huggingface.co/Zeen0/ft_clone \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-ft_clone_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-ft_clone_pipeline_en.md new file mode 100644 index 00000000000000..bff7e2dc793cc3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-ft_clone_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English ft_clone_pipeline pipeline MarianTransformer from Zeen0 +author: John Snow Labs +name: ft_clone_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ft_clone_pipeline` is a English model originally trained by Zeen0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ft_clone_pipeline_en_5.5.0_3.0_1725345716144.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ft_clone_pipeline_en_5.5.0_3.0_1725345716144.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ft_clone_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ft_clone_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ft_clone_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|508.6 MB| + +## References + +https://huggingface.co/Zeen0/ft_clone + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-gpl_e5_base_unsupervised_scifact_k10_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-gpl_e5_base_unsupervised_scifact_k10_pipeline_en.md new file mode 100644 index 00000000000000..299c71afa7dbd5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-gpl_e5_base_unsupervised_scifact_k10_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English gpl_e5_base_unsupervised_scifact_k10_pipeline pipeline E5Embeddings from rithwik-db +author: John Snow Labs +name: gpl_e5_base_unsupervised_scifact_k10_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained E5Embeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`gpl_e5_base_unsupervised_scifact_k10_pipeline` is a English model originally trained by rithwik-db. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gpl_e5_base_unsupervised_scifact_k10_pipeline_en_5.5.0_3.0_1725340922377.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gpl_e5_base_unsupervised_scifact_k10_pipeline_en_5.5.0_3.0_1725340922377.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("gpl_e5_base_unsupervised_scifact_k10_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("gpl_e5_base_unsupervised_scifact_k10_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gpl_e5_base_unsupervised_scifact_k10_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|395.7 MB| + +## References + +https://huggingface.co/rithwik-db/gpl-e5-base-unsupervised-scifact-k10 + +## Included Models + +- DocumentAssembler +- E5Embeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-gpl_e5_base_unsupervised_test_1_en.md b/docs/_posts/ahmedlone127/2024-09-03-gpl_e5_base_unsupervised_test_1_en.md new file mode 100644 index 00000000000000..79af981ab3f351 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-gpl_e5_base_unsupervised_test_1_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English gpl_e5_base_unsupervised_test_1 E5Embeddings from rithwik-db +author: John Snow Labs +name: gpl_e5_base_unsupervised_test_1 +date: 2024-09-03 +tags: [en, open_source, onnx, embeddings, e5] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: E5Embeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained E5Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`gpl_e5_base_unsupervised_test_1` is a English model originally trained by rithwik-db. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gpl_e5_base_unsupervised_test_1_en_5.5.0_3.0_1725340224362.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gpl_e5_base_unsupervised_test_1_en_5.5.0_3.0_1725340224362.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = E5Embeddings.pretrained("gpl_e5_base_unsupervised_test_1","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = E5Embeddings.pretrained("gpl_e5_base_unsupervised_test_1","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
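+
+E5 models produce one sentence-level vector per document. A sketch of comparing two texts with the fitted model above (numpy is an extra dependency, and the `query:` / `passage:` prefixes follow the upstream E5 convention rather than anything stated on this page):
+
+```python
+import numpy as np
+
+docs = spark.createDataFrame(
+    [["query: what is spark nlp"],
+     ["passage: Spark NLP is an NLP library built on Apache Spark."]], ["text"])
+
+# Pull the embedding vector out of each row's annotation.
+rows = (pipelineModel.transform(docs)
+        .selectExpr("explode(embeddings) as e")
+        .selectExpr("e.embeddings as vec")
+        .collect())
+a, b = np.array(rows[0].vec), np.array(rows[1].vec)
+print(float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b))))  # cosine similarity
+```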
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gpl_e5_base_unsupervised_test_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[E5]| +|Language:|en| +|Size:|388.5 MB| + +## References + +https://huggingface.co/rithwik-db/gpl-e5-base-unsupervised-test-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-gpl_tsdae_e5_base_unsupervised_test_1_d165d6_en.md b/docs/_posts/ahmedlone127/2024-09-03-gpl_tsdae_e5_base_unsupervised_test_1_d165d6_en.md new file mode 100644 index 00000000000000..3757c04eb18473 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-gpl_tsdae_e5_base_unsupervised_test_1_d165d6_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English gpl_tsdae_e5_base_unsupervised_test_1_d165d6 E5Embeddings from rithwik-db +author: John Snow Labs +name: gpl_tsdae_e5_base_unsupervised_test_1_d165d6 +date: 2024-09-03 +tags: [en, open_source, onnx, embeddings, e5] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: E5Embeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained E5Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`gpl_tsdae_e5_base_unsupervised_test_1_d165d6` is a English model originally trained by rithwik-db. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gpl_tsdae_e5_base_unsupervised_test_1_d165d6_en_5.5.0_3.0_1725344351594.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gpl_tsdae_e5_base_unsupervised_test_1_d165d6_en_5.5.0_3.0_1725344351594.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = E5Embeddings.pretrained("gpl_tsdae_e5_base_unsupervised_test_1_d165d6","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = E5Embeddings.pretrained("gpl_tsdae_e5_base_unsupervised_test_1_d165d6","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gpl_tsdae_e5_base_unsupervised_test_1_d165d6| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[E5]| +|Language:|en| +|Size:|407.3 MB| + +## References + +https://huggingface.co/rithwik-db/gpl_tsdae-e5-base-unsupervised-test-1-d165d6 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-gpl_tsdae_e5_base_unsupervised_test_1_d165d6_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-gpl_tsdae_e5_base_unsupervised_test_1_d165d6_pipeline_en.md new file mode 100644 index 00000000000000..afe5fff98cde2a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-gpl_tsdae_e5_base_unsupervised_test_1_d165d6_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English gpl_tsdae_e5_base_unsupervised_test_1_d165d6_pipeline pipeline E5Embeddings from rithwik-db +author: John Snow Labs +name: gpl_tsdae_e5_base_unsupervised_test_1_d165d6_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained E5Embeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`gpl_tsdae_e5_base_unsupervised_test_1_d165d6_pipeline` is a English model originally trained by rithwik-db. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gpl_tsdae_e5_base_unsupervised_test_1_d165d6_pipeline_en_5.5.0_3.0_1725344371451.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gpl_tsdae_e5_base_unsupervised_test_1_d165d6_pipeline_en_5.5.0_3.0_1725344371451.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("gpl_tsdae_e5_base_unsupervised_test_1_d165d6_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("gpl_tsdae_e5_base_unsupervised_test_1_d165d6_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gpl_tsdae_e5_base_unsupervised_test_1_d165d6_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.3 MB| + +## References + +https://huggingface.co/rithwik-db/gpl_tsdae-e5-base-unsupervised-test-1-d165d6 + +## Included Models + +- DocumentAssembler +- E5Embeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-helsinki_altp_indonesian_english_en.md b/docs/_posts/ahmedlone127/2024-09-03-helsinki_altp_indonesian_english_en.md new file mode 100644 index 00000000000000..720109e0da78c8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-helsinki_altp_indonesian_english_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English helsinki_altp_indonesian_english MarianTransformer from Mikask +author: John Snow Labs +name: helsinki_altp_indonesian_english +date: 2024-09-03 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`helsinki_altp_indonesian_english` is a English model originally trained by Mikask. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/helsinki_altp_indonesian_english_en_5.5.0_3.0_1725403942092.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/helsinki_altp_indonesian_english_en_5.5.0_3.0_1725403942092.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# document -> sentence -> translation: the sentence detector feeds the Marian model
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("helsinki_altp_indonesian_english","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("helsinki_altp_indonesian_english","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|helsinki_altp_indonesian_english| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|480.1 MB| + +## References + +https://huggingface.co/Mikask/helsinki-altp-id-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-helsinki_nlp_opus_maltese_multiple_languages_english_opus100_accelerate_en.md b/docs/_posts/ahmedlone127/2024-09-03-helsinki_nlp_opus_maltese_multiple_languages_english_opus100_accelerate_en.md new file mode 100644 index 00000000000000..db240b56513c59 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-helsinki_nlp_opus_maltese_multiple_languages_english_opus100_accelerate_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English helsinki_nlp_opus_maltese_multiple_languages_english_opus100_accelerate MarianTransformer from MikolajDeja +author: John Snow Labs +name: helsinki_nlp_opus_maltese_multiple_languages_english_opus100_accelerate +date: 2024-09-03 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`helsinki_nlp_opus_maltese_multiple_languages_english_opus100_accelerate` is a English model originally trained by MikolajDeja. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/helsinki_nlp_opus_maltese_multiple_languages_english_opus100_accelerate_en_5.5.0_3.0_1725404585694.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/helsinki_nlp_opus_maltese_multiple_languages_english_opus100_accelerate_en_5.5.0_3.0_1725404585694.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, MarianTransformer
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("helsinki_nlp_opus_maltese_multiple_languages_english_opus100_accelerate","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("helsinki_nlp_opus_maltese_multiple_languages_english_opus100_accelerate","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|helsinki_nlp_opus_maltese_multiple_languages_english_opus100_accelerate| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|527.9 MB| + +## References + +https://huggingface.co/MikolajDeja/Helsinki-NLP-opus-mt-mul-en-opus100-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-industry_classification_en.md b/docs/_posts/ahmedlone127/2024-09-03-industry_classification_en.md new file mode 100644 index 00000000000000..160f03f5c610e8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-industry_classification_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English industry_classification DistilBertForSequenceClassification from swarupt +author: John Snow Labs +name: industry_classification +date: 2024-09-03 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`industry_classification` is a English model originally trained by swarupt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/industry_classification_en_5.5.0_3.0_1725394161351.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/industry_classification_en_5.5.0_3.0_1725394161351.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("industry_classification","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("industry_classification", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
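+
+After `transform`, each row carries its predicted label in the `class` column. A small sketch (assuming the `pipelineDF` DataFrame from the example above):
+
+```python
+# Sketch: show the input text next to the predicted class label.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```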
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|industry_classification| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|250.3 MB| + +## References + +https://huggingface.co/swarupt/industry-classification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-industry_classification_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-industry_classification_pipeline_en.md new file mode 100644 index 00000000000000..0ba87e84b06d88 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-industry_classification_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English industry_classification_pipeline pipeline DistilBertForSequenceClassification from swarupt +author: John Snow Labs +name: industry_classification_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`industry_classification_pipeline` is a English model originally trained by swarupt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/industry_classification_pipeline_en_5.5.0_3.0_1725394174788.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/industry_classification_pipeline_en_5.5.0_3.0_1725394174788.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("industry_classification_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("industry_classification_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+</div>
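+
+For quick single-string inference the downloaded pipeline can also be used without building a DataFrame. A minimal sketch (assuming the `pipeline` object created above; the exact output keys depend on the pipeline's stages):
+
+```python
+# Sketch: annotate() runs the whole pretrained pipeline on a plain string
+# and returns a dict keyed by the output columns of its stages.
+result = pipeline.annotate("I love spark-nlp")
+print(result)
+```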
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|industry_classification_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|250.3 MB| + +## References + +https://huggingface.co/swarupt/industry-classification + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-khmer_xlm_roberta_base_pipeline_km.md b/docs/_posts/ahmedlone127/2024-09-03-khmer_xlm_roberta_base_pipeline_km.md new file mode 100644 index 00000000000000..f558bcee0bcb79 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-khmer_xlm_roberta_base_pipeline_km.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Central Khmer, Khmer khmer_xlm_roberta_base_pipeline pipeline XlmRoBertaEmbeddings from channudam +author: John Snow Labs +name: khmer_xlm_roberta_base_pipeline +date: 2024-09-03 +tags: [km, open_source, pipeline, onnx] +task: Embeddings +language: km +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`khmer_xlm_roberta_base_pipeline` is a Central Khmer, Khmer model originally trained by channudam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/khmer_xlm_roberta_base_pipeline_km_5.5.0_3.0_1725391621478.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/khmer_xlm_roberta_base_pipeline_km_5.5.0_3.0_1725391621478.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("khmer_xlm_roberta_base_pipeline", lang = "km")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("khmer_xlm_roberta_base_pipeline", lang = "km")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+</div>
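+
+Because the output column names are defined by the pipeline's own stages, printing the schema of the transformed DataFrame is a quick way to see what was produced. A small sketch (assuming the `annotations` DataFrame from the example above):
+
+```python
+# Sketch: list the annotation columns added by the pretrained pipeline.
+annotations.printSchema()
+```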
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|khmer_xlm_roberta_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|km| +|Size:|1.0 GB| + +## References + +https://huggingface.co/channudam/khmer-xlm-roberta-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-lab1_random_coloteong_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-lab1_random_coloteong_pipeline_en.md new file mode 100644 index 00000000000000..0ba478502a89b8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-lab1_random_coloteong_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English lab1_random_coloteong_pipeline pipeline MarianTransformer from coloteong +author: John Snow Labs +name: lab1_random_coloteong_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lab1_random_coloteong_pipeline` is a English model originally trained by coloteong. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lab1_random_coloteong_pipeline_en_5.5.0_3.0_1725404811050.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lab1_random_coloteong_pipeline_en_5.5.0_3.0_1725404811050.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("lab1_random_coloteong_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("lab1_random_coloteong_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lab1_random_coloteong_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|508.8 MB| + +## References + +https://huggingface.co/coloteong/lab1_random + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-language_detection_fine_tuned_on_xlm_roberta_base_ivanlau_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-language_detection_fine_tuned_on_xlm_roberta_base_ivanlau_pipeline_en.md new file mode 100644 index 00000000000000..cbe503327013ef --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-language_detection_fine_tuned_on_xlm_roberta_base_ivanlau_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English language_detection_fine_tuned_on_xlm_roberta_base_ivanlau_pipeline pipeline XlmRoBertaForSequenceClassification from ivanlau +author: John Snow Labs +name: language_detection_fine_tuned_on_xlm_roberta_base_ivanlau_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`language_detection_fine_tuned_on_xlm_roberta_base_ivanlau_pipeline` is a English model originally trained by ivanlau. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/language_detection_fine_tuned_on_xlm_roberta_base_ivanlau_pipeline_en_5.5.0_3.0_1725327962688.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/language_detection_fine_tuned_on_xlm_roberta_base_ivanlau_pipeline_en_5.5.0_3.0_1725327962688.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("language_detection_fine_tuned_on_xlm_roberta_base_ivanlau_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("language_detection_fine_tuned_on_xlm_roberta_base_ivanlau_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|language_detection_fine_tuned_on_xlm_roberta_base_ivanlau_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|844.0 MB| + +## References + +https://huggingface.co/ivanlau/language-detection-fine-tuned-on-xlm-roberta-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-legalevalrr_en.md b/docs/_posts/ahmedlone127/2024-09-03-legalevalrr_en.md new file mode 100644 index 00000000000000..5d47d9b600da9d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-legalevalrr_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English legalevalrr MPNetEmbeddings from simplexico +author: John Snow Labs +name: legalevalrr +date: 2024-09-03 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`legalevalrr` is a English model originally trained by simplexico. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/legalevalrr_en_5.5.0_3.0_1725350918757.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/legalevalrr_en_5.5.0_3.0_1725350918757.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("legalevalrr","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("legalevalrr","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
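+
+The sentence embeddings themselves are stored in the `embeddings` field of the output annotations. A minimal sketch (assuming the `pipelineDF` DataFrame from the example above):
+
+```python
+# Sketch: pull out the raw embedding vectors produced by MPNetEmbeddings.
+# "embeddings" is the output column set above; ".embeddings" is the vector field.
+pipelineDF.select("embeddings.embeddings").show(truncate=False)
+```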
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|legalevalrr| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/simplexico/legalevalrr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-marian_finetuned_kde4_english_tonga_tonga_islands_kinyarwanda_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-marian_finetuned_kde4_english_tonga_tonga_islands_kinyarwanda_pipeline_en.md new file mode 100644 index 00000000000000..735cf0cab54b18 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-marian_finetuned_kde4_english_tonga_tonga_islands_kinyarwanda_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English marian_finetuned_kde4_english_tonga_tonga_islands_kinyarwanda_pipeline pipeline MarianTransformer from icep0ps +author: John Snow Labs +name: marian_finetuned_kde4_english_tonga_tonga_islands_kinyarwanda_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kde4_english_tonga_tonga_islands_kinyarwanda_pipeline` is a English model originally trained by icep0ps. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_kinyarwanda_pipeline_en_5.5.0_3.0_1725403734807.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_kinyarwanda_pipeline_en_5.5.0_3.0_1725403734807.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("marian_finetuned_kde4_english_tonga_tonga_islands_kinyarwanda_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("marian_finetuned_kde4_english_tonga_tonga_islands_kinyarwanda_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kde4_english_tonga_tonga_islands_kinyarwanda_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|211.0 MB| + +## References + +https://huggingface.co/icep0ps/marian-finetuned-kde4-en-to-rw + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-marian_finetuned_kde4_english_tonga_tonga_islands_portuguese_breton_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-marian_finetuned_kde4_english_tonga_tonga_islands_portuguese_breton_pipeline_en.md new file mode 100644 index 00000000000000..221a1d0acc5444 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-marian_finetuned_kde4_english_tonga_tonga_islands_portuguese_breton_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English marian_finetuned_kde4_english_tonga_tonga_islands_portuguese_breton_pipeline pipeline MarianTransformer from DarioLopes +author: John Snow Labs +name: marian_finetuned_kde4_english_tonga_tonga_islands_portuguese_breton_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kde4_english_tonga_tonga_islands_portuguese_breton_pipeline` is a English model originally trained by DarioLopes. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_portuguese_breton_pipeline_en_5.5.0_3.0_1725346066037.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_portuguese_breton_pipeline_en_5.5.0_3.0_1725346066037.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("marian_finetuned_kde4_english_tonga_tonga_islands_portuguese_breton_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("marian_finetuned_kde4_english_tonga_tonga_islands_portuguese_breton_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kde4_english_tonga_tonga_islands_portuguese_breton_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/DarioLopes/marian-finetuned-kde4-en-to-pt_BR + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-mdeberta_expl_extraction_multi_en.md b/docs/_posts/ahmedlone127/2024-09-03-mdeberta_expl_extraction_multi_en.md new file mode 100644 index 00000000000000..541fa78580af8a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-mdeberta_expl_extraction_multi_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mdeberta_expl_extraction_multi DeBertaForTokenClassification from HiTZ +author: John Snow Labs +name: mdeberta_expl_extraction_multi +date: 2024-09-03 +tags: [en, open_source, onnx, token_classification, deberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_expl_extraction_multi` is a English model originally trained by HiTZ. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_expl_extraction_multi_en_5.5.0_3.0_1725387442832.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_expl_extraction_multi_en_5.5.0_3.0_1725387442832.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DeBertaForTokenClassification.pretrained("mdeberta_expl_extraction_multi","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DeBertaForTokenClassification.pretrained("mdeberta_expl_extraction_multi", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
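+
+The token-level predictions land in the `ner` output column. A small sketch (assuming the `pipelineDF` DataFrame from the example above):
+
+```python
+# Sketch: show the tokens and the predicted NER tags side by side.
+pipelineDF.select("token.result", "ner.result").show(truncate=False)
+```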
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_expl_extraction_multi| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|918.6 MB| + +## References + +https://huggingface.co/HiTZ/mdeberta-expl-extraction-multi \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-mdeberta_v3_base_finetuned_sayula_popoluca_en.md b/docs/_posts/ahmedlone127/2024-09-03-mdeberta_v3_base_finetuned_sayula_popoluca_en.md new file mode 100644 index 00000000000000..fc42eaa3fa964f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-mdeberta_v3_base_finetuned_sayula_popoluca_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mdeberta_v3_base_finetuned_sayula_popoluca DeBertaForTokenClassification from Emanuel +author: John Snow Labs +name: mdeberta_v3_base_finetuned_sayula_popoluca +date: 2024-09-03 +tags: [en, open_source, onnx, token_classification, deberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_v3_base_finetuned_sayula_popoluca` is a English model originally trained by Emanuel. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_finetuned_sayula_popoluca_en_5.5.0_3.0_1725400994284.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_finetuned_sayula_popoluca_en_5.5.0_3.0_1725400994284.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DeBertaForTokenClassification.pretrained("mdeberta_v3_base_finetuned_sayula_popoluca","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DeBertaForTokenClassification.pretrained("mdeberta_v3_base_finetuned_sayula_popoluca", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_v3_base_finetuned_sayula_popoluca| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|852.3 MB| + +## References + +https://huggingface.co/Emanuel/mdeberta-v3-base-finetuned-pos \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-mdeberta_v3_base_nubes_es.md b/docs/_posts/ahmedlone127/2024-09-03-mdeberta_v3_base_nubes_es.md new file mode 100644 index 00000000000000..20fa25f6b8482b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-mdeberta_v3_base_nubes_es.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Castilian, Spanish mdeberta_v3_base_nubes DeBertaForTokenClassification from IIC +author: John Snow Labs +name: mdeberta_v3_base_nubes +date: 2024-09-03 +tags: [es, open_source, onnx, token_classification, deberta, ner] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_v3_base_nubes` is a Castilian, Spanish model originally trained by IIC. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_nubes_es_5.5.0_3.0_1725400716695.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_nubes_es_5.5.0_3.0_1725400716695.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DeBertaForTokenClassification.pretrained("mdeberta_v3_base_nubes","es") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DeBertaForTokenClassification.pretrained("mdeberta_v3_base_nubes", "es")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_v3_base_nubes| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|es| +|Size:|797.4 MB| + +## References + +https://huggingface.co/IIC/mdeberta-v3-base-nubes \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-medical_pubmed_8_2_en.md b/docs/_posts/ahmedlone127/2024-09-03-medical_pubmed_8_2_en.md new file mode 100644 index 00000000000000..e14f9e5f9d37c5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-medical_pubmed_8_2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English medical_pubmed_8_2 MarianTransformer from DogGoesBark +author: John Snow Labs +name: medical_pubmed_8_2 +date: 2024-09-03 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`medical_pubmed_8_2` is a English model originally trained by DogGoesBark. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/medical_pubmed_8_2_en_5.5.0_3.0_1725404691002.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/medical_pubmed_8_2_en_5.5.0_3.0_1725404691002.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, MarianTransformer
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("medical_pubmed_8_2","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("medical_pubmed_8_2","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
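+
+For interactive use, the fitted pipeline can also be wrapped in a LightPipeline, which works on plain strings instead of DataFrames. A minimal sketch (assuming the `pipelineModel` object from the example above):
+
+```python
+from sparknlp.base import LightPipeline
+
+# Sketch: LightPipeline avoids the DataFrame round trip for small inputs.
+light = LightPipeline(pipelineModel)
+print(light.annotate("I love spark-nlp")["translation"])
+```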
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|medical_pubmed_8_2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|540.0 MB| + +## References + +https://huggingface.co/DogGoesBark/medical_pubmed_8_2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-medrurobertalarge_ru.md b/docs/_posts/ahmedlone127/2024-09-03-medrurobertalarge_ru.md new file mode 100644 index 00000000000000..222612632a0c1b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-medrurobertalarge_ru.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Russian medrurobertalarge RoBertaEmbeddings from DmitryPogrebnoy +author: John Snow Labs +name: medrurobertalarge +date: 2024-09-03 +tags: [ru, open_source, onnx, embeddings, roberta] +task: Embeddings +language: ru +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`medrurobertalarge` is a Russian model originally trained by DmitryPogrebnoy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/medrurobertalarge_ru_5.5.0_3.0_1725375674938.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/medrurobertalarge_ru_5.5.0_3.0_1725375674938.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("medrurobertalarge","ru") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("medrurobertalarge","ru") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
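+
+Each token receives its own vector, stored in the `embeddings` field of the output annotations. A small sketch (assuming the `pipelineDF` DataFrame from the example above):
+
+```python
+# Sketch: token texts alongside their RoBERTa embedding vectors.
+# "embeddings" is the output column set above; ".result" holds the token text.
+pipelineDF.select("embeddings.result", "embeddings.embeddings").show(truncate=False)
+```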
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|medrurobertalarge| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|ru| +|Size:|1.3 GB| + +## References + +https://huggingface.co/DmitryPogrebnoy/MedRuRobertaLarge \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-microsoft_deberta_v3_large_ner_conll2003_breast_without_castellon_castellon_30_docs_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-microsoft_deberta_v3_large_ner_conll2003_breast_without_castellon_castellon_30_docs_pipeline_en.md new file mode 100644 index 00000000000000..122cde17f2616c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-microsoft_deberta_v3_large_ner_conll2003_breast_without_castellon_castellon_30_docs_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English microsoft_deberta_v3_large_ner_conll2003_breast_without_castellon_castellon_30_docs_pipeline pipeline DeBertaForTokenClassification from Yanis +author: John Snow Labs +name: microsoft_deberta_v3_large_ner_conll2003_breast_without_castellon_castellon_30_docs_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`microsoft_deberta_v3_large_ner_conll2003_breast_without_castellon_castellon_30_docs_pipeline` is a English model originally trained by Yanis. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/microsoft_deberta_v3_large_ner_conll2003_breast_without_castellon_castellon_30_docs_pipeline_en_5.5.0_3.0_1725387756673.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/microsoft_deberta_v3_large_ner_conll2003_breast_without_castellon_castellon_30_docs_pipeline_en_5.5.0_3.0_1725387756673.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("microsoft_deberta_v3_large_ner_conll2003_breast_without_castellon_castellon_30_docs_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("microsoft_deberta_v3_large_ner_conll2003_breast_without_castellon_castellon_30_docs_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|microsoft_deberta_v3_large_ner_conll2003_breast_without_castellon_castellon_30_docs_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/Yanis/microsoft-deberta-v3-large_ner_conll2003-breast-without-castellon-castellon-30-docs + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-microsoft_deberta_v3_large_ner_conll2003_latin_fe_v2_en.md b/docs/_posts/ahmedlone127/2024-09-03-microsoft_deberta_v3_large_ner_conll2003_latin_fe_v2_en.md new file mode 100644 index 00000000000000..0ca4bbb42a7391 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-microsoft_deberta_v3_large_ner_conll2003_latin_fe_v2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English microsoft_deberta_v3_large_ner_conll2003_latin_fe_v2 DeBertaForTokenClassification from Yanis +author: John Snow Labs +name: microsoft_deberta_v3_large_ner_conll2003_latin_fe_v2 +date: 2024-09-03 +tags: [en, open_source, onnx, token_classification, deberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`microsoft_deberta_v3_large_ner_conll2003_latin_fe_v2` is a English model originally trained by Yanis. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/microsoft_deberta_v3_large_ner_conll2003_latin_fe_v2_en_5.5.0_3.0_1725400726712.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/microsoft_deberta_v3_large_ner_conll2003_latin_fe_v2_en_5.5.0_3.0_1725400726712.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DeBertaForTokenClassification.pretrained("microsoft_deberta_v3_large_ner_conll2003_latin_fe_v2","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DeBertaForTokenClassification.pretrained("microsoft_deberta_v3_large_ner_conll2003_latin_fe_v2", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|microsoft_deberta_v3_large_ner_conll2003_latin_fe_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/Yanis/microsoft-deberta-v3-large_ner_conll2003-la-fe-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-mpnet_base_snli_mnli_finetuned_mnli_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-mpnet_base_snli_mnli_finetuned_mnli_pipeline_en.md new file mode 100644 index 00000000000000..e1f7fa64b80b51 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-mpnet_base_snli_mnli_finetuned_mnli_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mpnet_base_snli_mnli_finetuned_mnli_pipeline pipeline MPNetForSequenceClassification from NicolasLe +author: John Snow Labs +name: mpnet_base_snli_mnli_finetuned_mnli_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mpnet_base_snli_mnli_finetuned_mnli_pipeline` is a English model originally trained by NicolasLe. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mpnet_base_snli_mnli_finetuned_mnli_pipeline_en_5.5.0_3.0_1725387095492.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mpnet_base_snli_mnli_finetuned_mnli_pipeline_en_5.5.0_3.0_1725387095492.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("mpnet_base_snli_mnli_finetuned_mnli_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("mpnet_base_snli_mnli_finetuned_mnli_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mpnet_base_snli_mnli_finetuned_mnli_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/NicolasLe/mpnet-base-snli-mnli-finetuned-mnli + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-mpnet_frozen_newtriplets_v2_lr_2e_5_m_1_e_3_en.md b/docs/_posts/ahmedlone127/2024-09-03-mpnet_frozen_newtriplets_v2_lr_2e_5_m_1_e_3_en.md new file mode 100644 index 00000000000000..c8cbefad9ad0d5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-mpnet_frozen_newtriplets_v2_lr_2e_5_m_1_e_3_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English mpnet_frozen_newtriplets_v2_lr_2e_5_m_1_e_3 MPNetEmbeddings from luiz-and-robert-thesis +author: John Snow Labs +name: mpnet_frozen_newtriplets_v2_lr_2e_5_m_1_e_3 +date: 2024-09-03 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mpnet_frozen_newtriplets_v2_lr_2e_5_m_1_e_3` is a English model originally trained by luiz-and-robert-thesis. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mpnet_frozen_newtriplets_v2_lr_2e_5_m_1_e_3_en_5.5.0_3.0_1725350751049.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mpnet_frozen_newtriplets_v2_lr_2e_5_m_1_e_3_en_5.5.0_3.0_1725350751049.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("mpnet_frozen_newtriplets_v2_lr_2e_5_m_1_e_3","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("mpnet_frozen_newtriplets_v2_lr_2e_5_m_1_e_3","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mpnet_frozen_newtriplets_v2_lr_2e_5_m_1_e_3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/luiz-and-robert-thesis/mpnet-frozen-newtriplets-v2-lr-2e-5-m-1-e-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-n_roberta_twitterfin_padding60model_en.md b/docs/_posts/ahmedlone127/2024-09-03-n_roberta_twitterfin_padding60model_en.md new file mode 100644 index 00000000000000..9d88d4f32576f4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-n_roberta_twitterfin_padding60model_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English n_roberta_twitterfin_padding60model RoBertaForSequenceClassification from Realgon +author: John Snow Labs +name: n_roberta_twitterfin_padding60model +date: 2024-09-03 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`n_roberta_twitterfin_padding60model` is a English model originally trained by Realgon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/n_roberta_twitterfin_padding60model_en_5.5.0_3.0_1725337017337.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/n_roberta_twitterfin_padding60model_en_5.5.0_3.0_1725337017337.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, RoBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("n_roberta_twitterfin_padding60model","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("n_roberta_twitterfin_padding60model", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
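+
+Besides the predicted label, the classifier annotation also carries metadata that typically includes per-class scores. A small sketch (assuming the `pipelineDF` DataFrame from the example above; the exact metadata keys depend on the model's labels):
+
+```python
+# Sketch: predicted label plus the annotation metadata map.
+pipelineDF.select("class.result", "class.metadata").show(truncate=False)
+```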
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|n_roberta_twitterfin_padding60model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|443.2 MB| + +## References + +https://huggingface.co/Realgon/N_roberta_twitterfin_padding60model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-nuner_v2_0_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-nuner_v2_0_pipeline_en.md new file mode 100644 index 00000000000000..ee380d8a3bee59 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-nuner_v2_0_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English nuner_v2_0_pipeline pipeline RoBertaForTokenClassification from numind +author: John Snow Labs +name: nuner_v2_0_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nuner_v2_0_pipeline` is a English model originally trained by numind. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nuner_v2_0_pipeline_en_5.5.0_3.0_1725383029394.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nuner_v2_0_pipeline_en_5.5.0_3.0_1725383029394.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("nuner_v2_0_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("nuner_v2_0_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nuner_v2_0_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|464.8 MB| + +## References + +https://huggingface.co/numind/NuNER-v2.0 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-opensearch_neural_sparse_encoding_doc_v2_distill_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-opensearch_neural_sparse_encoding_doc_v2_distill_pipeline_en.md new file mode 100644 index 00000000000000..b42a41443d9ecf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-opensearch_neural_sparse_encoding_doc_v2_distill_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opensearch_neural_sparse_encoding_doc_v2_distill_pipeline pipeline DistilBertEmbeddings from opensearch-project +author: John Snow Labs +name: opensearch_neural_sparse_encoding_doc_v2_distill_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opensearch_neural_sparse_encoding_doc_v2_distill_pipeline` is a English model originally trained by opensearch-project. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opensearch_neural_sparse_encoding_doc_v2_distill_pipeline_en_5.5.0_3.0_1725384797888.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opensearch_neural_sparse_encoding_doc_v2_distill_pipeline_en_5.5.0_3.0_1725384797888.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opensearch_neural_sparse_encoding_doc_v2_distill_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opensearch_neural_sparse_encoding_doc_v2_distill_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opensearch_neural_sparse_encoding_doc_v2_distill_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/opensearch-project/opensearch-neural-sparse-encoding-doc-v2-distill + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-opus_maltese_english_italian_finetuned_english_tonga_tonga_islands_italian_enimai_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-opus_maltese_english_italian_finetuned_english_tonga_tonga_islands_italian_enimai_pipeline_en.md new file mode 100644 index 00000000000000..363754feca546a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-opus_maltese_english_italian_finetuned_english_tonga_tonga_islands_italian_enimai_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_maltese_english_italian_finetuned_english_tonga_tonga_islands_italian_enimai_pipeline pipeline MarianTransformer from enimai +author: John Snow Labs +name: opus_maltese_english_italian_finetuned_english_tonga_tonga_islands_italian_enimai_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_italian_finetuned_english_tonga_tonga_islands_italian_enimai_pipeline` is a English model originally trained by enimai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_italian_finetuned_english_tonga_tonga_islands_italian_enimai_pipeline_en_5.5.0_3.0_1725403989885.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_italian_finetuned_english_tonga_tonga_islands_italian_enimai_pipeline_en_5.5.0_3.0_1725403989885.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_english_italian_finetuned_english_tonga_tonga_islands_italian_enimai_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_english_italian_finetuned_english_tonga_tonga_islands_italian_enimai_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_italian_finetuned_english_tonga_tonga_islands_italian_enimai_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|623.6 MB| + +## References + +https://huggingface.co/enimai/opus-mt-en-it-finetuned-en-to-it + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_chayawat_en.md b/docs/_posts/ahmedlone127/2024-09-03-opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_chayawat_en.md new file mode 100644 index 00000000000000..3bbc93a7b19020 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_chayawat_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_chayawat MarianTransformer from Chayawat +author: John Snow Labs +name: opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_chayawat +date: 2024-09-03 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_chayawat` is a English model originally trained by Chayawat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_chayawat_en_5.5.0_3.0_1725403985729.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_chayawat_en_5.5.0_3.0_1725403985729.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_chayawat","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_chayawat","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
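+
+As a quick follow-up, the translated text can be read back out of `pipelineDF`; this is a sketch and assumes the MarianTransformer output column is named `translation`, as configured in the snippet above.
+
+```python
+# Sketch: each annotation in the `translation` column carries its text in `result`.
+pipelineDF.selectExpr("explode(translation.result) as translated_text") \
+    .show(truncate=False)
+```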
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_chayawat| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|530.3 MB| + +## References + +https://huggingface.co/Chayawat/opus-mt-en-mul-finetuned-en-to-th \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_chayawat_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_chayawat_pipeline_en.md new file mode 100644 index 00000000000000..9fffaaa81de434 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_chayawat_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_chayawat_pipeline pipeline MarianTransformer from Chayawat +author: John Snow Labs +name: opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_chayawat_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_chayawat_pipeline` is a English model originally trained by Chayawat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_chayawat_pipeline_en_5.5.0_3.0_1725404015401.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_chayawat_pipeline_en_5.5.0_3.0_1725404015401.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_chayawat_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_chayawat_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_chayawat_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|530.8 MB| + +## References + +https://huggingface.co/Chayawat/opus-mt-en-mul-finetuned-en-to-th + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-roberta_base_danish_pipeline_da.md b/docs/_posts/ahmedlone127/2024-09-03-roberta_base_danish_pipeline_da.md new file mode 100644 index 00000000000000..0fab37c8f34baa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-roberta_base_danish_pipeline_da.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Danish roberta_base_danish_pipeline pipeline RoBertaEmbeddings from DDSC +author: John Snow Labs +name: roberta_base_danish_pipeline +date: 2024-09-03 +tags: [da, open_source, pipeline, onnx] +task: Embeddings +language: da +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_danish_pipeline` is a Danish model originally trained by DDSC. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_danish_pipeline_da_5.5.0_3.0_1725382099230.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_danish_pipeline_da_5.5.0_3.0_1725382099230.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_danish_pipeline", lang = "da") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_danish_pipeline", lang = "da") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_danish_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|da| +|Size:|466.0 MB| + +## References + +https://huggingface.co/DDSC/roberta-base-danish + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-roberta_base_russian_v0_ru.md b/docs/_posts/ahmedlone127/2024-09-03-roberta_base_russian_v0_ru.md new file mode 100644 index 00000000000000..e259d9ef5a7132 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-roberta_base_russian_v0_ru.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Russian roberta_base_russian_v0 RoBertaEmbeddings from blinoff +author: John Snow Labs +name: roberta_base_russian_v0 +date: 2024-09-03 +tags: [ru, open_source, onnx, embeddings, roberta] +task: Embeddings +language: ru +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_russian_v0` is a Russian model originally trained by blinoff. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_russian_v0_ru_5.5.0_3.0_1725375096954.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_russian_v0_ru_5.5.0_3.0_1725375096954.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("roberta_base_russian_v0","ru") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("roberta_base_russian_v0","ru") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
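+
+One possible way to inspect the token embeddings produced above is sketched below; it assumes the output column is `embeddings`, as set via `setOutputCol` in the example.
+
+```python
+# Sketch: each token annotation exposes its text in `result` and its vector in `embeddings`.
+pipelineDF.selectExpr("explode(embeddings) as emb") \
+    .selectExpr("emb.result as token", "emb.embeddings as vector") \
+    .show(truncate=False)
+```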
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_russian_v0| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|ru| +|Size:|465.1 MB| + +## References + +https://huggingface.co/blinoff/roberta-base-russian-v0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-roberta_classifier_autonlp_persian_farsi_473312409_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-roberta_classifier_autonlp_persian_farsi_473312409_pipeline_en.md new file mode 100644 index 00000000000000..f198a88a80e4dc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-roberta_classifier_autonlp_persian_farsi_473312409_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_classifier_autonlp_persian_farsi_473312409_pipeline pipeline RoBertaForSequenceClassification from Anamika +author: John Snow Labs +name: roberta_classifier_autonlp_persian_farsi_473312409_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_classifier_autonlp_persian_farsi_473312409_pipeline` is a English model originally trained by Anamika. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_classifier_autonlp_persian_farsi_473312409_pipeline_en_5.5.0_3.0_1725336914057.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_classifier_autonlp_persian_farsi_473312409_pipeline_en_5.5.0_3.0_1725336914057.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_classifier_autonlp_persian_farsi_473312409_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_classifier_autonlp_persian_farsi_473312409_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
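+
+For quick checks without building a DataFrame, `PretrainedPipeline` can also annotate plain strings; the sketch below makes that explicit, and the sample sentence is an illustrative assumption.
+
+```python
+# Sketch: annotate() runs the pipeline on a raw string and returns a dict of annotations.
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("roberta_classifier_autonlp_persian_farsi_473312409_pipeline", lang="en")
+result = pipeline.annotate("This is a quick smoke-test sentence.")
+print(result)
+```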
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_classifier_autonlp_persian_farsi_473312409_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|308.7 MB| + +## References + +https://huggingface.co/Anamika/autonlp-fa-473312409 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-roberta_cwe_classifier_kelemia_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-roberta_cwe_classifier_kelemia_pipeline_en.md new file mode 100644 index 00000000000000..dbad4d0e355b29 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-roberta_cwe_classifier_kelemia_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_cwe_classifier_kelemia_pipeline pipeline RoBertaForSequenceClassification from Dunateo +author: John Snow Labs +name: roberta_cwe_classifier_kelemia_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_cwe_classifier_kelemia_pipeline` is a English model originally trained by Dunateo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_cwe_classifier_kelemia_pipeline_en_5.5.0_3.0_1725402597904.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_cwe_classifier_kelemia_pipeline_en_5.5.0_3.0_1725402597904.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_cwe_classifier_kelemia_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_cwe_classifier_kelemia_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_cwe_classifier_kelemia_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|436.4 MB| + +## References + +https://huggingface.co/Dunateo/roberta-cwe-classifier-kelemia + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-roberta_embeddings_amharic_roberta_pipeline_am.md b/docs/_posts/ahmedlone127/2024-09-03-roberta_embeddings_amharic_roberta_pipeline_am.md new file mode 100644 index 00000000000000..f6e352a45f41d3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-roberta_embeddings_amharic_roberta_pipeline_am.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Amharic roberta_embeddings_amharic_roberta_pipeline pipeline RoBertaEmbeddings from uhhlt +author: John Snow Labs +name: roberta_embeddings_amharic_roberta_pipeline +date: 2024-09-03 +tags: [am, open_source, pipeline, onnx] +task: Embeddings +language: am +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_embeddings_amharic_roberta_pipeline` is a Amharic model originally trained by uhhlt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_embeddings_amharic_roberta_pipeline_am_5.5.0_3.0_1725375279380.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_embeddings_amharic_roberta_pipeline_am_5.5.0_3.0_1725375279380.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_embeddings_amharic_roberta_pipeline", lang = "am") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_embeddings_amharic_roberta_pipeline", lang = "am") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_embeddings_amharic_roberta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|am| +|Size:|1.6 GB| + +## References + +https://huggingface.co/uhhlt/am-roberta + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-roberta_large_financial_news_sentiment_english_en.md b/docs/_posts/ahmedlone127/2024-09-03-roberta_large_financial_news_sentiment_english_en.md new file mode 100644 index 00000000000000..2ad62ea739255a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-roberta_large_financial_news_sentiment_english_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_large_financial_news_sentiment_english RoBertaForSequenceClassification from Jean-Baptiste +author: John Snow Labs +name: roberta_large_financial_news_sentiment_english +date: 2024-09-03 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_large_financial_news_sentiment_english` is a English model originally trained by Jean-Baptiste. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_large_financial_news_sentiment_english_en_5.5.0_3.0_1725369847231.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_large_financial_news_sentiment_english_en_5.5.0_3.0_1725369847231.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("roberta_large_financial_news_sentiment_english","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("roberta_large_financial_news_sentiment_english", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
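+
+A sketch of reading the predicted label back from `pipelineDF`; it assumes the classifier writes to the `class` column, as configured above.
+
+```python
+# Sketch: the predicted label sits in the `result` field of the `class` annotations.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```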
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_large_financial_news_sentiment_english| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/Jean-Baptiste/roberta-large-financial-news-sentiment-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-roberta_large_finnish_finnish_nlp_pipeline_fi.md b/docs/_posts/ahmedlone127/2024-09-03-roberta_large_finnish_finnish_nlp_pipeline_fi.md new file mode 100644 index 00000000000000..86d9753d03f04b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-roberta_large_finnish_finnish_nlp_pipeline_fi.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Finnish roberta_large_finnish_finnish_nlp_pipeline pipeline RoBertaEmbeddings from Finnish-NLP +author: John Snow Labs +name: roberta_large_finnish_finnish_nlp_pipeline +date: 2024-09-03 +tags: [fi, open_source, pipeline, onnx] +task: Embeddings +language: fi +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_large_finnish_finnish_nlp_pipeline` is a Finnish model originally trained by Finnish-NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_large_finnish_finnish_nlp_pipeline_fi_5.5.0_3.0_1725374941758.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_large_finnish_finnish_nlp_pipeline_fi_5.5.0_3.0_1725374941758.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_large_finnish_finnish_nlp_pipeline", lang = "fi") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_large_finnish_finnish_nlp_pipeline", lang = "fi") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_large_finnish_finnish_nlp_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fi| +|Size:|1.3 GB| + +## References + +https://huggingface.co/Finnish-NLP/roberta-large-finnish + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-roberta_qa_roberta_base_few_shot_k_1024_finetuned_squad_seed_2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-roberta_qa_roberta_base_few_shot_k_1024_finetuned_squad_seed_2_pipeline_en.md new file mode 100644 index 00000000000000..4125781251bded --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-roberta_qa_roberta_base_few_shot_k_1024_finetuned_squad_seed_2_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English roberta_qa_roberta_base_few_shot_k_1024_finetuned_squad_seed_2_pipeline pipeline RoBertaForQuestionAnswering from anas-awadalla +author: John Snow Labs +name: roberta_qa_roberta_base_few_shot_k_1024_finetuned_squad_seed_2_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_qa_roberta_base_few_shot_k_1024_finetuned_squad_seed_2_pipeline` is a English model originally trained by anas-awadalla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_qa_roberta_base_few_shot_k_1024_finetuned_squad_seed_2_pipeline_en_5.5.0_3.0_1725370443432.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_qa_roberta_base_few_shot_k_1024_finetuned_squad_seed_2_pipeline_en_5.5.0_3.0_1725370443432.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_qa_roberta_base_few_shot_k_1024_finetuned_squad_seed_2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_qa_roberta_base_few_shot_k_1024_finetuned_squad_seed_2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_qa_roberta_base_few_shot_k_1024_finetuned_squad_seed_2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|438.6 MB| + +## References + +https://huggingface.co/anas-awadalla/roberta-base-few-shot-k-1024-finetuned-squad-seed-2 + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-robertachem_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-robertachem_pipeline_en.md new file mode 100644 index 00000000000000..3f72b47441fb49 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-robertachem_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English robertachem_pipeline pipeline RoBertaForSequenceClassification from Chettaniiay +author: John Snow Labs +name: robertachem_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`robertachem_pipeline` is a English model originally trained by Chettaniiay. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/robertachem_pipeline_en_5.5.0_3.0_1725337364345.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/robertachem_pipeline_en_5.5.0_3.0_1725337364345.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("robertachem_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("robertachem_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|robertachem_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|425.8 MB| + +## References + +https://huggingface.co/Chettaniiay/RoBertaChem + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-sent_2_finetuned_xlm_r_masakhaner_swahili_macrolanguage_whole_word_phonetic_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-sent_2_finetuned_xlm_r_masakhaner_swahili_macrolanguage_whole_word_phonetic_pipeline_en.md new file mode 100644 index 00000000000000..f7cea10dfa2dfd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-sent_2_finetuned_xlm_r_masakhaner_swahili_macrolanguage_whole_word_phonetic_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_2_finetuned_xlm_r_masakhaner_swahili_macrolanguage_whole_word_phonetic_pipeline pipeline XlmRoBertaSentenceEmbeddings from JEdward7777 +author: John Snow Labs +name: sent_2_finetuned_xlm_r_masakhaner_swahili_macrolanguage_whole_word_phonetic_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_2_finetuned_xlm_r_masakhaner_swahili_macrolanguage_whole_word_phonetic_pipeline` is a English model originally trained by JEdward7777. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_2_finetuned_xlm_r_masakhaner_swahili_macrolanguage_whole_word_phonetic_pipeline_en_5.5.0_3.0_1725398752078.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_2_finetuned_xlm_r_masakhaner_swahili_macrolanguage_whole_word_phonetic_pipeline_en_5.5.0_3.0_1725398752078.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_2_finetuned_xlm_r_masakhaner_swahili_macrolanguage_whole_word_phonetic_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_2_finetuned_xlm_r_masakhaner_swahili_macrolanguage_whole_word_phonetic_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_2_finetuned_xlm_r_masakhaner_swahili_macrolanguage_whole_word_phonetic_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.1 GB| + +## References + +https://huggingface.co/JEdward7777/2-finetuned-xlm-r-masakhaner-swa-whole-word-phonetic + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-sent_afro_xlmr_mini_finetuned_kintweetsd_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-sent_afro_xlmr_mini_finetuned_kintweetsd_pipeline_en.md new file mode 100644 index 00000000000000..12f6cc7c8a3fa4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-sent_afro_xlmr_mini_finetuned_kintweetsd_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_afro_xlmr_mini_finetuned_kintweetsd_pipeline pipeline XlmRoBertaSentenceEmbeddings from RogerB +author: John Snow Labs +name: sent_afro_xlmr_mini_finetuned_kintweetsd_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_afro_xlmr_mini_finetuned_kintweetsd_pipeline` is a English model originally trained by RogerB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_afro_xlmr_mini_finetuned_kintweetsd_pipeline_en_5.5.0_3.0_1725397392863.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_afro_xlmr_mini_finetuned_kintweetsd_pipeline_en_5.5.0_3.0_1725397392863.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_afro_xlmr_mini_finetuned_kintweetsd_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_afro_xlmr_mini_finetuned_kintweetsd_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_afro_xlmr_mini_finetuned_kintweetsd_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|443.7 MB| + +## References + +https://huggingface.co/RogerB/afro-xlmr-mini-finetuned-kintweetsD + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-sent_bert_base_finnish_uncased_v1_pipeline_fi.md b/docs/_posts/ahmedlone127/2024-09-03-sent_bert_base_finnish_uncased_v1_pipeline_fi.md new file mode 100644 index 00000000000000..fbd3c0855b9bf4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-sent_bert_base_finnish_uncased_v1_pipeline_fi.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Finnish sent_bert_base_finnish_uncased_v1_pipeline pipeline BertSentenceEmbeddings from TurkuNLP +author: John Snow Labs +name: sent_bert_base_finnish_uncased_v1_pipeline +date: 2024-09-03 +tags: [fi, open_source, pipeline, onnx] +task: Embeddings +language: fi +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_base_finnish_uncased_v1_pipeline` is a Finnish model originally trained by TurkuNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_base_finnish_uncased_v1_pipeline_fi_5.5.0_3.0_1725355479643.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_base_finnish_uncased_v1_pipeline_fi_5.5.0_3.0_1725355479643.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_bert_base_finnish_uncased_v1_pipeline", lang = "fi") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_bert_base_finnish_uncased_v1_pipeline", lang = "fi") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_base_finnish_uncased_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fi| +|Size:|465.2 MB| + +## References + +https://huggingface.co/TurkuNLP/bert-base-finnish-uncased-v1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-sent_bert_kor_base_pipeline_ko.md b/docs/_posts/ahmedlone127/2024-09-03-sent_bert_kor_base_pipeline_ko.md new file mode 100644 index 00000000000000..ae0b33712a701a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-sent_bert_kor_base_pipeline_ko.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Korean sent_bert_kor_base_pipeline pipeline BertSentenceEmbeddings from kykim +author: John Snow Labs +name: sent_bert_kor_base_pipeline +date: 2024-09-03 +tags: [ko, open_source, pipeline, onnx] +task: Embeddings +language: ko +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_kor_base_pipeline` is a Korean model originally trained by kykim. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_kor_base_pipeline_ko_5.5.0_3.0_1725355208416.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_kor_base_pipeline_ko_5.5.0_3.0_1725355208416.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_bert_kor_base_pipeline", lang = "ko") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_bert_kor_base_pipeline", lang = "ko") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_kor_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ko| +|Size:|441.7 MB| + +## References + +https://huggingface.co/kykim/bert-kor-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-sent_bio_clinicalbert_emilyalsentzer_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-sent_bio_clinicalbert_emilyalsentzer_pipeline_en.md new file mode 100644 index 00000000000000..a2a02243710fa2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-sent_bio_clinicalbert_emilyalsentzer_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_bio_clinicalbert_emilyalsentzer_pipeline pipeline BertSentenceEmbeddings from emilyalsentzer +author: John Snow Labs +name: sent_bio_clinicalbert_emilyalsentzer_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bio_clinicalbert_emilyalsentzer_pipeline` is a English model originally trained by emilyalsentzer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bio_clinicalbert_emilyalsentzer_pipeline_en_5.5.0_3.0_1725355318878.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bio_clinicalbert_emilyalsentzer_pipeline_en_5.5.0_3.0_1725355318878.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_bio_clinicalbert_emilyalsentzer_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_bio_clinicalbert_emilyalsentzer_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bio_clinicalbert_emilyalsentzer_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.9 MB| + +## References + +https://huggingface.co/emilyalsentzer/Bio_ClinicalBERT + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-sent_memo_final_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-sent_memo_final_pipeline_en.md new file mode 100644 index 00000000000000..3d4b1be317185e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-sent_memo_final_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_memo_final_pipeline pipeline XlmRoBertaSentenceEmbeddings from yemen2016 +author: John Snow Labs +name: sent_memo_final_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_memo_final_pipeline` is a English model originally trained by yemen2016. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_memo_final_pipeline_en_5.5.0_3.0_1725397939361.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_memo_final_pipeline_en_5.5.0_3.0_1725397939361.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_memo_final_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_memo_final_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_memo_final_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/yemen2016/memo_final + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-sent_radbert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-sent_radbert_pipeline_en.md new file mode 100644 index 00000000000000..3eb428c7ad4a89 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-sent_radbert_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_radbert_pipeline pipeline BertSentenceEmbeddings from StanfordAIMI +author: John Snow Labs +name: sent_radbert_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_radbert_pipeline` is a English model originally trained by StanfordAIMI. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_radbert_pipeline_en_5.5.0_3.0_1725355562746.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_radbert_pipeline_en_5.5.0_3.0_1725355562746.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_radbert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_radbert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_radbert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.1 MB| + +## References + +https://huggingface.co/StanfordAIMI/RadBERT + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-sent_twitter_xlm_roberta_base_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-sent_twitter_xlm_roberta_base_pipeline_en.md new file mode 100644 index 00000000000000..2a78bb3af56dd8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-sent_twitter_xlm_roberta_base_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_twitter_xlm_roberta_base_pipeline pipeline XlmRoBertaSentenceEmbeddings from cardiffnlp +author: John Snow Labs +name: sent_twitter_xlm_roberta_base_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_twitter_xlm_roberta_base_pipeline` is a English model originally trained by cardiffnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_twitter_xlm_roberta_base_pipeline_en_5.5.0_3.0_1725358189040.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_twitter_xlm_roberta_base_pipeline_en_5.5.0_3.0_1725358189040.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_twitter_xlm_roberta_base_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_twitter_xlm_roberta_base_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_twitter_xlm_roberta_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/cardiffnlp/twitter-xlm-roberta-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-sent_xlm_roberta_base_facebookai_xx.md b/docs/_posts/ahmedlone127/2024-09-03-sent_xlm_roberta_base_facebookai_xx.md new file mode 100644 index 00000000000000..0898b3f1114d85 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-sent_xlm_roberta_base_facebookai_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual sent_xlm_roberta_base_facebookai XlmRoBertaSentenceEmbeddings from FacebookAI +author: John Snow Labs +name: sent_xlm_roberta_base_facebookai +date: 2024-09-03 +tags: [xx, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_xlm_roberta_base_facebookai` is a Multilingual model originally trained by FacebookAI. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_facebookai_xx_5.5.0_3.0_1725359612587.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_facebookai_xx_5.5.0_3.0_1725359612587.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_roberta_base_facebookai","xx") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_roberta_base_facebookai","xx") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
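+
+A sketch for pulling one vector per detected sentence out of `pipelineDF`; it assumes the embeddings column is named `embeddings`, as configured above.
+
+```python
+# Sketch: one annotation, and therefore one vector, per detected sentence.
+pipelineDF.selectExpr("explode(embeddings) as sent") \
+    .selectExpr("sent.result as sentence", "sent.embeddings as vector") \
+    .show(truncate=False)
+```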
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_xlm_roberta_base_facebookai| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|652.9 MB| + +## References + +https://huggingface.co/FacebookAI/xlm-roberta-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-sent_xlm_roberta_base_finetuned_wolof_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-sent_xlm_roberta_base_finetuned_wolof_pipeline_en.md new file mode 100644 index 00000000000000..56948bef2c53bc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-sent_xlm_roberta_base_finetuned_wolof_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_xlm_roberta_base_finetuned_wolof_pipeline pipeline XlmRoBertaSentenceEmbeddings from Davlan +author: John Snow Labs +name: sent_xlm_roberta_base_finetuned_wolof_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_xlm_roberta_base_finetuned_wolof_pipeline` is a English model originally trained by Davlan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_wolof_pipeline_en_5.5.0_3.0_1725398183607.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_wolof_pipeline_en_5.5.0_3.0_1725398183607.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_xlm_roberta_base_finetuned_wolof_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_xlm_roberta_base_finetuned_wolof_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
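+
+The `df` in the snippets above is assumed to be a Spark DataFrame with a `text` column, which is the input column these generated pipelines typically read. A minimal sketch of preparing such input and inspecting the output (the exact annotation column names depend on how the pipeline stages were saved, so `printSchema()` is the safest first step):
+
+```python
+# hypothetical input DataFrame; the "text" column name is an assumption
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+annotations = pipeline.transform(df)
+annotations.printSchema()   # shows the annotation columns (document, sentence, embeddings, ...)
+annotations.show(truncate=80)
+```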
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_xlm_roberta_base_finetuned_wolof_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Davlan/xlm-roberta-base-finetuned-wolof + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-sentiment_analysis_wangyh6_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-sentiment_analysis_wangyh6_pipeline_en.md new file mode 100644 index 00000000000000..a7e3d5229a78df --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-sentiment_analysis_wangyh6_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English sentiment_analysis_wangyh6_pipeline pipeline DistilBertForSequenceClassification from wangyh6 +author: John Snow Labs +name: sentiment_analysis_wangyh6_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sentiment_analysis_wangyh6_pipeline` is a English model originally trained by wangyh6. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentiment_analysis_wangyh6_pipeline_en_5.5.0_3.0_1725329655335.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentiment_analysis_wangyh6_pipeline_en_5.5.0_3.0_1725329655335.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sentiment_analysis_wangyh6_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sentiment_analysis_wangyh6_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
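+
+For quick checks on plain strings, the same pretrained pipeline can also be used without building a DataFrame first; a small sketch (the `annotate` call returns a dict keyed by the pipeline's output columns, and `text` is the assumed input column name for DataFrame usage):
+
+```python
+# light-weight, single-document usage
+result = pipeline.annotate("I really enjoyed this movie!")
+print(result)
+
+# DataFrame usage
+df = spark.createDataFrame([["I really enjoyed this movie!"]]).toDF("text")
+pipeline.transform(df).show(truncate=False)
+```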
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentiment_analysis_wangyh6_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/wangyh6/sentiment-analysis + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-singlelabelrecommendationmodel_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-singlelabelrecommendationmodel_pipeline_en.md new file mode 100644 index 00000000000000..bf54d637e2f7a7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-singlelabelrecommendationmodel_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English singlelabelrecommendationmodel_pipeline pipeline RoBertaForSequenceClassification from terrongraham +author: John Snow Labs +name: singlelabelrecommendationmodel_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`singlelabelrecommendationmodel_pipeline` is a English model originally trained by terrongraham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/singlelabelrecommendationmodel_pipeline_en_5.5.0_3.0_1725336735468.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/singlelabelrecommendationmodel_pipeline_en_5.5.0_3.0_1725336735468.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("singlelabelrecommendationmodel_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("singlelabelrecommendationmodel_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|singlelabelrecommendationmodel_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|425.0 MB| + +## References + +https://huggingface.co/terrongraham/SingleLabelRecommendationModel + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-sitexsometre_camembert_large_stsb100_en.md b/docs/_posts/ahmedlone127/2024-09-03-sitexsometre_camembert_large_stsb100_en.md new file mode 100644 index 00000000000000..f8cd4e096b9e73 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-sitexsometre_camembert_large_stsb100_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sitexsometre_camembert_large_stsb100 CamemBertForSequenceClassification from Kigo1974 +author: John Snow Labs +name: sitexsometre_camembert_large_stsb100 +date: 2024-09-03 +tags: [en, open_source, onnx, sequence_classification, camembert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sitexsometre_camembert_large_stsb100` is a English model originally trained by Kigo1974. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sitexsometre_camembert_large_stsb100_en_5.5.0_3.0_1725378746010.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sitexsometre_camembert_large_stsb100_en_5.5.0_3.0_1725378746010.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = CamemBertForSequenceClassification.pretrained("sitexsometre_camembert_large_stsb100","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = CamemBertForSequenceClassification.pretrained("sitexsometre_camembert_large_stsb100", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sitexsometre_camembert_large_stsb100| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|805.5 MB| + +## References + +https://huggingface.co/Kigo1974/sitexsometre-camembert-large-stsb100 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-splade_v3_distilbert_en.md b/docs/_posts/ahmedlone127/2024-09-03-splade_v3_distilbert_en.md new file mode 100644 index 00000000000000..633e37764387c9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-splade_v3_distilbert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English splade_v3_distilbert DistilBertEmbeddings from naver +author: John Snow Labs +name: splade_v3_distilbert +date: 2024-09-03 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`splade_v3_distilbert` is a English model originally trained by naver. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/splade_v3_distilbert_en_5.5.0_3.0_1725384690392.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/splade_v3_distilbert_en_5.5.0_3.0_1725384690392.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("splade_v3_distilbert","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("splade_v3_distilbert","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
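+
+A brief follow-up on reading the token-level vectors produced above; `embeddings` is the output column set in the snippet, and each annotation row carries both the token text (`result`) and its vector (`embeddings`):
+
+```python
+from pyspark.sql.functions import explode
+
+# one row per token; this only assumes the column names used in the example above
+pipelineDF.select(explode("embeddings").alias("ann")) \
+    .selectExpr("ann.result as token", "ann.embeddings as vector") \
+    .show(truncate=50)
+```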
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|splade_v3_distilbert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/naver/splade-v3-distilbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-squeezebert_uncased_finetuned_squad_v2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-squeezebert_uncased_finetuned_squad_v2_pipeline_en.md new file mode 100644 index 00000000000000..aff3c2489168c3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-squeezebert_uncased_finetuned_squad_v2_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English squeezebert_uncased_finetuned_squad_v2_pipeline pipeline BertForQuestionAnswering from ALOQAS +author: John Snow Labs +name: squeezebert_uncased_finetuned_squad_v2_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`squeezebert_uncased_finetuned_squad_v2_pipeline` is a English model originally trained by ALOQAS. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/squeezebert_uncased_finetuned_squad_v2_pipeline_en_5.5.0_3.0_1725352261449.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/squeezebert_uncased_finetuned_squad_v2_pipeline_en_5.5.0_3.0_1725352261449.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("squeezebert_uncased_finetuned_squad_v2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("squeezebert_uncased_finetuned_squad_v2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
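+
+Unlike the embedding and classification pipelines, this question-answering pipeline starts from a MultiDocumentAssembler, so `df` needs two input columns rather than one. The column names below are an assumption (the usual `question`/`context` pair); if they do not match the saved assembler, the error message and `printSchema()` will show the expected names:
+
+```python
+# hypothetical two-column input for a QA pipeline
+df = spark.createDataFrame(
+    [["What is my name?", "My name is Clara and I live in Berkeley."]]
+).toDF("question", "context")
+
+annotations = pipeline.transform(df)
+annotations.printSchema()
+annotations.show(truncate=False)
+```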
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|squeezebert_uncased_finetuned_squad_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|187.2 MB| + +## References + +https://huggingface.co/ALOQAS/squeezebert-uncased-finetuned-squad-v2 + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-surgicberta_en.md b/docs/_posts/ahmedlone127/2024-09-03-surgicberta_en.md new file mode 100644 index 00000000000000..88e3fc4ba6f008 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-surgicberta_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English surgicberta RoBertaEmbeddings from marcobombieri +author: John Snow Labs +name: surgicberta +date: 2024-09-03 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`surgicberta` is a English model originally trained by marcobombieri. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/surgicberta_en_5.5.0_3.0_1725374773716.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/surgicberta_en_5.5.0_3.0_1725374773716.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("surgicberta","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("surgicberta","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|surgicberta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|465.9 MB| + +## References + +https://huggingface.co/marcobombieri/surgicberta \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-takalane_northern_sotho_roberta_en.md b/docs/_posts/ahmedlone127/2024-09-03-takalane_northern_sotho_roberta_en.md new file mode 100644 index 00000000000000..af3a3a9ecfef76 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-takalane_northern_sotho_roberta_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English takalane_northern_sotho_roberta RoBertaEmbeddings from jannesg +author: John Snow Labs +name: takalane_northern_sotho_roberta +date: 2024-09-03 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`takalane_northern_sotho_roberta` is a English model originally trained by jannesg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/takalane_northern_sotho_roberta_en_5.5.0_3.0_1725381697744.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/takalane_northern_sotho_roberta_en_5.5.0_3.0_1725381697744.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("takalane_northern_sotho_roberta","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("takalane_northern_sotho_roberta","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|takalane_northern_sotho_roberta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|310.3 MB| + +## References + +https://huggingface.co/jannesg/takalane_nso_roberta \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-tokenizerlabeller_en.md b/docs/_posts/ahmedlone127/2024-09-03-tokenizerlabeller_en.md new file mode 100644 index 00000000000000..10de611710a072 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-tokenizerlabeller_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English tokenizerlabeller MarianTransformer from guymorlan +author: John Snow Labs +name: tokenizerlabeller +date: 2024-09-03 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tokenizerlabeller` is a English model originally trained by guymorlan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tokenizerlabeller_en_5.5.0_3.0_1725405053705.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tokenizerlabeller_en_5.5.0_3.0_1725405053705.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("tokenizerlabeller","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("tokenizerlabeller","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
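+
+A short follow-up on reading the output of the column wiring above: the MarianTransformer writes its translations into the `translation` annotation column, whose `result` field holds the plain translated strings.
+
+```python
+from pyspark.sql.functions import explode
+
+# one row per translated sentence
+pipelineDF.select(explode("translation.result").alias("translated")).show(truncate=False)
+```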
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tokenizerlabeller| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|532.6 MB| + +## References + +https://huggingface.co/guymorlan/TokenizerLabeller \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-topic_obits_en.md b/docs/_posts/ahmedlone127/2024-09-03-topic_obits_en.md new file mode 100644 index 00000000000000..4db8b0746f1403 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-topic_obits_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English topic_obits RoBertaForSequenceClassification from dell-research-harvard +author: John Snow Labs +name: topic_obits +date: 2024-09-03 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`topic_obits` is a English model originally trained by dell-research-harvard. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/topic_obits_en_5.5.0_3.0_1725402977020.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/topic_obits_en_5.5.0_3.0_1725402977020.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("topic_obits","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("topic_obits", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|topic_obits| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|309.0 MB| + +## References + +https://huggingface.co/dell-research-harvard/topic-obits \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-topic_politics_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-topic_politics_pipeline_en.md new file mode 100644 index 00000000000000..5e9e104b250d9f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-topic_politics_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English topic_politics_pipeline pipeline RoBertaForSequenceClassification from dell-research-harvard +author: John Snow Labs +name: topic_politics_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`topic_politics_pipeline` is a English model originally trained by dell-research-harvard. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/topic_politics_pipeline_en_5.5.0_3.0_1725369218312.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/topic_politics_pipeline_en_5.5.0_3.0_1725369218312.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("topic_politics_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("topic_politics_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|topic_politics_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/dell-research-harvard/topic-politics + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-trained_model_distilbert_0305_en.md b/docs/_posts/ahmedlone127/2024-09-03-trained_model_distilbert_0305_en.md new file mode 100644 index 00000000000000..a84f7812df1dbd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-trained_model_distilbert_0305_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English trained_model_distilbert_0305 DistilBertForSequenceClassification from sciencedata +author: John Snow Labs +name: trained_model_distilbert_0305 +date: 2024-09-03 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`trained_model_distilbert_0305` is a English model originally trained by sciencedata. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/trained_model_distilbert_0305_en_5.5.0_3.0_1725330148215.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/trained_model_distilbert_0305_en_5.5.0_3.0_1725330148215.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("trained_model_distilbert_0305","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("trained_model_distilbert_0305", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|trained_model_distilbert_0305| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/sciencedata/trained_model_distilbert_0305 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-transformer_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-transformer_pipeline_en.md new file mode 100644 index 00000000000000..90a533de10e543 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-transformer_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English transformer_pipeline pipeline MPNetEmbeddings from kpourdeilami +author: John Snow Labs +name: transformer_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`transformer_pipeline` is a English model originally trained by kpourdeilami. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/transformer_pipeline_en_5.5.0_3.0_1725350494701.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/transformer_pipeline_en_5.5.0_3.0_1725350494701.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("transformer_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("transformer_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|transformer_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/kpourdeilami/transformer + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-translation_finetuned_english_tonga_tonga_islands_jp_en.md b/docs/_posts/ahmedlone127/2024-09-03-translation_finetuned_english_tonga_tonga_islands_jp_en.md new file mode 100644 index 00000000000000..84998ed294cfe3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-translation_finetuned_english_tonga_tonga_islands_jp_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English translation_finetuned_english_tonga_tonga_islands_jp MarianTransformer from ldh243 +author: John Snow Labs +name: translation_finetuned_english_tonga_tonga_islands_jp +date: 2024-09-03 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`translation_finetuned_english_tonga_tonga_islands_jp` is a English model originally trained by ldh243. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/translation_finetuned_english_tonga_tonga_islands_jp_en_5.5.0_3.0_1725404602969.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/translation_finetuned_english_tonga_tonga_islands_jp_en_5.5.0_3.0_1725404602969.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("translation_finetuned_english_tonga_tonga_islands_jp","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("translation_finetuned_english_tonga_tonga_islands_jp","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|translation_finetuned_english_tonga_tonga_islands_jp| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|430.7 MB| + +## References + +https://huggingface.co/ldh243/translation-finetuned-en-to-jp \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-twitter_roberta_base_dec2020_tweet_topic_multi_2020_en.md b/docs/_posts/ahmedlone127/2024-09-03-twitter_roberta_base_dec2020_tweet_topic_multi_2020_en.md new file mode 100644 index 00000000000000..804fb1282d87bf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-twitter_roberta_base_dec2020_tweet_topic_multi_2020_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English twitter_roberta_base_dec2020_tweet_topic_multi_2020 RoBertaForSequenceClassification from cardiffnlp +author: John Snow Labs +name: twitter_roberta_base_dec2020_tweet_topic_multi_2020 +date: 2024-09-03 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`twitter_roberta_base_dec2020_tweet_topic_multi_2020` is a English model originally trained by cardiffnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_dec2020_tweet_topic_multi_2020_en_5.5.0_3.0_1725403177329.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_dec2020_tweet_topic_multi_2020_en_5.5.0_3.0_1725403177329.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("twitter_roberta_base_dec2020_tweet_topic_multi_2020","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("twitter_roberta_base_dec2020_tweet_topic_multi_2020", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
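+
+The predicted topic labels can be read straight from the `class` annotation column produced above; a minimal sketch:
+
+```python
+# "class.result" is an array with one predicted label per document
+pipelineDF.select("text", "class.result").show(truncate=False)
+```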
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|twitter_roberta_base_dec2020_tweet_topic_multi_2020| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|468.4 MB| + +## References + +https://huggingface.co/cardiffnlp/twitter-roberta-base-dec2020-tweet-topic-multi-2020 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-twitter_roberta_base_sep2020_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-twitter_roberta_base_sep2020_pipeline_en.md new file mode 100644 index 00000000000000..1bab901fefd1a9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-twitter_roberta_base_sep2020_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English twitter_roberta_base_sep2020_pipeline pipeline RoBertaEmbeddings from cardiffnlp +author: John Snow Labs +name: twitter_roberta_base_sep2020_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`twitter_roberta_base_sep2020_pipeline` is a English model originally trained by cardiffnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_sep2020_pipeline_en_5.5.0_3.0_1725382229219.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_sep2020_pipeline_en_5.5.0_3.0_1725382229219.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("twitter_roberta_base_sep2020_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("twitter_roberta_base_sep2020_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|twitter_roberta_base_sep2020_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|466.1 MB| + +## References + +https://huggingface.co/cardiffnlp/twitter-roberta-base-sep2020 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-twitter_roberta_base_topic_sentiment_latest_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-twitter_roberta_base_topic_sentiment_latest_pipeline_en.md new file mode 100644 index 00000000000000..7fb27689453d0c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-twitter_roberta_base_topic_sentiment_latest_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English twitter_roberta_base_topic_sentiment_latest_pipeline pipeline RoBertaForSequenceClassification from cardiffnlp +author: John Snow Labs +name: twitter_roberta_base_topic_sentiment_latest_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`twitter_roberta_base_topic_sentiment_latest_pipeline` is a English model originally trained by cardiffnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_topic_sentiment_latest_pipeline_en_5.5.0_3.0_1725402172018.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_topic_sentiment_latest_pipeline_en_5.5.0_3.0_1725402172018.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("twitter_roberta_base_topic_sentiment_latest_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("twitter_roberta_base_topic_sentiment_latest_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|twitter_roberta_base_topic_sentiment_latest_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|468.2 MB| + +## References + +https://huggingface.co/cardiffnlp/twitter-roberta-base-topic-sentiment-latest + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-twitter_sentiment_analysis_v2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-twitter_sentiment_analysis_v2_pipeline_en.md new file mode 100644 index 00000000000000..42e6e8fb3cb92b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-twitter_sentiment_analysis_v2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English twitter_sentiment_analysis_v2_pipeline pipeline DistilBertForSequenceClassification from mliamsinclair +author: John Snow Labs +name: twitter_sentiment_analysis_v2_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`twitter_sentiment_analysis_v2_pipeline` is a English model originally trained by mliamsinclair. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/twitter_sentiment_analysis_v2_pipeline_en_5.5.0_3.0_1725394054179.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/twitter_sentiment_analysis_v2_pipeline_en_5.5.0_3.0_1725394054179.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("twitter_sentiment_analysis_v2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("twitter_sentiment_analysis_v2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|twitter_sentiment_analysis_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/mliamsinclair/twitter-sentiment-analysis-v2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-withinapps_ndd_pagekit_test_tags_cwadj_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-withinapps_ndd_pagekit_test_tags_cwadj_pipeline_en.md new file mode 100644 index 00000000000000..e4d96b5a906093 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-withinapps_ndd_pagekit_test_tags_cwadj_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English withinapps_ndd_pagekit_test_tags_cwadj_pipeline pipeline DistilBertForSequenceClassification from lgk03 +author: John Snow Labs +name: withinapps_ndd_pagekit_test_tags_cwadj_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`withinapps_ndd_pagekit_test_tags_cwadj_pipeline` is a English model originally trained by lgk03. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/withinapps_ndd_pagekit_test_tags_cwadj_pipeline_en_5.5.0_3.0_1725330223607.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/withinapps_ndd_pagekit_test_tags_cwadj_pipeline_en_5.5.0_3.0_1725330223607.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("withinapps_ndd_pagekit_test_tags_cwadj_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("withinapps_ndd_pagekit_test_tags_cwadj_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|withinapps_ndd_pagekit_test_tags_cwadj_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/lgk03/WITHINAPPS_NDD-pagekit_test-tags-CWAdj + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-xlm_r_galen_meddocan_es.md b/docs/_posts/ahmedlone127/2024-09-03-xlm_r_galen_meddocan_es.md new file mode 100644 index 00000000000000..455b246265ac3f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-xlm_r_galen_meddocan_es.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Castilian, Spanish xlm_r_galen_meddocan XlmRoBertaForTokenClassification from IIC +author: John Snow Labs +name: xlm_r_galen_meddocan +date: 2024-09-03 +tags: [es, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_r_galen_meddocan` is a Castilian, Spanish model originally trained by IIC. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_r_galen_meddocan_es_5.5.0_3.0_1725348544674.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_r_galen_meddocan_es_5.5.0_3.0_1725348544674.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_r_galen_meddocan","es") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_r_galen_meddocan", "es")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
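+
+Tokens and their predicted entity tags come back as parallel arrays in the `token` and `ner` annotation columns produced above; a minimal sketch of inspecting them side by side:
+
+```python
+# parallel arrays: the i-th tag in "ner.result" belongs to the i-th token in "token.result"
+pipelineDF.select("token.result", "ner.result").show(truncate=False)
+```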
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_r_galen_meddocan| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|es| +|Size:|1.0 GB| + +## References + +https://huggingface.co/IIC/XLM_R_Galen-meddocan \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-xlm_r_with_transliteration_max_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-xlm_r_with_transliteration_max_pipeline_en.md new file mode 100644 index 00000000000000..f4bda7910f720e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-xlm_r_with_transliteration_max_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_r_with_transliteration_max_pipeline pipeline XlmRoBertaEmbeddings from yihongLiu +author: John Snow Labs +name: xlm_r_with_transliteration_max_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_r_with_transliteration_max_pipeline` is a English model originally trained by yihongLiu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_r_with_transliteration_max_pipeline_en_5.5.0_3.0_1725353062042.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_r_with_transliteration_max_pipeline_en_5.5.0_3.0_1725353062042.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_r_with_transliteration_max_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_r_with_transliteration_max_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_r_with_transliteration_max_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|843.3 MB| + +## References + +https://huggingface.co/yihongLiu/xlm-r-with-transliteration-max + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_base_finetuned_augument_visquad2_15_3_2023_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_base_finetuned_augument_visquad2_15_3_2023_1_pipeline_en.md new file mode 100644 index 00000000000000..0c496a0d9d296e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_base_finetuned_augument_visquad2_15_3_2023_1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_augument_visquad2_15_3_2023_1_pipeline pipeline XlmRoBertaForQuestionAnswering from jluckyboyj +author: John Snow Labs +name: xlm_roberta_base_finetuned_augument_visquad2_15_3_2023_1_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_augument_visquad2_15_3_2023_1_pipeline` is a English model originally trained by jluckyboyj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_augument_visquad2_15_3_2023_1_pipeline_en_5.5.0_3.0_1725380066786.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_augument_visquad2_15_3_2023_1_pipeline_en_5.5.0_3.0_1725380066786.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_augument_visquad2_15_3_2023_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_augument_visquad2_15_3_2023_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_augument_visquad2_15_3_2023_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|852.8 MB| + +## References + +https://huggingface.co/jluckyboyj/xlm-roberta-base-finetuned-augument-visquad2-15-3-2023-1 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_base_finetuned_clinais_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_base_finetuned_clinais_pipeline_en.md new file mode 100644 index 00000000000000..539f573ebfeac4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_base_finetuned_clinais_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_clinais_pipeline pipeline XlmRoBertaEmbeddings from joheras +author: John Snow Labs +name: xlm_roberta_base_finetuned_clinais_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_clinais_pipeline` is a English model originally trained by joheras. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_clinais_pipeline_en_5.5.0_3.0_1725391255350.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_clinais_pipeline_en_5.5.0_3.0_1725391255350.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_clinais_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_clinais_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_clinais_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|988.3 MB| + +## References + +https://huggingface.co/joheras/xlm-roberta-base-finetuned-clinais + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_base_finetuned_marc_english_test_rundi_en.md b/docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_base_finetuned_marc_english_test_rundi_en.md new file mode 100644 index 00000000000000..f65b99a707de34 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_base_finetuned_marc_english_test_rundi_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_marc_english_test_rundi XlmRoBertaForSequenceClassification from shaer +author: John Snow Labs +name: xlm_roberta_base_finetuned_marc_english_test_rundi +date: 2024-09-03 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_marc_english_test_rundi` is a English model originally trained by shaer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_marc_english_test_rundi_en_5.5.0_3.0_1725396860237.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_marc_english_test_rundi_en_5.5.0_3.0_1725396860237.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_finetuned_marc_english_test_rundi","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("class")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols(Array("document"))
    .setOutputCol("token")

val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_finetuned_marc_english_test_rundi", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("class")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
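Following the Python example above, the predicted label can be read from the `class` annotation column. This is a small sketch rather than part of the original card:

```python
# Each row of "class" is an array of annotation structs; `result` holds the predicted label.
pipelineDF.select("text", "class.result").show(truncate=False)
```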
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_marc_english_test_rundi| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|833.4 MB| + +## References + +https://huggingface.co/shaer/xlm-roberta-base-finetuned-marc-en-test-run \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_base_finetuned_panx_all_the_neural_networker_en.md b/docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_base_finetuned_panx_all_the_neural_networker_en.md new file mode 100644 index 00000000000000..2a8d1e08152a96 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_base_finetuned_panx_all_the_neural_networker_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_all_the_neural_networker XlmRoBertaForTokenClassification from the-neural-networker +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_all_the_neural_networker +date: 2024-09-03 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_all_the_neural_networker` is a English model originally trained by the-neural-networker. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_the_neural_networker_en_5.5.0_3.0_1725348826008.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_the_neural_networker_en_5.5.0_3.0_1725348826008.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_all_the_neural_networker","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_all_the_neural_networker", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_all_the_neural_networker| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|848.1 MB| + +## References + +https://huggingface.co/the-neural-networker/xlm-roberta-base-finetuned-panx-all \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_base_lcc_english_2e_5_42_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_base_lcc_english_2e_5_42_pipeline_en.md new file mode 100644 index 00000000000000..689d55d362a147 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_base_lcc_english_2e_5_42_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_lcc_english_2e_5_42_pipeline pipeline XlmRoBertaForSequenceClassification from EhsanAghazadeh +author: John Snow Labs +name: xlm_roberta_base_lcc_english_2e_5_42_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_lcc_english_2e_5_42_pipeline` is a English model originally trained by EhsanAghazadeh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_lcc_english_2e_5_42_pipeline_en_5.5.0_3.0_1725396313039.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_lcc_english_2e_5_42_pipeline_en_5.5.0_3.0_1725396313039.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_lcc_english_2e_5_42_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_lcc_english_2e_5_42_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
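Besides `transform` on a DataFrame, a loaded `PretrainedPipeline` can also be applied to a single string with `annotate`. A brief sketch (the input sentence is only an illustration):

```python
# Returns a plain dict keyed by the pipeline's output columns.
light_result = pipeline.annotate("I love spark-nlp")
print(light_result.keys())
```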
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_lcc_english_2e_5_42_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|806.8 MB| + +## References + +https://huggingface.co/EhsanAghazadeh/xlm-roberta-base-lcc-en-2e-5-42 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_base_longformer_4096_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_base_longformer_4096_pipeline_en.md new file mode 100644 index 00000000000000..c5f8b1a8088c35 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_base_longformer_4096_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_longformer_4096_pipeline pipeline XlmRoBertaEmbeddings from ogaloglu +author: John Snow Labs +name: xlm_roberta_base_longformer_4096_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_longformer_4096_pipeline` is a English model originally trained by ogaloglu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_longformer_4096_pipeline_en_5.5.0_3.0_1725405797880.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_longformer_4096_pipeline_en_5.5.0_3.0_1725405797880.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_longformer_4096_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_longformer_4096_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_longformer_4096_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/ogaloglu/xlm-roberta-base-longformer-4096 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_base_squad2_idkmrc_clickbaitspoiling_en.md b/docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_base_squad2_idkmrc_clickbaitspoiling_en.md new file mode 100644 index 00000000000000..ec20f516fae0c5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_base_squad2_idkmrc_clickbaitspoiling_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English xlm_roberta_base_squad2_idkmrc_clickbaitspoiling XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: xlm_roberta_base_squad2_idkmrc_clickbaitspoiling +date: 2024-09-03 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_squad2_idkmrc_clickbaitspoiling` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_squad2_idkmrc_clickbaitspoiling_en_5.5.0_3.0_1725379718945.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_squad2_idkmrc_clickbaitspoiling_en_5.5.0_3.0_1725379718945.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = MultiDocumentAssembler() \
     .setInputCols(["question", "context"]) \
     .setOutputCols(["document_question", "document_context"])

spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_squad2_idkmrc_clickbaitspoiling","en") \
     .setInputCols(["document_question","document_context"]) \
     .setOutputCol("answer")

pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new MultiDocumentAssembler()
     .setInputCols(Array("question", "context"))
     .setOutputCols(Array("document_question", "document_context"))

val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_squad2_idkmrc_clickbaitspoiling", "en")
     .setInputCols(Array("document_question","document_context"))
     .setOutputCol("answer")

val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
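Continuing the Python example above, the extracted answer text can be pulled out of the `answer` annotation column. This is a sketch, not part of the original card:

```python
# `result` holds the answer span predicted for each question/context pair.
pipelineDF.select("answer.result").show(truncate=False)
```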
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_squad2_idkmrc_clickbaitspoiling| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|884.8 MB| + +## References + +https://huggingface.co/intanm/xlm-roberta-base-squad2-idkmrc-clickbaitspoiling \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_large_qa_norwegian_eanderson_en.md b/docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_large_qa_norwegian_eanderson_en.md new file mode 100644 index 00000000000000..3cc814442f6291 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_large_qa_norwegian_eanderson_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English xlm_roberta_large_qa_norwegian_eanderson XlmRoBertaForQuestionAnswering from eanderson +author: John Snow Labs +name: xlm_roberta_large_qa_norwegian_eanderson +date: 2024-09-03 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_large_qa_norwegian_eanderson` is a English model originally trained by eanderson. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_large_qa_norwegian_eanderson_en_5.5.0_3.0_1725381052963.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_large_qa_norwegian_eanderson_en_5.5.0_3.0_1725381052963.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCol(["question", "context"]) \ + .setOutputCol(["document_question", "document_context"]) + +spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_large_qa_norwegian_eanderson","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCol(Array("question", "context")) + .setOutputCol(Array("document_question", "document_context")) + +val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_large_qa_norwegian_eanderson", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq("What framework do I use?","I use spark-nlp.").toDS.toDF("document_question", "document_context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_large_qa_norwegian_eanderson| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|851.3 MB| + +## References + +https://huggingface.co/eanderson/xlm-roberta-large-qa_norwegian \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_squad_nepali_translated_squad_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_squad_nepali_translated_squad_pipeline_en.md new file mode 100644 index 00000000000000..1d0c0f48d38d54 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-xlm_roberta_squad_nepali_translated_squad_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlm_roberta_squad_nepali_translated_squad_pipeline pipeline XlmRoBertaForQuestionAnswering from Yunika +author: John Snow Labs +name: xlm_roberta_squad_nepali_translated_squad_pipeline +date: 2024-09-03 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_squad_nepali_translated_squad_pipeline` is a English model originally trained by Yunika. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_squad_nepali_translated_squad_pipeline_en_5.5.0_3.0_1725381210771.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_squad_nepali_translated_squad_pipeline_en_5.5.0_3.0_1725381210771.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_squad_nepali_translated_squad_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_squad_nepali_translated_squad_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_squad_nepali_translated_squad_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|880.6 MB| + +## References + +https://huggingface.co/Yunika/xlm-roberta-squad-nepali-translated-squad + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-xlmroberta_ner_base_finetuned_naija_pipeline_pcm.md b/docs/_posts/ahmedlone127/2024-09-03-xlmroberta_ner_base_finetuned_naija_pipeline_pcm.md new file mode 100644 index 00000000000000..de36cd97913cb4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-xlmroberta_ner_base_finetuned_naija_pipeline_pcm.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Nigerian Pidgin xlmroberta_ner_base_finetuned_naija_pipeline pipeline XlmRoBertaForTokenClassification from mbeukman +author: John Snow Labs +name: xlmroberta_ner_base_finetuned_naija_pipeline +date: 2024-09-03 +tags: [pcm, open_source, pipeline, onnx] +task: Named Entity Recognition +language: pcm +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmroberta_ner_base_finetuned_naija_pipeline` is a Nigerian Pidgin model originally trained by mbeukman. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_base_finetuned_naija_pipeline_pcm_5.5.0_3.0_1725373169842.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_base_finetuned_naija_pipeline_pcm_5.5.0_3.0_1725373169842.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmroberta_ner_base_finetuned_naija_pipeline", lang = "pcm") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmroberta_ner_base_finetuned_naija_pipeline", lang = "pcm") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_ner_base_finetuned_naija_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|pcm| +|Size:|778.0 MB| + +## References + +https://huggingface.co/mbeukman/xlm-roberta-base-finetuned-ner-naija + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-xlmroberta_ner_hugsao123_base_finetuned_panx_de.md b/docs/_posts/ahmedlone127/2024-09-03-xlmroberta_ner_hugsao123_base_finetuned_panx_de.md new file mode 100644 index 00000000000000..7b681a8e3fdfb7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-xlmroberta_ner_hugsao123_base_finetuned_panx_de.md @@ -0,0 +1,113 @@ +--- +layout: model +title: German XLMRobertaForTokenClassification Base Cased model (from hugsao123) +author: John Snow Labs +name: xlmroberta_ner_hugsao123_base_finetuned_panx +date: 2024-09-03 +tags: [de, open_source, xlm_roberta, ner, onnx] +task: Named Entity Recognition +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XLMRobertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm-roberta-base-finetuned-panx-de` is a German model originally trained by `hugsao123`. + +## Predicted Entities + +`PER`, `LOC`, `ORG` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_hugsao123_base_finetuned_panx_de_5.5.0_3.0_1725372739793.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_hugsao123_base_finetuned_panx_de_5.5.0_3.0_1725372739793.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
```python
documentAssembler = DocumentAssembler() \
    .setInputCol("text") \
    .setOutputCol("document")

tokenizer = Tokenizer() \
    .setInputCols("document") \
    .setOutputCol("token")

token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_hugsao123_base_finetuned_panx","de") \
    .setInputCols(["document", "token"]) \
    .setOutputCol("ner")

ner_converter = NerConverter()\
    .setInputCols(["document", "token", "ner"])\
    .setOutputCol("ner_chunk")

pipeline = Pipeline(stages=[documentAssembler, tokenizer, token_classifier, ner_converter])

data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text")

result = pipeline.fit(data).transform(data)
```
```scala
val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols("document")
    .setOutputCol("token")

val token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_hugsao123_base_finetuned_panx","de")
    .setInputCols(Array("document", "token"))
    .setOutputCol("ner")

val ner_converter = new NerConverter()
    .setInputCols(Array("document", "token", "ner"))
    .setOutputCol("ner_chunk")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, token_classifier, ner_converter))

val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text")

val result = pipeline.fit(data).transform(data)
```

{:.nlu-block}
```python
import nlu
nlu.load("de.ner.xlmr_roberta.xtreme.base_finetuned.by_hugsao123").predict("""PUT YOUR STRING HERE""")
```
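To turn the `ner_chunk` annotations from the Python example above into a flat table of entities and labels, something along these lines could be used (a sketch; the metadata key `entity` is where NerConverter typically stores the label):

```python
from pyspark.sql import functions as F

# One row per recognized entity span, with its PER/LOC/ORG label.
result.select(F.explode("ner_chunk").alias("chunk")) \
    .select(F.col("chunk.result").alias("entity"),
            F.col("chunk.metadata").getItem("entity").alias("label")) \
    .show(truncate=False)
```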
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_ner_hugsao123_base_finetuned_panx| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|de| +|Size:|826.4 MB| + +## References + +References + +- https://huggingface.co/hugsao123/xlm-roberta-base-finetuned-panx-de +- https://paperswithcode.com/sota?task=Token+Classification&dataset=xtreme \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-03-xlmroberta_ner_transformersbook_base_finetuned_panx_de.md b/docs/_posts/ahmedlone127/2024-09-03-xlmroberta_ner_transformersbook_base_finetuned_panx_de.md new file mode 100644 index 00000000000000..4dfe2db3314e81 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-03-xlmroberta_ner_transformersbook_base_finetuned_panx_de.md @@ -0,0 +1,115 @@ +--- +layout: model +title: German XLMRobertaForTokenClassification Base Cased model (from transformersbook) +author: John Snow Labs +name: xlmroberta_ner_transformersbook_base_finetuned_panx +date: 2024-09-03 +tags: [de, open_source, xlm_roberta, ner, onnx] +task: Named Entity Recognition +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XLMRobertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm-roberta-base-finetuned-panx-de` is a German model originally trained by `transformersbook`. + +## Predicted Entities + +`PER`, `LOC`, `ORG` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_transformersbook_base_finetuned_panx_de_5.5.0_3.0_1725372689188.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_transformersbook_base_finetuned_panx_de_5.5.0_3.0_1725372689188.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
```python
documentAssembler = DocumentAssembler() \
    .setInputCol("text") \
    .setOutputCol("document")

tokenizer = Tokenizer() \
    .setInputCols("document") \
    .setOutputCol("token")

token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_transformersbook_base_finetuned_panx","de") \
    .setInputCols(["document", "token"]) \
    .setOutputCol("ner")

ner_converter = NerConverter()\
    .setInputCols(["document", "token", "ner"])\
    .setOutputCol("ner_chunk")

pipeline = Pipeline(stages=[documentAssembler, tokenizer, token_classifier, ner_converter])

data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text")

result = pipeline.fit(data).transform(data)
```
```scala
val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols("document")
    .setOutputCol("token")

val token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_transformersbook_base_finetuned_panx","de")
    .setInputCols(Array("document", "token"))
    .setOutputCol("ner")

val ner_converter = new NerConverter()
    .setInputCols(Array("document", "token", "ner"))
    .setOutputCol("ner_chunk")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, token_classifier, ner_converter))

val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text")

val result = pipeline.fit(data).transform(data)
```

{:.nlu-block}
```python
import nlu
nlu.load("de.ner.xlmr_roberta.xtreme.base_finetuned.by_transformersbook").predict("""PUT YOUR STRING HERE""")
```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_ner_transformersbook_base_finetuned_panx| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|de| +|Size:|853.8 MB| + +## References + +References + +- https://huggingface.co/transformersbook/xlm-roberta-base-finetuned-panx-de +- https://learning.oreilly.com/library/view/natural-language-processing/9781098103231/ +- https://github.com/nlp-with-transformers/notebooks/blob/main/04_multilingual-ner.ipynb +- https://paperswithcode.com/sota?task=Token+Classification&dataset=xtreme \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-014_microsoft_deberta_v3_base_finetuned_yahoo_80_20_en.md b/docs/_posts/ahmedlone127/2024-09-04-014_microsoft_deberta_v3_base_finetuned_yahoo_80_20_en.md new file mode 100644 index 00000000000000..b9973279b26862 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-014_microsoft_deberta_v3_base_finetuned_yahoo_80_20_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English 014_microsoft_deberta_v3_base_finetuned_yahoo_80_20 DeBertaForSequenceClassification from diogopaes10 +author: John Snow Labs +name: 014_microsoft_deberta_v3_base_finetuned_yahoo_80_20 +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`014_microsoft_deberta_v3_base_finetuned_yahoo_80_20` is a English model originally trained by diogopaes10. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/014_microsoft_deberta_v3_base_finetuned_yahoo_80_20_en_5.5.0_3.0_1725467508571.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/014_microsoft_deberta_v3_base_finetuned_yahoo_80_20_en_5.5.0_3.0_1725467508571.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DeBertaForSequenceClassification.pretrained("014_microsoft_deberta_v3_base_finetuned_yahoo_80_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DeBertaForSequenceClassification.pretrained("014_microsoft_deberta_v3_base_finetuned_yahoo_80_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|014_microsoft_deberta_v3_base_finetuned_yahoo_80_20| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|576.6 MB| + +## References + +https://huggingface.co/diogopaes10/014-microsoft-deberta-v3-base-finetuned-yahoo-80_20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-014_microsoft_deberta_v3_base_finetuned_yahoo_80_20_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-014_microsoft_deberta_v3_base_finetuned_yahoo_80_20_pipeline_en.md new file mode 100644 index 00000000000000..61b358804b6b13 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-014_microsoft_deberta_v3_base_finetuned_yahoo_80_20_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English 014_microsoft_deberta_v3_base_finetuned_yahoo_80_20_pipeline pipeline DeBertaForSequenceClassification from diogopaes10 +author: John Snow Labs +name: 014_microsoft_deberta_v3_base_finetuned_yahoo_80_20_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`014_microsoft_deberta_v3_base_finetuned_yahoo_80_20_pipeline` is a English model originally trained by diogopaes10. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/014_microsoft_deberta_v3_base_finetuned_yahoo_80_20_pipeline_en_5.5.0_3.0_1725467557866.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/014_microsoft_deberta_v3_base_finetuned_yahoo_80_20_pipeline_en_5.5.0_3.0_1725467557866.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("014_microsoft_deberta_v3_base_finetuned_yahoo_80_20_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("014_microsoft_deberta_v3_base_finetuned_yahoo_80_20_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|014_microsoft_deberta_v3_base_finetuned_yahoo_80_20_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|576.6 MB| + +## References + +https://huggingface.co/diogopaes10/014-microsoft-deberta-v3-base-finetuned-yahoo-80_20 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-32_shot_twitter_2classes_head_body_en.md b/docs/_posts/ahmedlone127/2024-09-04-32_shot_twitter_2classes_head_body_en.md new file mode 100644 index 00000000000000..41d7704e6324f9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-32_shot_twitter_2classes_head_body_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English 32_shot_twitter_2classes_head_body MPNetEmbeddings from Nhat1904 +author: John Snow Labs +name: 32_shot_twitter_2classes_head_body +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`32_shot_twitter_2classes_head_body` is a English model originally trained by Nhat1904. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/32_shot_twitter_2classes_head_body_en_5.5.0_3.0_1725470685903.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/32_shot_twitter_2classes_head_body_en_5.5.0_3.0_1725470685903.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("32_shot_twitter_2classes_head_body","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("32_shot_twitter_2classes_head_body","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
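Following the Python example above, the sentence-level vectors live in the `embeddings` field of the `embeddings` annotation column. A hedged sketch for pulling them out:

```python
# Each annotation struct carries its vector in the `embeddings` field; explode gives one row per vector.
vectors = pipelineDF.selectExpr("explode(embeddings.embeddings) as vector")
vectors.show(1, truncate=False)
```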
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|32_shot_twitter_2classes_head_body| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/Nhat1904/32-shot-twitter-2classes-head-body \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-600_stmodel_brand_rem_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-600_stmodel_brand_rem_pipeline_en.md new file mode 100644 index 00000000000000..21c8e4693a5747 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-600_stmodel_brand_rem_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English 600_stmodel_brand_rem_pipeline pipeline MPNetEmbeddings from jamiehudson +author: John Snow Labs +name: 600_stmodel_brand_rem_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`600_stmodel_brand_rem_pipeline` is a English model originally trained by jamiehudson. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/600_stmodel_brand_rem_pipeline_en_5.5.0_3.0_1725470805220.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/600_stmodel_brand_rem_pipeline_en_5.5.0_3.0_1725470805220.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("600_stmodel_brand_rem_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("600_stmodel_brand_rem_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|600_stmodel_brand_rem_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/jamiehudson/600-STmodel-brand-rem + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-aditya_ner_en.md b/docs/_posts/ahmedlone127/2024-09-04-aditya_ner_en.md new file mode 100644 index 00000000000000..f8de89f6a1b4be --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-aditya_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English aditya_ner DistilBertForTokenClassification from cleopatro +author: John Snow Labs +name: aditya_ner +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`aditya_ner` is a English model originally trained by cleopatro. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/aditya_ner_en_5.5.0_3.0_1725448795818.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/aditya_ner_en_5.5.0_3.0_1725448795818.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = DistilBertForTokenClassification.pretrained("aditya_ner","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = DistilBertForTokenClassification.pretrained("aditya_ner", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|aditya_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/cleopatro/Aditya_NER \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-aditya_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-aditya_ner_pipeline_en.md new file mode 100644 index 00000000000000..b58017197175cf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-aditya_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English aditya_ner_pipeline pipeline DistilBertForTokenClassification from cleopatro +author: John Snow Labs +name: aditya_ner_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`aditya_ner_pipeline` is a English model originally trained by cleopatro. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/aditya_ner_pipeline_en_5.5.0_3.0_1725448808740.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/aditya_ner_pipeline_en_5.5.0_3.0_1725448808740.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("aditya_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("aditya_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|aditya_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/cleopatro/Aditya_NER + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-afriberta_base_finetuned_hausa_2e_4_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-afriberta_base_finetuned_hausa_2e_4_pipeline_en.md new file mode 100644 index 00000000000000..8061f0e8a41844 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-afriberta_base_finetuned_hausa_2e_4_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English afriberta_base_finetuned_hausa_2e_4_pipeline pipeline XlmRoBertaForTokenClassification from grace-pro +author: John Snow Labs +name: afriberta_base_finetuned_hausa_2e_4_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`afriberta_base_finetuned_hausa_2e_4_pipeline` is a English model originally trained by grace-pro. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/afriberta_base_finetuned_hausa_2e_4_pipeline_en_5.5.0_3.0_1725424526929.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/afriberta_base_finetuned_hausa_2e_4_pipeline_en_5.5.0_3.0_1725424526929.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("afriberta_base_finetuned_hausa_2e_4_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("afriberta_base_finetuned_hausa_2e_4_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|afriberta_base_finetuned_hausa_2e_4_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|415.3 MB| + +## References + +https://huggingface.co/grace-pro/afriberta-base-finetuned-hausa-2e-4 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-akai_ner_en.md b/docs/_posts/ahmedlone127/2024-09-04-akai_ner_en.md new file mode 100644 index 00000000000000..54df15415e8f85 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-akai_ner_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English akai_ner DistilBertForTokenClassification from GautamR +author: John Snow Labs +name: akai_ner +date: 2024-09-04 +tags: [bert, en, open_source, token_classification, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`akai_ner` is a English model originally trained by GautamR. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/akai_ner_en_5.5.0_3.0_1725449028577.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/akai_ner_en_5.5.0_3.0_1725449028577.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
```python
documentAssembler = DocumentAssembler() \
    .setInputCol("text") \
    .setOutputCol("document")

tokenizer = Tokenizer() \
    .setInputCols(["document"]) \
    .setOutputCol("token")

tokenClassifier = DistilBertForTokenClassification.pretrained("akai_ner","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("ner")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])

data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")

pipelineModel = pipeline.fit(data)

pipelineDF = pipelineModel.transform(data)
```
```scala
val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols(Array("document"))
    .setOutputCol("token")

val tokenClassifier = DistilBertForTokenClassification
    .pretrained("akai_ner", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("ner")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))

val data = Seq("I love spark-nlp").toDS.toDF("text")

val pipelineModel = pipeline.fit(data)

val pipelineDF = pipelineModel.transform(data)
```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|akai_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +References + +https://huggingface.co/GautamR/akai_ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-albert_base_chinese_ws_zh.md b/docs/_posts/ahmedlone127/2024-09-04-albert_base_chinese_ws_zh.md new file mode 100644 index 00000000000000..fb19cf4689392b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-albert_base_chinese_ws_zh.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Chinese albert_base_chinese_ws BertForTokenClassification from ckiplab +author: John Snow Labs +name: albert_base_chinese_ws +date: 2024-09-04 +tags: [zh, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: zh +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_base_chinese_ws` is a Chinese model originally trained by ckiplab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_chinese_ws_zh_5.5.0_3.0_1725449513061.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_chinese_ws_zh_5.5.0_3.0_1725449513061.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("albert_base_chinese_ws","zh") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("albert_base_chinese_ws", "zh")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_chinese_ws| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|zh| +|Size:|37.5 MB| + +## References + +https://huggingface.co/ckiplab/albert-base-chinese-ws \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-albert_base_qa_coqa_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-albert_base_qa_coqa_1_pipeline_en.md new file mode 100644 index 00000000000000..e788be92b707b6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-albert_base_qa_coqa_1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English albert_base_qa_coqa_1_pipeline pipeline AlbertForQuestionAnswering from mateiaass +author: John Snow Labs +name: albert_base_qa_coqa_1_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_base_qa_coqa_1_pipeline` is a English model originally trained by mateiaass. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_qa_coqa_1_pipeline_en_5.5.0_3.0_1725415036973.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_qa_coqa_1_pipeline_en_5.5.0_3.0_1725415036973.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("albert_base_qa_coqa_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("albert_base_qa_coqa_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_qa_coqa_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|42.0 MB| + +## References + +https://huggingface.co/mateiaass/albert-base-qa-coQA-1 + +## Included Models + +- MultiDocumentAssembler +- AlbertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-albert_base_v2_finetuned_ner_minhminh09_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-albert_base_v2_finetuned_ner_minhminh09_pipeline_en.md new file mode 100644 index 00000000000000..5076a9fbdb3b9f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-albert_base_v2_finetuned_ner_minhminh09_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English albert_base_v2_finetuned_ner_minhminh09_pipeline pipeline AlbertForTokenClassification from MinhMinh09 +author: John Snow Labs +name: albert_base_v2_finetuned_ner_minhminh09_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_base_v2_finetuned_ner_minhminh09_pipeline` is a English model originally trained by MinhMinh09. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_finetuned_ner_minhminh09_pipeline_en_5.5.0_3.0_1725487146608.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_finetuned_ner_minhminh09_pipeline_en_5.5.0_3.0_1725487146608.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("albert_base_v2_finetuned_ner_minhminh09_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("albert_base_v2_finetuned_ner_minhminh09_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
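+
+The `df` used above is left undefined in these snippets; as a minimal sketch (assuming the pipeline's DocumentAssembler reads a `text` column, as the other examples in these docs do), it can be built like this:
+
+```python
+# One-row DataFrame holding the raw text to annotate.
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+```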
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_finetuned_ner_minhminh09_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|42.0 MB| + +## References + +https://huggingface.co/MinhMinh09/albert-base-v2-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-albert_base_v2_rotten_tomatoes_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-albert_base_v2_rotten_tomatoes_pipeline_en.md new file mode 100644 index 00000000000000..53d1d91e50e8db --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-albert_base_v2_rotten_tomatoes_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English albert_base_v2_rotten_tomatoes_pipeline pipeline AlbertEmbeddings from textattack +author: John Snow Labs +name: albert_base_v2_rotten_tomatoes_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_base_v2_rotten_tomatoes_pipeline` is a English model originally trained by textattack. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_rotten_tomatoes_pipeline_en_5.5.0_3.0_1725435083636.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_rotten_tomatoes_pipeline_en_5.5.0_3.0_1725435083636.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("albert_base_v2_rotten_tomatoes_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("albert_base_v2_rotten_tomatoes_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_rotten_tomatoes_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|42.0 MB| + +## References + +https://huggingface.co/textattack/albert-base-v2-rotten_tomatoes + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-albert_base_v2_rte_textattack_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-albert_base_v2_rte_textattack_pipeline_en.md new file mode 100644 index 00000000000000..477e4bf7738d64 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-albert_base_v2_rte_textattack_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English albert_base_v2_rte_textattack_pipeline pipeline AlbertForSequenceClassification from textattack +author: John Snow Labs +name: albert_base_v2_rte_textattack_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_base_v2_rte_textattack_pipeline` is a English model originally trained by textattack. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_rte_textattack_pipeline_en_5.5.0_3.0_1725441433545.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_rte_textattack_pipeline_en_5.5.0_3.0_1725441433545.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("albert_base_v2_rte_textattack_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("albert_base_v2_rte_textattack_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_rte_textattack_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/textattack/albert-base-v2-RTE + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-albert_persian_farsi_base_v2_pipeline_fa.md b/docs/_posts/ahmedlone127/2024-09-04-albert_persian_farsi_base_v2_pipeline_fa.md new file mode 100644 index 00000000000000..e24f4782de0f55 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-albert_persian_farsi_base_v2_pipeline_fa.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Persian albert_persian_farsi_base_v2_pipeline pipeline AlbertEmbeddings from m3hrdadfi +author: John Snow Labs +name: albert_persian_farsi_base_v2_pipeline +date: 2024-09-04 +tags: [fa, open_source, pipeline, onnx] +task: Embeddings +language: fa +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_persian_farsi_base_v2_pipeline` is a Persian model originally trained by m3hrdadfi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_persian_farsi_base_v2_pipeline_fa_5.5.0_3.0_1725457643724.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_persian_farsi_base_v2_pipeline_fa_5.5.0_3.0_1725457643724.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("albert_persian_farsi_base_v2_pipeline", lang = "fa") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("albert_persian_farsi_base_v2_pipeline", lang = "fa") +val annotations = pipeline.transform(df) + +``` +
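+
+For quick experiments on a single string, `PretrainedPipeline` also exposes `annotate`, which skips building a DataFrame entirely. A minimal sketch (the Persian input text is a made-up sample):
+
+```python
+# Returns a dict mapping each output column of the pipeline to its results.
+result = pipeline.annotate("متن نمونه برای آزمایش")
+print(result.keys())
+```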
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_persian_farsi_base_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fa| +|Size:|66.3 MB| + +## References + +https://huggingface.co/m3hrdadfi/albert-fa-base-v2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-albert_persian_farsi_base_v2_sentiment_multi_fa.md b/docs/_posts/ahmedlone127/2024-09-04-albert_persian_farsi_base_v2_sentiment_multi_fa.md new file mode 100644 index 00000000000000..95d60ea59f0a90 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-albert_persian_farsi_base_v2_sentiment_multi_fa.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Persian albert_persian_farsi_base_v2_sentiment_multi AlbertForSequenceClassification from m3hrdadfi +author: John Snow Labs +name: albert_persian_farsi_base_v2_sentiment_multi +date: 2024-09-04 +tags: [fa, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: fa +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_persian_farsi_base_v2_sentiment_multi` is a Persian model originally trained by m3hrdadfi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_persian_farsi_base_v2_sentiment_multi_fa_5.5.0_3.0_1725441313336.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_persian_farsi_base_v2_sentiment_multi_fa_5.5.0_3.0_1725441313336.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_persian_farsi_base_v2_sentiment_multi","fa") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_persian_farsi_base_v2_sentiment_multi", "fa")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
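+
+As a small follow-up sketch (assuming the `pipelineDF` built above, with the classifier writing to the `class` column), the predicted sentiment label per row can be read from the annotation's `result` field:
+
+```python
+# "class.result" pulls the predicted label(s) out of the annotation structs.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```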
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_persian_farsi_base_v2_sentiment_multi| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|fa| +|Size:|68.5 MB| + +## References + +https://huggingface.co/m3hrdadfi/albert-fa-base-v2-sentiment-multi \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-albert_small_kor_v1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-albert_small_kor_v1_pipeline_en.md new file mode 100644 index 00000000000000..a42accdbedaac0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-albert_small_kor_v1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English albert_small_kor_v1_pipeline pipeline AlbertEmbeddings from bongsoo +author: John Snow Labs +name: albert_small_kor_v1_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_small_kor_v1_pipeline` is a English model originally trained by bongsoo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_small_kor_v1_pipeline_en_5.5.0_3.0_1725457764791.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_small_kor_v1_pipeline_en_5.5.0_3.0_1725457764791.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("albert_small_kor_v1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("albert_small_kor_v1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_small_kor_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|41.8 MB| + +## References + +https://huggingface.co/bongsoo/albert-small-kor-v1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-all_mpnet_base_v2_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-all_mpnet_base_v2_1_pipeline_en.md new file mode 100644 index 00000000000000..870e32abb06e61 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-all_mpnet_base_v2_1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English all_mpnet_base_v2_1_pipeline pipeline MPNetEmbeddings from abhijitt +author: John Snow Labs +name: all_mpnet_base_v2_1_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`all_mpnet_base_v2_1_pipeline` is a English model originally trained by abhijitt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_1_pipeline_en_5.5.0_3.0_1725470679948.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_1_pipeline_en_5.5.0_3.0_1725470679948.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("all_mpnet_base_v2_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("all_mpnet_base_v2_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_base_v2_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/abhijitt/all-mpnet-base-v2_1 + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-all_mpnet_base_v2_airdialogue_unlabelled_and_labelled_en.md b/docs/_posts/ahmedlone127/2024-09-04-all_mpnet_base_v2_airdialogue_unlabelled_and_labelled_en.md new file mode 100644 index 00000000000000..1fed0ec98d6daf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-all_mpnet_base_v2_airdialogue_unlabelled_and_labelled_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English all_mpnet_base_v2_airdialogue_unlabelled_and_labelled MPNetEmbeddings from azikoss +author: John Snow Labs +name: all_mpnet_base_v2_airdialogue_unlabelled_and_labelled +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`all_mpnet_base_v2_airdialogue_unlabelled_and_labelled` is a English model originally trained by azikoss. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_airdialogue_unlabelled_and_labelled_en_5.5.0_3.0_1725469865258.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_airdialogue_unlabelled_and_labelled_en_5.5.0_3.0_1725469865258.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_v2_airdialogue_unlabelled_and_labelled","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_v2_airdialogue_unlabelled_and_labelled","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
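+
+To get at the raw sentence vectors afterwards (a sketch assuming the `pipelineDF` from the example above, with the annotator writing to the `embeddings` column), each annotation struct carries its vector in an `embeddings` field:
+
+```python
+# One 768-dimensional float array per document for this MPNet base model.
+vectors = pipelineDF.selectExpr("explode(embeddings.embeddings) as vector")
+vectors.show(1, truncate=80)
+```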
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_base_v2_airdialogue_unlabelled_and_labelled| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/azikoss/all-mpnet-base-v2-airdialogue-unlabelled-and-labelled \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-all_mpnet_base_v2_fine_tuned_epochs_8_binhcode25_finetuned_en.md b/docs/_posts/ahmedlone127/2024-09-04-all_mpnet_base_v2_fine_tuned_epochs_8_binhcode25_finetuned_en.md new file mode 100644 index 00000000000000..78119e1576653f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-all_mpnet_base_v2_fine_tuned_epochs_8_binhcode25_finetuned_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English all_mpnet_base_v2_fine_tuned_epochs_8_binhcode25_finetuned MPNetEmbeddings from binhcode25-finetuned +author: John Snow Labs +name: all_mpnet_base_v2_fine_tuned_epochs_8_binhcode25_finetuned +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`all_mpnet_base_v2_fine_tuned_epochs_8_binhcode25_finetuned` is a English model originally trained by binhcode25-finetuned. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_fine_tuned_epochs_8_binhcode25_finetuned_en_5.5.0_3.0_1725470890060.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_fine_tuned_epochs_8_binhcode25_finetuned_en_5.5.0_3.0_1725470890060.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_v2_fine_tuned_epochs_8_binhcode25_finetuned","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_v2_fine_tuned_epochs_8_binhcode25_finetuned","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_base_v2_fine_tuned_epochs_8_binhcode25_finetuned| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/binhcode25-finetuned/all-mpnet-base-v2-fine-tuned-epochs-8 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-all_mpnet_base_v2_fine_tuned_epochs_8_event_nlp_en.md b/docs/_posts/ahmedlone127/2024-09-04-all_mpnet_base_v2_fine_tuned_epochs_8_event_nlp_en.md new file mode 100644 index 00000000000000..9b17e2054e7203 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-all_mpnet_base_v2_fine_tuned_epochs_8_event_nlp_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English all_mpnet_base_v2_fine_tuned_epochs_8_event_nlp MPNetEmbeddings from event-nlp +author: John Snow Labs +name: all_mpnet_base_v2_fine_tuned_epochs_8_event_nlp +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`all_mpnet_base_v2_fine_tuned_epochs_8_event_nlp` is a English model originally trained by event-nlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_fine_tuned_epochs_8_event_nlp_en_5.5.0_3.0_1725469860980.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_fine_tuned_epochs_8_event_nlp_en_5.5.0_3.0_1725469860980.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_v2_fine_tuned_epochs_8_event_nlp","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_v2_fine_tuned_epochs_8_event_nlp","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_base_v2_fine_tuned_epochs_8_event_nlp| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/event-nlp/all-mpnet-base-v2-fine-tuned-epochs-8 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-all_mpnet_base_v2_firefox_margin_1_epoch_1_en.md b/docs/_posts/ahmedlone127/2024-09-04-all_mpnet_base_v2_firefox_margin_1_epoch_1_en.md new file mode 100644 index 00000000000000..4da940ba5cdd47 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-all_mpnet_base_v2_firefox_margin_1_epoch_1_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English all_mpnet_base_v2_firefox_margin_1_epoch_1 MPNetEmbeddings from luiz-and-robert-thesis +author: John Snow Labs +name: all_mpnet_base_v2_firefox_margin_1_epoch_1 +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`all_mpnet_base_v2_firefox_margin_1_epoch_1` is a English model originally trained by luiz-and-robert-thesis. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_firefox_margin_1_epoch_1_en_5.5.0_3.0_1725470258917.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_firefox_margin_1_epoch_1_en_5.5.0_3.0_1725470258917.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_v2_firefox_margin_1_epoch_1","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_v2_firefox_margin_1_epoch_1","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_base_v2_firefox_margin_1_epoch_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/luiz-and-robert-thesis/all-mpnet-base-v2-firefox-margin-1-epoch-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-all_mpnet_base_v2_firefox_margin_1_epoch_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-all_mpnet_base_v2_firefox_margin_1_epoch_1_pipeline_en.md new file mode 100644 index 00000000000000..3cf0d5386f536a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-all_mpnet_base_v2_firefox_margin_1_epoch_1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English all_mpnet_base_v2_firefox_margin_1_epoch_1_pipeline pipeline MPNetEmbeddings from luiz-and-robert-thesis +author: John Snow Labs +name: all_mpnet_base_v2_firefox_margin_1_epoch_1_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`all_mpnet_base_v2_firefox_margin_1_epoch_1_pipeline` is a English model originally trained by luiz-and-robert-thesis. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_firefox_margin_1_epoch_1_pipeline_en_5.5.0_3.0_1725470282128.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_firefox_margin_1_epoch_1_pipeline_en_5.5.0_3.0_1725470282128.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("all_mpnet_base_v2_firefox_margin_1_epoch_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("all_mpnet_base_v2_firefox_margin_1_epoch_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_base_v2_firefox_margin_1_epoch_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/luiz-and-robert-thesis/all-mpnet-base-v2-firefox-margin-1-epoch-1 + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-all_mpnet_base_v2_survey3000_en.md b/docs/_posts/ahmedlone127/2024-09-04-all_mpnet_base_v2_survey3000_en.md new file mode 100644 index 00000000000000..7aef47a8e7101b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-all_mpnet_base_v2_survey3000_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English all_mpnet_base_v2_survey3000 MPNetEmbeddings from zihoo +author: John Snow Labs +name: all_mpnet_base_v2_survey3000 +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`all_mpnet_base_v2_survey3000` is a English model originally trained by zihoo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_survey3000_en_5.5.0_3.0_1725470254835.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_survey3000_en_5.5.0_3.0_1725470254835.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_v2_survey3000","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_v2_survey3000","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
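+
+Since these MPNet sentence vectors are usually compared with cosine similarity, here is a rough sketch of scoring two texts against each other outside of Spark, reusing the `pipelineModel` fitted above (the two sample sentences are made up for illustration):
+
+```python
+import numpy as np
+
+pair_df = spark.createDataFrame([["How do I reset my password?"],
+                                 ["Steps to recover a forgotten password"]]).toDF("text")
+rows = pipelineModel.transform(pair_df).select("embeddings.embeddings").collect()
+
+# One vector per input row; compute their cosine similarity.
+v1, v2 = (np.array(r[0][0]) for r in rows)
+print(float(v1 @ v2 / (np.linalg.norm(v1) * np.linalg.norm(v2))))
+```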
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_base_v2_survey3000| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/zihoo/all-mpnet-base-v2-survey3000 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-answer_equivalence_distilbert_zli12321_en.md b/docs/_posts/ahmedlone127/2024-09-04-answer_equivalence_distilbert_zli12321_en.md new file mode 100644 index 00000000000000..081630e1bc1b80 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-answer_equivalence_distilbert_zli12321_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English answer_equivalence_distilbert_zli12321 DistilBertForSequenceClassification from zli12321 +author: John Snow Labs +name: answer_equivalence_distilbert_zli12321 +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`answer_equivalence_distilbert_zli12321` is a English model originally trained by zli12321. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/answer_equivalence_distilbert_zli12321_en_5.5.0_3.0_1725489895355.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/answer_equivalence_distilbert_zli12321_en_5.5.0_3.0_1725489895355.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("answer_equivalence_distilbert_zli12321","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("answer_equivalence_distilbert_zli12321", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|answer_equivalence_distilbert_zli12321| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/zli12321/answer_equivalence_distilbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-answer_equivalence_distilbert_zli12321_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-answer_equivalence_distilbert_zli12321_pipeline_en.md new file mode 100644 index 00000000000000..3685baea521026 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-answer_equivalence_distilbert_zli12321_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English answer_equivalence_distilbert_zli12321_pipeline pipeline DistilBertForSequenceClassification from zli12321 +author: John Snow Labs +name: answer_equivalence_distilbert_zli12321_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`answer_equivalence_distilbert_zli12321_pipeline` is a English model originally trained by zli12321. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/answer_equivalence_distilbert_zli12321_pipeline_en_5.5.0_3.0_1725489907453.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/answer_equivalence_distilbert_zli12321_pipeline_en_5.5.0_3.0_1725489907453.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("answer_equivalence_distilbert_zli12321_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("answer_equivalence_distilbert_zli12321_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|answer_equivalence_distilbert_zli12321_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/zli12321/answer_equivalence_distilbert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-argureviews_component_deberta_v1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-argureviews_component_deberta_v1_pipeline_en.md new file mode 100644 index 00000000000000..2fee4743df3671 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-argureviews_component_deberta_v1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English argureviews_component_deberta_v1_pipeline pipeline DeBertaForSequenceClassification from nihiluis +author: John Snow Labs +name: argureviews_component_deberta_v1_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`argureviews_component_deberta_v1_pipeline` is a English model originally trained by nihiluis. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/argureviews_component_deberta_v1_pipeline_en_5.5.0_3.0_1725469116085.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/argureviews_component_deberta_v1_pipeline_en_5.5.0_3.0_1725469116085.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("argureviews_component_deberta_v1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("argureviews_component_deberta_v1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|argureviews_component_deberta_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/nihiluis/argureviews-component-deberta_v1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-astroentities_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-astroentities_pipeline_en.md new file mode 100644 index 00000000000000..88a1dacba3b633 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-astroentities_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English astroentities_pipeline pipeline DistilBertForTokenClassification from teamzalenski +author: John Snow Labs +name: astroentities_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`astroentities_pipeline` is a English model originally trained by teamzalenski. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/astroentities_pipeline_en_5.5.0_3.0_1725492836199.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/astroentities_pipeline_en_5.5.0_3.0_1725492836199.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("astroentities_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("astroentities_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|astroentities_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.5 MB| + +## References + +https://huggingface.co/teamzalenski/astroentities + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-autonlp_covid_fake_news_36839110_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-autonlp_covid_fake_news_36839110_pipeline_en.md new file mode 100644 index 00000000000000..e3bb4f593efd68 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-autonlp_covid_fake_news_36839110_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English autonlp_covid_fake_news_36839110_pipeline pipeline AlbertForSequenceClassification from dtam +author: John Snow Labs +name: autonlp_covid_fake_news_36839110_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`autonlp_covid_fake_news_36839110_pipeline` is a English model originally trained by dtam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/autonlp_covid_fake_news_36839110_pipeline_en_5.5.0_3.0_1725488647715.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/autonlp_covid_fake_news_36839110_pipeline_en_5.5.0_3.0_1725488647715.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("autonlp_covid_fake_news_36839110_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("autonlp_covid_fake_news_36839110_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|autonlp_covid_fake_news_36839110_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|833.9 MB| + +## References + +https://huggingface.co/dtam/autonlp-covid-fake-news-36839110 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-autotrain_3_xlmr_fulltext_53881126794_en.md b/docs/_posts/ahmedlone127/2024-09-04-autotrain_3_xlmr_fulltext_53881126794_en.md new file mode 100644 index 00000000000000..ee413041bbbc6e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-autotrain_3_xlmr_fulltext_53881126794_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English autotrain_3_xlmr_fulltext_53881126794 XlmRoBertaForTokenClassification from tinyYhorm +author: John Snow Labs +name: autotrain_3_xlmr_fulltext_53881126794 +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`autotrain_3_xlmr_fulltext_53881126794` is a English model originally trained by tinyYhorm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/autotrain_3_xlmr_fulltext_53881126794_en_5.5.0_3.0_1725423769929.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/autotrain_3_xlmr_fulltext_53881126794_en_5.5.0_3.0_1725423769929.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("autotrain_3_xlmr_fulltext_53881126794","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("autotrain_3_xlmr_fulltext_53881126794", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|autotrain_3_xlmr_fulltext_53881126794| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|769.4 MB| + +## References + +https://huggingface.co/tinyYhorm/autotrain-3-xlmr-fulltext-53881126794 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-autotrain_htyqd_ivazp_en.md b/docs/_posts/ahmedlone127/2024-09-04-autotrain_htyqd_ivazp_en.md new file mode 100644 index 00000000000000..1ec9bbc5e205f2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-autotrain_htyqd_ivazp_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English autotrain_htyqd_ivazp DistilBertForTokenClassification from bikashpatra +author: John Snow Labs +name: autotrain_htyqd_ivazp +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`autotrain_htyqd_ivazp` is a English model originally trained by bikashpatra. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/autotrain_htyqd_ivazp_en_5.5.0_3.0_1725492782973.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/autotrain_htyqd_ivazp_en_5.5.0_3.0_1725492782973.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("autotrain_htyqd_ivazp","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("autotrain_htyqd_ivazp", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|autotrain_htyqd_ivazp| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.4 MB| + +## References + +https://huggingface.co/bikashpatra/autotrain-htyqd-ivazp \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-autotrain_htyqd_ivazp_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-autotrain_htyqd_ivazp_pipeline_en.md new file mode 100644 index 00000000000000..536528dc2fe829 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-autotrain_htyqd_ivazp_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English autotrain_htyqd_ivazp_pipeline pipeline DistilBertForTokenClassification from bikashpatra +author: John Snow Labs +name: autotrain_htyqd_ivazp_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`autotrain_htyqd_ivazp_pipeline` is a English model originally trained by bikashpatra. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/autotrain_htyqd_ivazp_pipeline_en_5.5.0_3.0_1725492796031.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/autotrain_htyqd_ivazp_pipeline_en_5.5.0_3.0_1725492796031.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("autotrain_htyqd_ivazp_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("autotrain_htyqd_ivazp_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
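+The snippet above assumes an existing DataFrame `df` with a text column. A minimal sketch of preparing that input and reading the predictions back (the `text` input column and the `ner` output column are assumptions based on the stages listed under Included Models):
+
+```python
+# Hypothetical input DataFrame; the pipeline's DocumentAssembler is assumed to read a "text" column.
+df = spark.createDataFrame([["John works at John Snow Labs."]]).toDF("text")
+annotations = pipeline.transform(df)
+# Output column name assumed from the included DistilBertForTokenClassification stage.
+annotations.select("ner.result").show(truncate=False)
+```
+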
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|autotrain_htyqd_ivazp_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.4 MB| + +## References + +https://huggingface.co/bikashpatra/autotrain-htyqd-ivazp + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-babyberta_wiki_finetuned_squad_v1_en.md b/docs/_posts/ahmedlone127/2024-09-04-babyberta_wiki_finetuned_squad_v1_en.md new file mode 100644 index 00000000000000..f5a67729430a05 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-babyberta_wiki_finetuned_squad_v1_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English babyberta_wiki_finetuned_squad_v1 RoBertaForQuestionAnswering from lielbin +author: John Snow Labs +name: babyberta_wiki_finetuned_squad_v1 +date: 2024-09-04 +tags: [en, open_source, onnx, question_answering, roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`babyberta_wiki_finetuned_squad_v1` is a English model originally trained by lielbin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/babyberta_wiki_finetuned_squad_v1_en_5.5.0_3.0_1725479952525.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/babyberta_wiki_finetuned_squad_v1_en_5.5.0_3.0_1725479952525.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import MultiDocumentAssembler
+from sparknlp.annotator import RoBertaForQuestionAnswering
+from pyspark.ml import Pipeline
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = RoBertaForQuestionAnswering.pretrained("babyberta_wiki_finetuned_squad_v1","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = RoBertaForQuestionAnswering.pretrained("babyberta_wiki_finetuned_squad_v1", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
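+To read the predicted span back out (a sketch assuming the `pipelineDF` from the Python example), explode the `answer` annotations and select their `result` field:
+
+```python
+# Assumes `pipelineDF` from the example above; `result` holds the extracted answer text.
+pipelineDF.selectExpr("explode(answer) as ans") \
+    .selectExpr("ans.result as answer") \
+    .show(truncate=False)
+```
+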
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|babyberta_wiki_finetuned_squad_v1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|32.0 MB| + +## References + +https://huggingface.co/lielbin/babyberta-wiki-finetuned-squad-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-babyberta_wiki_finetuned_squad_v1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-babyberta_wiki_finetuned_squad_v1_pipeline_en.md new file mode 100644 index 00000000000000..3a6be66578f65c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-babyberta_wiki_finetuned_squad_v1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English babyberta_wiki_finetuned_squad_v1_pipeline pipeline RoBertaForQuestionAnswering from lielbin +author: John Snow Labs +name: babyberta_wiki_finetuned_squad_v1_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`babyberta_wiki_finetuned_squad_v1_pipeline` is a English model originally trained by lielbin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/babyberta_wiki_finetuned_squad_v1_pipeline_en_5.5.0_3.0_1725479954554.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/babyberta_wiki_finetuned_squad_v1_pipeline_en_5.5.0_3.0_1725479954554.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("babyberta_wiki_finetuned_squad_v1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("babyberta_wiki_finetuned_squad_v1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|babyberta_wiki_finetuned_squad_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|32.0 MB| + +## References + +https://huggingface.co/lielbin/babyberta-wiki-finetuned-squad-v1 + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-babyberta_wikipedia1_2_5_with_masking_run3_finetuned_qamr_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-babyberta_wikipedia1_2_5_with_masking_run3_finetuned_qamr_pipeline_en.md new file mode 100644 index 00000000000000..b864ff8abecc96 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-babyberta_wikipedia1_2_5_with_masking_run3_finetuned_qamr_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English babyberta_wikipedia1_2_5_with_masking_run3_finetuned_qamr_pipeline pipeline RoBertaForQuestionAnswering from lielbin +author: John Snow Labs +name: babyberta_wikipedia1_2_5_with_masking_run3_finetuned_qamr_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`babyberta_wikipedia1_2_5_with_masking_run3_finetuned_qamr_pipeline` is a English model originally trained by lielbin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/babyberta_wikipedia1_2_5_with_masking_run3_finetuned_qamr_pipeline_en_5.5.0_3.0_1725484327841.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/babyberta_wikipedia1_2_5_with_masking_run3_finetuned_qamr_pipeline_en_5.5.0_3.0_1725484327841.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("babyberta_wikipedia1_2_5_with_masking_run3_finetuned_qamr_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("babyberta_wikipedia1_2_5_with_masking_run3_finetuned_qamr_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|babyberta_wikipedia1_2_5_with_masking_run3_finetuned_qamr_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|32.0 MB| + +## References + +https://huggingface.co/lielbin/BabyBERTa-Wikipedia1_2.5-with-Masking_run3-finetuned-QAMR + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-babyberta_wikipedia_2_5_0_1_finetuned_qasrl_en.md b/docs/_posts/ahmedlone127/2024-09-04-babyberta_wikipedia_2_5_0_1_finetuned_qasrl_en.md new file mode 100644 index 00000000000000..6aefcbd5f4f84b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-babyberta_wikipedia_2_5_0_1_finetuned_qasrl_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English babyberta_wikipedia_2_5_0_1_finetuned_qasrl RoBertaForQuestionAnswering from lielbin +author: John Snow Labs +name: babyberta_wikipedia_2_5_0_1_finetuned_qasrl +date: 2024-09-04 +tags: [en, open_source, onnx, question_answering, roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`babyberta_wikipedia_2_5_0_1_finetuned_qasrl` is a English model originally trained by lielbin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/babyberta_wikipedia_2_5_0_1_finetuned_qasrl_en_5.5.0_3.0_1725478981039.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/babyberta_wikipedia_2_5_0_1_finetuned_qasrl_en_5.5.0_3.0_1725478981039.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import MultiDocumentAssembler
+from sparknlp.annotator import RoBertaForQuestionAnswering
+from pyspark.ml import Pipeline
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = RoBertaForQuestionAnswering.pretrained("babyberta_wikipedia_2_5_0_1_finetuned_qasrl","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = RoBertaForQuestionAnswering.pretrained("babyberta_wikipedia_2_5_0_1_finetuned_qasrl", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|babyberta_wikipedia_2_5_0_1_finetuned_qasrl| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|32.0 MB| + +## References + +https://huggingface.co/lielbin/babyberta-Wikipedia_2.5-0.1-finetuned-QASRL \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-babyberta_wikipedia_2_5_0_1_finetuned_qasrl_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-babyberta_wikipedia_2_5_0_1_finetuned_qasrl_pipeline_en.md new file mode 100644 index 00000000000000..81b7ff522cdc11 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-babyberta_wikipedia_2_5_0_1_finetuned_qasrl_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English babyberta_wikipedia_2_5_0_1_finetuned_qasrl_pipeline pipeline RoBertaForQuestionAnswering from lielbin +author: John Snow Labs +name: babyberta_wikipedia_2_5_0_1_finetuned_qasrl_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`babyberta_wikipedia_2_5_0_1_finetuned_qasrl_pipeline` is a English model originally trained by lielbin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/babyberta_wikipedia_2_5_0_1_finetuned_qasrl_pipeline_en_5.5.0_3.0_1725478987966.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/babyberta_wikipedia_2_5_0_1_finetuned_qasrl_pipeline_en_5.5.0_3.0_1725478987966.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("babyberta_wikipedia_2_5_0_1_finetuned_qasrl_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("babyberta_wikipedia_2_5_0_1_finetuned_qasrl_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|babyberta_wikipedia_2_5_0_1_finetuned_qasrl_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|32.0 MB| + +## References + +https://huggingface.co/lielbin/babyberta-Wikipedia_2.5-0.1-finetuned-QASRL + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-bert_ner_anglicisms_spanish_mbert_pipeline_es.md b/docs/_posts/ahmedlone127/2024-09-04-bert_ner_anglicisms_spanish_mbert_pipeline_es.md new file mode 100644 index 00000000000000..6c69e136417860 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-bert_ner_anglicisms_spanish_mbert_pipeline_es.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Castilian, Spanish bert_ner_anglicisms_spanish_mbert_pipeline pipeline BertForTokenClassification from lirondos +author: John Snow Labs +name: bert_ner_anglicisms_spanish_mbert_pipeline +date: 2024-09-04 +tags: [es, open_source, pipeline, onnx] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_ner_anglicisms_spanish_mbert_pipeline` is a Castilian, Spanish model originally trained by lirondos. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ner_anglicisms_spanish_mbert_pipeline_es_5.5.0_3.0_1725450416174.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ner_anglicisms_spanish_mbert_pipeline_es_5.5.0_3.0_1725450416174.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_ner_anglicisms_spanish_mbert_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_ner_anglicisms_spanish_mbert_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
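+For quick ad-hoc checks the pretrained pipeline can also be called on a single string with `annotate`, which returns a dictionary keyed by the pipeline's output columns (the Spanish sentence and the `token`/`ner` key names below are illustrative assumptions):
+
+```python
+# Illustrative single-sentence call; key names depend on the pipeline's internal column names.
+result = pipeline.annotate("El community manager subió el post durante el streaming.")
+print(result["token"])
+print(result["ner"])
+```
+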
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ner_anglicisms_spanish_mbert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|665.1 MB| + +## References + +https://huggingface.co/lirondos/anglicisms-spanish-mbert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-bert_ner_biobert_base_cased_v1.2_finetuned_ner_craft_augmented_english_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-04-bert_ner_biobert_base_cased_v1.2_finetuned_ner_craft_augmented_english_pipeline_xx.md new file mode 100644 index 00000000000000..22d1403e8e5dfb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-bert_ner_biobert_base_cased_v1.2_finetuned_ner_craft_augmented_english_pipeline_xx.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Multilingual bert_ner_biobert_base_cased_v1.2_finetuned_ner_craft_augmented_english_pipeline pipeline BertForTokenClassification from StivenLancheros +author: John Snow Labs +name: bert_ner_biobert_base_cased_v1.2_finetuned_ner_craft_augmented_english_pipeline +date: 2024-09-04 +tags: [xx, open_source, pipeline, onnx] +task: Named Entity Recognition +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_ner_biobert_base_cased_v1.2_finetuned_ner_craft_augmented_english_pipeline` is a Multilingual model originally trained by StivenLancheros. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ner_biobert_base_cased_v1.2_finetuned_ner_craft_augmented_english_pipeline_xx_5.5.0_3.0_1725477506476.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ner_biobert_base_cased_v1.2_finetuned_ner_craft_augmented_english_pipeline_xx_5.5.0_3.0_1725477506476.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_ner_biobert_base_cased_v1.2_finetuned_ner_craft_augmented_english_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_ner_biobert_base_cased_v1.2_finetuned_ner_craft_augmented_english_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ner_biobert_base_cased_v1.2_finetuned_ner_craft_augmented_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|403.7 MB| + +## References + +https://huggingface.co/StivenLancheros/biobert-base-cased-v1.2-finetuned-ner-CRAFT_Augmented_EN + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-bert_ner_skills_en.md b/docs/_posts/ahmedlone127/2024-09-04-bert_ner_skills_en.md new file mode 100644 index 00000000000000..ae6d2749e074a1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-bert_ner_skills_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_ner_skills BertForTokenClassification from Pot-l +author: John Snow Labs +name: bert_ner_skills +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_ner_skills` is a English model originally trained by Pot-l. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ner_skills_en_5.5.0_3.0_1725477361849.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ner_skills_en_5.5.0_3.0_1725477361849.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, BertForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("bert_ner_skills","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bert_ner_skills", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
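+For low-latency inference on individual strings, the fitted pipeline can be wrapped in a `LightPipeline` (a minimal sketch; the example sentence is illustrative):
+
+```python
+from sparknlp.base import LightPipeline
+
+# Wraps the fitted PipelineModel for fast, driver-side inference on small inputs.
+light = LightPipeline(pipelineModel)
+result = light.annotate("Experienced in Python, Spark and machine learning.")
+print(result["ner"])
+```
+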
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ner_skills| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/Pot-l/bert-ner-skills \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-bert_ner_skills_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-bert_ner_skills_pipeline_en.md new file mode 100644 index 00000000000000..a50e6a6e8d5fe1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-bert_ner_skills_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_ner_skills_pipeline pipeline BertForTokenClassification from Pot-l +author: John Snow Labs +name: bert_ner_skills_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_ner_skills_pipeline` is a English model originally trained by Pot-l. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ner_skills_pipeline_en_5.5.0_3.0_1725477385248.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ner_skills_pipeline_en_5.5.0_3.0_1725477385248.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_ner_skills_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_ner_skills_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ner_skills_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/Pot-l/bert-ner-skills + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-bert_reviews_online_courses_sentiment_analysis_sravni_russian_corp_russian_en.md b/docs/_posts/ahmedlone127/2024-09-04-bert_reviews_online_courses_sentiment_analysis_sravni_russian_corp_russian_en.md new file mode 100644 index 00000000000000..32411c219d69c0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-bert_reviews_online_courses_sentiment_analysis_sravni_russian_corp_russian_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_reviews_online_courses_sentiment_analysis_sravni_russian_corp_russian DistilBertForSequenceClassification from C0uchP0tat0 +author: John Snow Labs +name: bert_reviews_online_courses_sentiment_analysis_sravni_russian_corp_russian +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_reviews_online_courses_sentiment_analysis_sravni_russian_corp_russian` is a English model originally trained by C0uchP0tat0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_reviews_online_courses_sentiment_analysis_sravni_russian_corp_russian_en_5.5.0_3.0_1725489860472.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_reviews_online_courses_sentiment_analysis_sravni_russian_corp_russian_en_5.5.0_3.0_1725489860472.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("bert_reviews_online_courses_sentiment_analysis_sravni_russian_corp_russian","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("bert_reviews_online_courses_sentiment_analysis_sravni_russian_corp_russian", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
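+The predicted sentiment label lands in the `class` column; a minimal sketch for reading it back (assuming the `pipelineDF` from the Python example):
+
+```python
+# Assumes `pipelineDF` from the example above; each row receives one predicted label.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```
+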
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_reviews_online_courses_sentiment_analysis_sravni_russian_corp_russian| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|507.6 MB| + +## References + +https://huggingface.co/C0uchP0tat0/bert-reviews-online-courses-sentiment-analysis-sravni.ru-corp-ru \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-bert_reviews_online_courses_sentiment_analysis_sravni_russian_corp_russian_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-bert_reviews_online_courses_sentiment_analysis_sravni_russian_corp_russian_pipeline_en.md new file mode 100644 index 00000000000000..86ca412930542e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-bert_reviews_online_courses_sentiment_analysis_sravni_russian_corp_russian_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_reviews_online_courses_sentiment_analysis_sravni_russian_corp_russian_pipeline pipeline DistilBertForSequenceClassification from C0uchP0tat0 +author: John Snow Labs +name: bert_reviews_online_courses_sentiment_analysis_sravni_russian_corp_russian_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_reviews_online_courses_sentiment_analysis_sravni_russian_corp_russian_pipeline` is a English model originally trained by C0uchP0tat0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_reviews_online_courses_sentiment_analysis_sravni_russian_corp_russian_pipeline_en_5.5.0_3.0_1725489884934.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_reviews_online_courses_sentiment_analysis_sravni_russian_corp_russian_pipeline_en_5.5.0_3.0_1725489884934.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_reviews_online_courses_sentiment_analysis_sravni_russian_corp_russian_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_reviews_online_courses_sentiment_analysis_sravni_russian_corp_russian_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_reviews_online_courses_sentiment_analysis_sravni_russian_corp_russian_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|507.6 MB| + +## References + +https://huggingface.co/C0uchP0tat0/bert-reviews-online-courses-sentiment-analysis-sravni.ru-corp-ru + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-bert_sayula_popoluca_estbert_xpos_128_en.md b/docs/_posts/ahmedlone127/2024-09-04-bert_sayula_popoluca_estbert_xpos_128_en.md new file mode 100644 index 00000000000000..2b4430d63dd6e2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-bert_sayula_popoluca_estbert_xpos_128_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_sayula_popoluca_estbert_xpos_128 BertForTokenClassification from tartuNLP +author: John Snow Labs +name: bert_sayula_popoluca_estbert_xpos_128 +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_sayula_popoluca_estbert_xpos_128` is a English model originally trained by tartuNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_sayula_popoluca_estbert_xpos_128_en_5.5.0_3.0_1725478120684.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_sayula_popoluca_estbert_xpos_128_en_5.5.0_3.0_1725478120684.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, BertForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("bert_sayula_popoluca_estbert_xpos_128","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bert_sayula_popoluca_estbert_xpos_128", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_sayula_popoluca_estbert_xpos_128| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|463.5 MB| + +## References + +https://huggingface.co/tartuNLP/EstBERT_XPOS_128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-bert_sequence_classifier_coronabert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-bert_sequence_classifier_coronabert_pipeline_en.md new file mode 100644 index 00000000000000..fac80a26c2fc8b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-bert_sequence_classifier_coronabert_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_sequence_classifier_coronabert_pipeline pipeline BertForSequenceClassification from jakelever +author: John Snow Labs +name: bert_sequence_classifier_coronabert_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_sequence_classifier_coronabert_pipeline` is a English model originally trained by jakelever. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_sequence_classifier_coronabert_pipeline_en_5.5.0_3.0_1725433174407.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_sequence_classifier_coronabert_pipeline_en_5.5.0_3.0_1725433174407.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_sequence_classifier_coronabert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_sequence_classifier_coronabert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_sequence_classifier_coronabert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|410.4 MB| + +## References + +https://huggingface.co/jakelever/coronabert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-bert_token_classifier_berturk_128k_keyword_discriminator_pipeline_tr.md b/docs/_posts/ahmedlone127/2024-09-04-bert_token_classifier_berturk_128k_keyword_discriminator_pipeline_tr.md new file mode 100644 index 00000000000000..912b93c6f8b4ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-bert_token_classifier_berturk_128k_keyword_discriminator_pipeline_tr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Turkish bert_token_classifier_berturk_128k_keyword_discriminator_pipeline pipeline BertForTokenClassification from yanekyuk +author: John Snow Labs +name: bert_token_classifier_berturk_128k_keyword_discriminator_pipeline +date: 2024-09-04 +tags: [tr, open_source, pipeline, onnx] +task: Named Entity Recognition +language: tr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_token_classifier_berturk_128k_keyword_discriminator_pipeline` is a Turkish model originally trained by yanekyuk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_token_classifier_berturk_128k_keyword_discriminator_pipeline_tr_5.5.0_3.0_1725477985434.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_token_classifier_berturk_128k_keyword_discriminator_pipeline_tr_5.5.0_3.0_1725477985434.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_token_classifier_berturk_128k_keyword_discriminator_pipeline", lang = "tr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_token_classifier_berturk_128k_keyword_discriminator_pipeline", lang = "tr") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_token_classifier_berturk_128k_keyword_discriminator_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|tr| +|Size:|689.0 MB| + +## References + +https://huggingface.co/yanekyuk/berturk-128k-keyword-discriminator + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-bertimbau_large_ner_total_pipeline_pt.md b/docs/_posts/ahmedlone127/2024-09-04-bertimbau_large_ner_total_pipeline_pt.md new file mode 100644 index 00000000000000..1f82dba532718e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-bertimbau_large_ner_total_pipeline_pt.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Portuguese bertimbau_large_ner_total_pipeline pipeline BertForTokenClassification from marquesafonso +author: John Snow Labs +name: bertimbau_large_ner_total_pipeline +date: 2024-09-04 +tags: [pt, open_source, pipeline, onnx] +task: Named Entity Recognition +language: pt +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bertimbau_large_ner_total_pipeline` is a Portuguese model originally trained by marquesafonso. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertimbau_large_ner_total_pipeline_pt_5.5.0_3.0_1725477385153.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertimbau_large_ner_total_pipeline_pt_5.5.0_3.0_1725477385153.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bertimbau_large_ner_total_pipeline", lang = "pt") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bertimbau_large_ner_total_pipeline", lang = "pt") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertimbau_large_ner_total_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|pt| +|Size:|406.0 MB| + +## References + +https://huggingface.co/marquesafonso/bertimbau-large-ner-total + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-bertweetfr_base_fr.md b/docs/_posts/ahmedlone127/2024-09-04-bertweetfr_base_fr.md new file mode 100644 index 00000000000000..ce8a17ed3628a9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-bertweetfr_base_fr.md @@ -0,0 +1,94 @@ +--- +layout: model +title: French bertweetfr_base CamemBertEmbeddings from Yanzhu +author: John Snow Labs +name: bertweetfr_base +date: 2024-09-04 +tags: [fr, open_source, onnx, embeddings, camembert] +task: Embeddings +language: fr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bertweetfr_base` is a French model originally trained by Yanzhu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertweetfr_base_fr_5.5.0_3.0_1725409061191.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertweetfr_base_fr_5.5.0_3.0_1725409061191.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("bertweetfr_base","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("bertweetfr_base","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
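+Each annotation in the `embeddings` column carries the token text in `result` and its vector in the `embeddings` field; a minimal sketch for extracting them (assuming the `pipelineDF` from the Python example):
+
+```python
+# Assumes `pipelineDF` from the example above.
+# `result` is the token and `embeddings` is its dense vector (an array of floats).
+pipelineDF.selectExpr("explode(embeddings) as emb") \
+    .selectExpr("emb.result as token", "emb.embeddings as vector") \
+    .show()
+```
+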
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertweetfr_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|fr| +|Size:|412.8 MB| + +## References + +https://huggingface.co/Yanzhu/bertweetfr-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-bertweetfr_base_pipeline_fr.md b/docs/_posts/ahmedlone127/2024-09-04-bertweetfr_base_pipeline_fr.md new file mode 100644 index 00000000000000..1354ee8fe8ed13 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-bertweetfr_base_pipeline_fr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: French bertweetfr_base_pipeline pipeline CamemBertEmbeddings from Yanzhu +author: John Snow Labs +name: bertweetfr_base_pipeline +date: 2024-09-04 +tags: [fr, open_source, pipeline, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bertweetfr_base_pipeline` is a French model originally trained by Yanzhu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertweetfr_base_pipeline_fr_5.5.0_3.0_1725409083209.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertweetfr_base_pipeline_fr_5.5.0_3.0_1725409083209.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bertweetfr_base_pipeline", lang = "fr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bertweetfr_base_pipeline", lang = "fr") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertweetfr_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fr| +|Size:|412.9 MB| + +## References + +https://huggingface.co/Yanzhu/bertweetfr-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-best_model_yelp_polarity_32_87_en.md b/docs/_posts/ahmedlone127/2024-09-04-best_model_yelp_polarity_32_87_en.md new file mode 100644 index 00000000000000..cc9eb17cd6843d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-best_model_yelp_polarity_32_87_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English best_model_yelp_polarity_32_87 AlbertForSequenceClassification from simonycl +author: John Snow Labs +name: best_model_yelp_polarity_32_87 +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`best_model_yelp_polarity_32_87` is a English model originally trained by simonycl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/best_model_yelp_polarity_32_87_en_5.5.0_3.0_1725488146017.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/best_model_yelp_polarity_32_87_en_5.5.0_3.0_1725488146017.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, AlbertForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = AlbertForSequenceClassification.pretrained("best_model_yelp_polarity_32_87","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = AlbertForSequenceClassification.pretrained("best_model_yelp_polarity_32_87", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
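+Once fitted, the pipeline is a regular Spark ML `PipelineModel` and can be persisted and reloaded like any other model (the path below is a placeholder):
+
+```python
+from pyspark.ml import PipelineModel
+
+# Placeholder path; saving the fitted pipeline avoids refitting on the next run.
+pipelineModel.write().overwrite().save("/tmp/best_model_yelp_polarity_32_87_pipeline")
+restored = PipelineModel.load("/tmp/best_model_yelp_polarity_32_87_pipeline")
+restored.transform(data).select("class.result").show(truncate=False)
+```
+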
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|best_model_yelp_polarity_32_87| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/simonycl/best_model-yelp_polarity-32-87 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-best_model_yelp_polarity_32_87_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-best_model_yelp_polarity_32_87_pipeline_en.md new file mode 100644 index 00000000000000..819d77507f5f5e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-best_model_yelp_polarity_32_87_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English best_model_yelp_polarity_32_87_pipeline pipeline AlbertForSequenceClassification from simonycl +author: John Snow Labs +name: best_model_yelp_polarity_32_87_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`best_model_yelp_polarity_32_87_pipeline` is a English model originally trained by simonycl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/best_model_yelp_polarity_32_87_pipeline_en_5.5.0_3.0_1725488148374.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/best_model_yelp_polarity_32_87_pipeline_en_5.5.0_3.0_1725488148374.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("best_model_yelp_polarity_32_87_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("best_model_yelp_polarity_32_87_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|best_model_yelp_polarity_32_87_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/simonycl/best_model-yelp_polarity-32-87 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-beto_finetuned_ner_3_pipeline_es.md b/docs/_posts/ahmedlone127/2024-09-04-beto_finetuned_ner_3_pipeline_es.md new file mode 100644 index 00000000000000..08fbc4a0831429 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-beto_finetuned_ner_3_pipeline_es.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Castilian, Spanish beto_finetuned_ner_3_pipeline pipeline BertForTokenClassification from ifis +author: John Snow Labs +name: beto_finetuned_ner_3_pipeline +date: 2024-09-04 +tags: [es, open_source, pipeline, onnx] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`beto_finetuned_ner_3_pipeline` is a Castilian, Spanish model originally trained by ifis. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/beto_finetuned_ner_3_pipeline_es_5.5.0_3.0_1725450384642.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/beto_finetuned_ner_3_pipeline_es_5.5.0_3.0_1725450384642.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("beto_finetuned_ner_3_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("beto_finetuned_ner_3_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|beto_finetuned_ner_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|409.6 MB| + +## References + +https://huggingface.co/ifis/BETO-finetuned-ner-3 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-binary_token_classification_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-binary_token_classification_model_pipeline_en.md new file mode 100644 index 00000000000000..8c20a301f9b90a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-binary_token_classification_model_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English binary_token_classification_model_pipeline pipeline DistilBertForTokenClassification from sjtukai +author: John Snow Labs +name: binary_token_classification_model_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`binary_token_classification_model_pipeline` is a English model originally trained by sjtukai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/binary_token_classification_model_pipeline_en_5.5.0_3.0_1725476452805.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/binary_token_classification_model_pipeline_en_5.5.0_3.0_1725476452805.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("binary_token_classification_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("binary_token_classification_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|binary_token_classification_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/sjtukai/binary_token_classification_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-blair_roberta_base_generative_sentiment_en.md b/docs/_posts/ahmedlone127/2024-09-04-blair_roberta_base_generative_sentiment_en.md new file mode 100644 index 00000000000000..6333810c9eff7f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-blair_roberta_base_generative_sentiment_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English blair_roberta_base_generative_sentiment RoBertaForSequenceClassification from alapanik +author: John Snow Labs +name: blair_roberta_base_generative_sentiment +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`blair_roberta_base_generative_sentiment` is a English model originally trained by alapanik. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/blair_roberta_base_generative_sentiment_en_5.5.0_3.0_1725452904425.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/blair_roberta_base_generative_sentiment_en_5.5.0_3.0_1725452904425.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("blair_roberta_base_generative_sentiment","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("blair_roberta_base_generative_sentiment", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
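+Building on the fitted `pipelineModel` above, a quick way to score single sentences is Spark NLP's `LightPipeline`; this is a minimal sketch, and the example sentence is an illustrative assumption.
+```python
+from sparknlp.base import LightPipeline
+
+# Predicted labels for the DataFrame run shown above
+pipelineDF.select("text", "class.result").show(truncate = False)
+
+# LightPipeline runs the same fitted stages on plain strings, without building a DataFrame
+light = LightPipeline(pipelineModel)
+print(light.annotate("The plot was predictable but the acting was superb.")["class"])
+```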
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|blair_roberta_base_generative_sentiment| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|468.1 MB| + +## References + +https://huggingface.co/alapanik/blair-roberta-base-generative-sentiment \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-bob_oriya_not_bob_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-bob_oriya_not_bob_pipeline_en.md new file mode 100644 index 00000000000000..9e52e8aeb1355b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-bob_oriya_not_bob_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bob_oriya_not_bob_pipeline pipeline DistilBertForSequenceClassification from MathNcl +author: John Snow Labs +name: bob_oriya_not_bob_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bob_oriya_not_bob_pipeline` is a English model originally trained by MathNcl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bob_oriya_not_bob_pipeline_en_5.5.0_3.0_1725490139567.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bob_oriya_not_bob_pipeline_en_5.5.0_3.0_1725490139567.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bob_oriya_not_bob_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bob_oriya_not_bob_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
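+For ad-hoc scoring without building a DataFrame, `PretrainedPipeline` also exposes `annotate()`; a minimal sketch follows. The sample sentence is illustrative, and the `class` key is an assumption mirroring the classifier's output column name.
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("bob_oriya_not_bob_pipeline", lang = "en")
+
+# annotate() accepts a plain string and returns a dict keyed by output column
+result = pipeline.annotate("Is this message about Bob or not?")
+print(result["class"])
+```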
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bob_oriya_not_bob_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/MathNcl/Bob_or_not_Bob + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-book_recognizer_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-book_recognizer_pipeline_en.md new file mode 100644 index 00000000000000..dcb4032ac3414d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-book_recognizer_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English book_recognizer_pipeline pipeline DistilBertForSequenceClassification from LaLaf93 +author: John Snow Labs +name: book_recognizer_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`book_recognizer_pipeline` is a English model originally trained by LaLaf93. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/book_recognizer_pipeline_en_5.5.0_3.0_1725489793893.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/book_recognizer_pipeline_en_5.5.0_3.0_1725489793893.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("book_recognizer_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("book_recognizer_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|book_recognizer_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/LaLaf93/book_recognizer + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-bsc_bio_spanish_es.md b/docs/_posts/ahmedlone127/2024-09-04-bsc_bio_spanish_es.md new file mode 100644 index 00000000000000..c5a4bdbbe4e20f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-bsc_bio_spanish_es.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Castilian, Spanish bsc_bio_spanish RoBertaEmbeddings from PlanTL-GOB-ES +author: John Snow Labs +name: bsc_bio_spanish +date: 2024-09-04 +tags: [es, open_source, onnx, embeddings, roberta] +task: Embeddings +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bsc_bio_spanish` is a Castilian, Spanish model originally trained by PlanTL-GOB-ES. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bsc_bio_spanish_es_5.5.0_3.0_1725413189462.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bsc_bio_spanish_es_5.5.0_3.0_1725413189462.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("bsc_bio_spanish","es") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("bsc_bio_spanish","es") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
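+Each annotation in the `embeddings` column carries the token text together with its vector; a small sketch for inspecting them, assuming the `pipelineDF` produced by the Python example above, could be:
+```python
+import pyspark.sql.functions as F
+
+# One row per token: the token string and its contextual embedding vector
+pipelineDF.select(F.explode("embeddings").alias("emb")) \
+    .select(F.col("emb.result").alias("token"), F.col("emb.embeddings").alias("vector")) \
+    .show(truncate = 80)
+```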
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bsc_bio_spanish| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|es| +|Size:|295.5 MB| + +## References + +https://huggingface.co/PlanTL-GOB-ES/bsc-bio-es \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_health_qa_model_35_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_health_qa_model_35_en.md new file mode 100644 index 00000000000000..ba071763ca0fa8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_health_qa_model_35_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_health_qa_model_35 RoBertaForQuestionAnswering from yashwan2003 +author: John Snow Labs +name: burmese_awesome_health_qa_model_35 +date: 2024-09-04 +tags: [en, open_source, onnx, question_answering, roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_health_qa_model_35` is a English model originally trained by yashwan2003. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_health_qa_model_35_en_5.5.0_3.0_1725484110275.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_health_qa_model_35_en_5.5.0_3.0_1725484110275.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = RoBertaForQuestionAnswering.pretrained("burmese_awesome_health_qa_model_35","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = RoBertaForQuestionAnswering.pretrained("burmese_awesome_health_qa_model_35", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
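+Once the pipeline above has run, the extracted span is available in the `answer` column; a short follow-up using the `pipelineDF` from the example (the column aliases are illustrative) might be:
+```python
+import pyspark.sql.functions as F
+
+# Show each question next to the predicted answer span
+pipelineDF.select(
+    F.col("document_question.result").alias("question"),
+    F.col("answer.result").alias("answer")
+).show(truncate = False)
+```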
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_health_qa_model_35| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|463.7 MB| + +## References + +https://huggingface.co/yashwan2003/my_awesome_health_qa_model_35 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_health_qa_model_35_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_health_qa_model_35_pipeline_en.md new file mode 100644 index 00000000000000..b844aee354ae5e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_health_qa_model_35_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_health_qa_model_35_pipeline pipeline RoBertaForQuestionAnswering from yashwan2003 +author: John Snow Labs +name: burmese_awesome_health_qa_model_35_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_health_qa_model_35_pipeline` is a English model originally trained by yashwan2003. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_health_qa_model_35_pipeline_en_5.5.0_3.0_1725484140539.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_health_qa_model_35_pipeline_en_5.5.0_3.0_1725484140539.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_health_qa_model_35_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_health_qa_model_35_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_health_qa_model_35_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|463.7 MB| + +## References + +https://huggingface.co/yashwan2003/my_awesome_health_qa_model_35 + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_model_2_nicolehao7_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_model_2_nicolehao7_en.md new file mode 100644 index 00000000000000..fa09ba93748bc8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_model_2_nicolehao7_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_model_2_nicolehao7 DistilBertForSequenceClassification from nicolehao7 +author: John Snow Labs +name: burmese_awesome_model_2_nicolehao7 +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_model_2_nicolehao7` is a English model originally trained by nicolehao7. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_model_2_nicolehao7_en_5.5.0_3.0_1725490269590.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_model_2_nicolehao7_en_5.5.0_3.0_1725490269590.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("burmese_awesome_model_2_nicolehao7","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("burmese_awesome_model_2_nicolehao7", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_model_2_nicolehao7| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.9 MB| + +## References + +https://huggingface.co/nicolehao7/my_awesome_model_2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_qa_model_40_len_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_qa_model_40_len_pipeline_en.md new file mode 100644 index 00000000000000..e131213fe45960 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_qa_model_40_len_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_40_len_pipeline pipeline RoBertaForQuestionAnswering from yashwan2003 +author: John Snow Labs +name: burmese_awesome_qa_model_40_len_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_40_len_pipeline` is a English model originally trained by yashwan2003. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_40_len_pipeline_en_5.5.0_3.0_1725483836798.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_40_len_pipeline_en_5.5.0_3.0_1725483836798.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_qa_model_40_len_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_40_len_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_40_len_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|463.6 MB| + +## References + +https://huggingface.co/yashwan2003/my_awesome_qa_model_40_len + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_actor_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_actor_pipeline_en.md new file mode 100644 index 00000000000000..6b5d9c53a6f47f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_actor_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_actor_pipeline pipeline DistilBertForTokenClassification from gonzalezrostani +author: John Snow Labs +name: burmese_awesome_wnut_actor_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_actor_pipeline` is a English model originally trained by gonzalezrostani. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_actor_pipeline_en_5.5.0_3.0_1725461099343.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_actor_pipeline_en_5.5.0_3.0_1725461099343.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_actor_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_actor_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_actor_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/gonzalezrostani/my_awesome_wnut_Actor + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_all_time_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_all_time_pipeline_en.md new file mode 100644 index 00000000000000..006288c5274a63 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_all_time_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_all_time_pipeline pipeline DistilBertForTokenClassification from gonzalezrostani +author: John Snow Labs +name: burmese_awesome_wnut_all_time_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_all_time_pipeline` is a English model originally trained by gonzalezrostani. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_all_time_pipeline_en_5.5.0_3.0_1725448793136.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_all_time_pipeline_en_5.5.0_3.0_1725448793136.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_all_time_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_all_time_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_all_time_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/gonzalezrostani/my_awesome_wnut_all_Time + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_adisur_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_adisur_en.md new file mode 100644 index 00000000000000..ed547c3ae0d713 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_adisur_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_adisur DistilBertForTokenClassification from adisur +author: John Snow Labs +name: burmese_awesome_wnut_model_adisur +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_adisur` is a English model originally trained by adisur. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_adisur_en_5.5.0_3.0_1725460664922.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_adisur_en_5.5.0_3.0_1725460664922.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_adisur","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_adisur", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
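+If entity chunks are preferred over per-token IOB tags, a `NerConverter` stage can be appended to the Python pipeline above; this extra stage is an optional extension sketched here, not part of the original model card.
+```python
+from sparknlp.annotator import NerConverter
+
+# Groups consecutive B-/I- tags from the 'ner' column into entity chunks,
+# reusing the documentAssembler, tokenizer, tokenClassifier and data defined above
+converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("entities")
+
+pipelineWithChunks = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier, converter])
+pipelineWithChunks.fit(data).transform(data).select("entities.result").show(truncate = False)
+```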
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_adisur| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/adisur/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_aditya_jindal_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_aditya_jindal_en.md new file mode 100644 index 00000000000000..4342c692982d63 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_aditya_jindal_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_aditya_jindal DistilBertForTokenClassification from adityajindal +author: John Snow Labs +name: burmese_awesome_wnut_model_aditya_jindal +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_aditya_jindal` is a English model originally trained by adityajindal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_aditya_jindal_en_5.5.0_3.0_1725492885685.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_aditya_jindal_en_5.5.0_3.0_1725492885685.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_aditya_jindal","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_aditya_jindal", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_aditya_jindal| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/adityajindal/my_awesome_wnut_model_aditya_jindal \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_aditya_jindal_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_aditya_jindal_pipeline_en.md new file mode 100644 index 00000000000000..7e1feb67556a62 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_aditya_jindal_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_aditya_jindal_pipeline pipeline DistilBertForTokenClassification from adityajindal +author: John Snow Labs +name: burmese_awesome_wnut_model_aditya_jindal_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_aditya_jindal_pipeline` is a English model originally trained by adityajindal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_aditya_jindal_pipeline_en_5.5.0_3.0_1725492898143.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_aditya_jindal_pipeline_en_5.5.0_3.0_1725492898143.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_aditya_jindal_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_aditya_jindal_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_aditya_jindal_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/adityajindal/my_awesome_wnut_model_aditya_jindal + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_almifosa_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_almifosa_pipeline_en.md new file mode 100644 index 00000000000000..9ecad1e460558d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_almifosa_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_almifosa_pipeline pipeline DistilBertForTokenClassification from almifosa +author: John Snow Labs +name: burmese_awesome_wnut_model_almifosa_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_almifosa_pipeline` is a English model originally trained by almifosa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_almifosa_pipeline_en_5.5.0_3.0_1725448215152.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_almifosa_pipeline_en_5.5.0_3.0_1725448215152.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_almifosa_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_almifosa_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_almifosa_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/almifosa/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_asrajgct_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_asrajgct_en.md new file mode 100644 index 00000000000000..87ee0b442a8970 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_asrajgct_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_asrajgct DistilBertForTokenClassification from asrajgct +author: John Snow Labs +name: burmese_awesome_wnut_model_asrajgct +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_asrajgct` is a English model originally trained by asrajgct. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_asrajgct_en_5.5.0_3.0_1725475758875.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_asrajgct_en_5.5.0_3.0_1725475758875.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_asrajgct","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_asrajgct", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_asrajgct| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/asrajgct/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_charliefederer_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_charliefederer_en.md new file mode 100644 index 00000000000000..b4125e8088af19 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_charliefederer_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_charliefederer DistilBertForTokenClassification from Charliefederer +author: John Snow Labs +name: burmese_awesome_wnut_model_charliefederer +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_charliefederer` is a English model originally trained by Charliefederer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_charliefederer_en_5.5.0_3.0_1725492437589.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_charliefederer_en_5.5.0_3.0_1725492437589.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_charliefederer","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_charliefederer", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_charliefederer| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Charliefederer/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_claire5776_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_claire5776_pipeline_en.md new file mode 100644 index 00000000000000..963f5d57b3f22a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_claire5776_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_claire5776_pipeline pipeline DistilBertForTokenClassification from claire5776 +author: John Snow Labs +name: burmese_awesome_wnut_model_claire5776_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_claire5776_pipeline` is a English model originally trained by claire5776. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_claire5776_pipeline_en_5.5.0_3.0_1725448602312.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_claire5776_pipeline_en_5.5.0_3.0_1725448602312.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_claire5776_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_claire5776_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_claire5776_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/claire5776/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_diodiodada_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_diodiodada_en.md new file mode 100644 index 00000000000000..3421d58d039cfa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_diodiodada_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_diodiodada DistilBertForTokenClassification from diodiodada +author: John Snow Labs +name: burmese_awesome_wnut_model_diodiodada +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_diodiodada` is a English model originally trained by diodiodada. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_diodiodada_en_5.5.0_3.0_1725461101615.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_diodiodada_en_5.5.0_3.0_1725461101615.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_diodiodada","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_diodiodada", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_diodiodada| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/diodiodada/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_gaogao8_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_gaogao8_en.md new file mode 100644 index 00000000000000..66911087120c93 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_gaogao8_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_gaogao8 DistilBertForTokenClassification from gaogao8 +author: John Snow Labs +name: burmese_awesome_wnut_model_gaogao8 +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_gaogao8` is a English model originally trained by gaogao8. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_gaogao8_en_5.5.0_3.0_1725475759673.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_gaogao8_en_5.5.0_3.0_1725475759673.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_gaogao8","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_gaogao8", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_gaogao8| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/gaogao8/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_hcy5561_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_hcy5561_pipeline_en.md new file mode 100644 index 00000000000000..96845481247d77 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_hcy5561_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_hcy5561_pipeline pipeline DistilBertForTokenClassification from hcy5561 +author: John Snow Labs +name: burmese_awesome_wnut_model_hcy5561_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_hcy5561_pipeline` is a English model originally trained by hcy5561. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_hcy5561_pipeline_en_5.5.0_3.0_1725448923572.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_hcy5561_pipeline_en_5.5.0_3.0_1725448923572.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_hcy5561_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_hcy5561_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
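+
+A minimal end-to-end sketch, assuming the pipeline reads a `text` column (as the DocumentAssembler among the included stages suggests) and writes its predictions to a column named `ner`:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# Toy input DataFrame with a 'text' column (column name is an assumption)
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("burmese_awesome_wnut_model_hcy5561_pipeline", lang="en")
+annotations = pipeline.transform(df)
+
+# Inspect the token-level predictions ('ner' output column is an assumption)
+annotations.select("ner.result").show(truncate=False)
+```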
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_hcy5561_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/hcy5561/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_hrodriguez_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_hrodriguez_pipeline_en.md new file mode 100644 index 00000000000000..22351bd02be203 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_hrodriguez_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_hrodriguez_pipeline pipeline DistilBertForTokenClassification from hrodriguez +author: John Snow Labs +name: burmese_awesome_wnut_model_hrodriguez_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_hrodriguez_pipeline` is a English model originally trained by hrodriguez. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_hrodriguez_pipeline_en_5.5.0_3.0_1725476119013.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_hrodriguez_pipeline_en_5.5.0_3.0_1725476119013.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_hrodriguez_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_hrodriguez_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_hrodriguez_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/hrodriguez/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_lmattes_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_lmattes_pipeline_en.md new file mode 100644 index 00000000000000..3194563dcd2f1f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_lmattes_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_lmattes_pipeline pipeline DistilBertForTokenClassification from lmattes +author: John Snow Labs +name: burmese_awesome_wnut_model_lmattes_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_lmattes_pipeline` is a English model originally trained by lmattes. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_lmattes_pipeline_en_5.5.0_3.0_1725492991222.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_lmattes_pipeline_en_5.5.0_3.0_1725492991222.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_lmattes_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_lmattes_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_lmattes_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/lmattes/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_malduwais_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_malduwais_en.md new file mode 100644 index 00000000000000..df2d10700ed3ba --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_malduwais_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_malduwais DistilBertForTokenClassification from malduwais +author: John Snow Labs +name: burmese_awesome_wnut_model_malduwais +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_malduwais` is a English model originally trained by malduwais. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_malduwais_en_5.5.0_3.0_1725448317822.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_malduwais_en_5.5.0_3.0_1725448317822.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForTokenClassification
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()
+
+# Assemble raw text into document annotations
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+# Split each document into tokens
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# Tag each token with a named-entity label
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_malduwais","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotators.Tokenizer
+import com.johnsnowlabs.nlp.annotators.classifier.dl.DistilBertForTokenClassification
+import org.apache.spark.ml.Pipeline
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_malduwais", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_malduwais| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/malduwais/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_minhminh09_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_minhminh09_pipeline_en.md new file mode 100644 index 00000000000000..463e9284200927 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_minhminh09_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_minhminh09_pipeline pipeline DistilBertForTokenClassification from MinhMinh09 +author: John Snow Labs +name: burmese_awesome_wnut_model_minhminh09_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_minhminh09_pipeline` is a English model originally trained by MinhMinh09. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_minhminh09_pipeline_en_5.5.0_3.0_1725448530243.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_minhminh09_pipeline_en_5.5.0_3.0_1725448530243.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_minhminh09_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_minhminh09_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_minhminh09_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/MinhMinh09/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_moumitanettojanamanna_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_moumitanettojanamanna_pipeline_en.md new file mode 100644 index 00000000000000..48ad8c2f3c4426 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_moumitanettojanamanna_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_moumitanettojanamanna_pipeline pipeline DistilBertForTokenClassification from MoumitaNettoJanaManna +author: John Snow Labs +name: burmese_awesome_wnut_model_moumitanettojanamanna_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_moumitanettojanamanna_pipeline` is a English model originally trained by MoumitaNettoJanaManna. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_moumitanettojanamanna_pipeline_en_5.5.0_3.0_1725476011264.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_moumitanettojanamanna_pipeline_en_5.5.0_3.0_1725476011264.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_moumitanettojanamanna_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_moumitanettojanamanna_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_moumitanettojanamanna_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/MoumitaNettoJanaManna/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_portokali_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_portokali_en.md new file mode 100644 index 00000000000000..fe6df5d2eaaad7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_portokali_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_portokali DistilBertForTokenClassification from Portokali +author: John Snow Labs +name: burmese_awesome_wnut_model_portokali +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_portokali` is a English model originally trained by Portokali. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_portokali_en_5.5.0_3.0_1725493024924.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_portokali_en_5.5.0_3.0_1725493024924.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForTokenClassification
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()
+
+# Assemble raw text into document annotations
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+# Split each document into tokens
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# Tag each token with a named-entity label
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_portokali","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotators.Tokenizer
+import com.johnsnowlabs.nlp.annotators.classifier.dl.DistilBertForTokenClassification
+import org.apache.spark.ml.Pipeline
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_portokali", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
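+
+For quick single-document checks, a `LightPipeline` can wrap the fitted pipeline above; a short sketch:
+
+```python
+from sparknlp.base import LightPipeline
+
+# Annotate a plain string without building a DataFrame
+light = LightPipeline(pipelineModel)
+print(light.annotate("I love spark-nlp"))
+```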
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_portokali| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Portokali/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_portokali_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_portokali_pipeline_en.md new file mode 100644 index 00000000000000..c6219113652ff6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_portokali_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_portokali_pipeline pipeline DistilBertForTokenClassification from Portokali +author: John Snow Labs +name: burmese_awesome_wnut_model_portokali_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_portokali_pipeline` is a English model originally trained by Portokali. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_portokali_pipeline_en_5.5.0_3.0_1725493036605.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_portokali_pipeline_en_5.5.0_3.0_1725493036605.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_portokali_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_portokali_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_portokali_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Portokali/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_robinsh2023_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_robinsh2023_pipeline_en.md new file mode 100644 index 00000000000000..7376bed87332ac --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_robinsh2023_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_robinsh2023_pipeline pipeline DistilBertForTokenClassification from Robinsh2023 +author: John Snow Labs +name: burmese_awesome_wnut_model_robinsh2023_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_robinsh2023_pipeline` is a English model originally trained by Robinsh2023. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_robinsh2023_pipeline_en_5.5.0_3.0_1725449044140.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_robinsh2023_pipeline_en_5.5.0_3.0_1725449044140.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_robinsh2023_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_robinsh2023_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_robinsh2023_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Robinsh2023/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_rw2614_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_rw2614_pipeline_en.md new file mode 100644 index 00000000000000..9d3450936127b6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_rw2614_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_rw2614_pipeline pipeline DistilBertForTokenClassification from rw2614 +author: John Snow Labs +name: burmese_awesome_wnut_model_rw2614_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_rw2614_pipeline` is a English model originally trained by rw2614. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_rw2614_pipeline_en_5.5.0_3.0_1725461016349.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_rw2614_pipeline_en_5.5.0_3.0_1725461016349.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_rw2614_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_rw2614_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_rw2614_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/rw2614/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_svangorden13_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_svangorden13_en.md new file mode 100644 index 00000000000000..58b4687b06c3c9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_svangorden13_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_svangorden13 DistilBertForTokenClassification from Svangorden13 +author: John Snow Labs +name: burmese_awesome_wnut_model_svangorden13 +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_svangorden13` is a English model originally trained by Svangorden13. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_svangorden13_en_5.5.0_3.0_1725475891195.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_svangorden13_en_5.5.0_3.0_1725475891195.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForTokenClassification
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()
+
+# Assemble raw text into document annotations
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+# Split each document into tokens
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# Tag each token with a named-entity label
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_svangorden13","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotators.Tokenizer
+import com.johnsnowlabs.nlp.annotators.classifier.dl.DistilBertForTokenClassification
+import org.apache.spark.ml.Pipeline
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_svangorden13", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
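+
+If grouped entity spans are preferred over token-level tags, a `NerConverter` stage can be appended to the pipeline above; a sketch of that variant, assuming the same column names:
+
+```python
+from sparknlp.annotator import NerConverter
+
+# Collapse IOB-style token tags into entity chunks
+nerConverter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+chunked = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier, nerConverter]) \
+    .fit(data).transform(data)
+chunked.select("ner_chunk.result").show(truncate=False)
+```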
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_svangorden13| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Svangorden13/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_svangorden13_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_svangorden13_pipeline_en.md new file mode 100644 index 00000000000000..d1e6d20ba134ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_svangorden13_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_svangorden13_pipeline pipeline DistilBertForTokenClassification from Svangorden13 +author: John Snow Labs +name: burmese_awesome_wnut_model_svangorden13_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_svangorden13_pipeline` is a English model originally trained by Svangorden13. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_svangorden13_pipeline_en_5.5.0_3.0_1725475904593.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_svangorden13_pipeline_en_5.5.0_3.0_1725475904593.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_svangorden13_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_svangorden13_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_svangorden13_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Svangorden13/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_urisoo_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_urisoo_pipeline_en.md new file mode 100644 index 00000000000000..2039382cdf52b3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_urisoo_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_urisoo_pipeline pipeline DistilBertForTokenClassification from urisoo +author: John Snow Labs +name: burmese_awesome_wnut_model_urisoo_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_urisoo_pipeline` is a English model originally trained by urisoo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_urisoo_pipeline_en_5.5.0_3.0_1725476123310.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_urisoo_pipeline_en_5.5.0_3.0_1725476123310.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_urisoo_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_urisoo_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_urisoo_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/urisoo/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_wzchen_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_wzchen_en.md new file mode 100644 index 00000000000000..b0df349f336e7c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_model_wzchen_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_wzchen DistilBertForTokenClassification from wzChen +author: John Snow Labs +name: burmese_awesome_wnut_model_wzchen +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_wzchen` is a English model originally trained by wzChen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_wzchen_en_5.5.0_3.0_1725460953410.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_wzchen_en_5.5.0_3.0_1725460953410.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForTokenClassification
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()
+
+# Assemble raw text into document annotations
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+# Split each document into tokens
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# Tag each token with a named-entity label
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_wzchen","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotators.Tokenizer
+import com.johnsnowlabs.nlp.annotators.classifier.dl.DistilBertForTokenClassification
+import org.apache.spark.ml.Pipeline
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_wzchen", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_wzchen| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/wzChen/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_saprotection_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_saprotection_pipeline_en.md new file mode 100644 index 00000000000000..83be16be6efa78 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_saprotection_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_saprotection_pipeline pipeline DistilBertForTokenClassification from gonzalezrostani +author: John Snow Labs +name: burmese_awesome_wnut_saprotection_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_saprotection_pipeline` is a English model originally trained by gonzalezrostani. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_saprotection_pipeline_en_5.5.0_3.0_1725460909509.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_saprotection_pipeline_en_5.5.0_3.0_1725460909509.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_saprotection_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_saprotection_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_saprotection_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/gonzalezrostani/my_awesome_wnut_SAprotection + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_target_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_target_pipeline_en.md new file mode 100644 index 00000000000000..8fccc719863acc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_awesome_wnut_target_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_target_pipeline pipeline DistilBertForTokenClassification from gonzalezrostani +author: John Snow Labs +name: burmese_awesome_wnut_target_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_target_pipeline` is a English model originally trained by gonzalezrostani. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_target_pipeline_en_5.5.0_3.0_1725493133685.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_target_pipeline_en_5.5.0_3.0_1725493133685.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_target_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_target_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
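+
+For a quick spot-check without building a DataFrame, `PretrainedPipeline` also exposes `annotate` on plain strings; a short sketch:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("burmese_awesome_wnut_target_pipeline", lang="en")
+# Returns a dict keyed by the pipeline's output column names, e.g. 'token' and 'ner'
+print(pipeline.annotate("I love spark-nlp"))
+```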
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_target_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/gonzalezrostani/my_awesome_wnut_Target + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_bert_qa_model_05_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_bert_qa_model_05_pipeline_en.md new file mode 100644 index 00000000000000..654895d3fec04e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_bert_qa_model_05_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_bert_qa_model_05_pipeline pipeline RoBertaForQuestionAnswering from arunkarthik +author: John Snow Labs +name: burmese_bert_qa_model_05_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_bert_qa_model_05_pipeline` is a English model originally trained by arunkarthik. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_bert_qa_model_05_pipeline_en_5.5.0_3.0_1725479278198.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_bert_qa_model_05_pipeline_en_5.5.0_3.0_1725479278198.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_bert_qa_model_05_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_bert_qa_model_05_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
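+
+A hedged usage sketch: based on the MultiDocumentAssembler stage listed under Included Models, the input is assumed to carry `question` and `context` columns, and the `answer` output column name is likewise an assumption:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# Hypothetical question/context input; column names are assumptions
+df = spark.createDataFrame(
+    [["Where does Clara live?", "My name is Clara and I live in Berkeley."]]
+).toDF("question", "context")
+
+pipeline = PretrainedPipeline("burmese_bert_qa_model_05_pipeline", lang="en")
+annotations = pipeline.transform(df)
+annotations.select("answer.result").show(truncate=False)  # 'answer' is an assumed output column
+```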
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_bert_qa_model_05_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|463.5 MB| + +## References + +https://huggingface.co/arunkarthik/my_bert_qa_model_05 + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_ner_model_atajan99_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_ner_model_atajan99_en.md new file mode 100644 index 00000000000000..c666faa51cc955 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_ner_model_atajan99_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_ner_model_atajan99 DistilBertForTokenClassification from Atajan99 +author: John Snow Labs +name: burmese_ner_model_atajan99 +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_ner_model_atajan99` is a English model originally trained by Atajan99. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_ner_model_atajan99_en_5.5.0_3.0_1725476386731.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_ner_model_atajan99_en_5.5.0_3.0_1725476386731.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForTokenClassification
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()
+
+# Assemble raw text into document annotations
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+# Split each document into tokens
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# Tag each token with a named-entity label
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_ner_model_atajan99","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotators.Tokenizer
+import com.johnsnowlabs.nlp.annotators.classifier.dl.DistilBertForTokenClassification
+import org.apache.spark.ml.Pipeline
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_ner_model_atajan99", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_ner_model_atajan99| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Atajan99/my_ner_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_ner_model_atajan99_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_ner_model_atajan99_pipeline_en.md new file mode 100644 index 00000000000000..49e6a2e7406e78 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_ner_model_atajan99_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_ner_model_atajan99_pipeline pipeline DistilBertForTokenClassification from Atajan99 +author: John Snow Labs +name: burmese_ner_model_atajan99_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_ner_model_atajan99_pipeline` is a English model originally trained by Atajan99. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_ner_model_atajan99_pipeline_en_5.5.0_3.0_1725476399869.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_ner_model_atajan99_pipeline_en_5.5.0_3.0_1725476399869.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_ner_model_atajan99_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_ner_model_atajan99_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_ner_model_atajan99_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Atajan99/my_ner_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_ner_model_delphine18_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_ner_model_delphine18_en.md new file mode 100644 index 00000000000000..78817e3833815b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_ner_model_delphine18_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_ner_model_delphine18 DistilBertForTokenClassification from delphine18 +author: John Snow Labs +name: burmese_ner_model_delphine18 +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_ner_model_delphine18` is a English model originally trained by delphine18. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_ner_model_delphine18_en_5.5.0_3.0_1725460451827.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_ner_model_delphine18_en_5.5.0_3.0_1725460451827.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForTokenClassification
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()
+
+# Assemble raw text into document annotations
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+# Split each document into tokens
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# Tag each token with a named-entity label
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_ner_model_delphine18","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotators.Tokenizer
+import com.johnsnowlabs.nlp.annotators.classifier.dl.DistilBertForTokenClassification
+import org.apache.spark.ml.Pipeline
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_ner_model_delphine18", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
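+
+To reuse the fitted pipeline later, it can be persisted with standard Spark ML mechanics; the path below is only a placeholder:
+
+```python
+from pyspark.ml import PipelineModel
+
+# Save the fitted pipeline to disk (placeholder path) and load it back
+pipelineModel.write().overwrite().save("/tmp/burmese_ner_model_delphine18_spark_nlp")
+restored = PipelineModel.load("/tmp/burmese_ner_model_delphine18_spark_nlp")
+restored.transform(data).select("ner.result").show(truncate=False)
+```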
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_ner_model_delphine18| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/delphine18/my_ner_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_ner_model_delphine18_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_ner_model_delphine18_pipeline_en.md new file mode 100644 index 00000000000000..57f99b4321fa64 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_ner_model_delphine18_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_ner_model_delphine18_pipeline pipeline DistilBertForTokenClassification from delphine18 +author: John Snow Labs +name: burmese_ner_model_delphine18_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_ner_model_delphine18_pipeline` is a English model originally trained by delphine18. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_ner_model_delphine18_pipeline_en_5.5.0_3.0_1725460463752.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_ner_model_delphine18_pipeline_en_5.5.0_3.0_1725460463752.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_ner_model_delphine18_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_ner_model_delphine18_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_ner_model_delphine18_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/delphine18/my_ner_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-burmese_ner_model_luccaaug_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-burmese_ner_model_luccaaug_pipeline_en.md new file mode 100644 index 00000000000000..bfd75c9454b7f3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-burmese_ner_model_luccaaug_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_ner_model_luccaaug_pipeline pipeline DistilBertForTokenClassification from LuccaAug +author: John Snow Labs +name: burmese_ner_model_luccaaug_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_ner_model_luccaaug_pipeline` is a English model originally trained by LuccaAug. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_ner_model_luccaaug_pipeline_en_5.5.0_3.0_1725492929477.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_ner_model_luccaaug_pipeline_en_5.5.0_3.0_1725492929477.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_ner_model_luccaaug_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_ner_model_luccaaug_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_ner_model_luccaaug_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/LuccaAug/my_ner_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-camembert_base_dataikunlp_fr.md b/docs/_posts/ahmedlone127/2024-09-04-camembert_base_dataikunlp_fr.md new file mode 100644 index 00000000000000..8241c6fac37685 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-camembert_base_dataikunlp_fr.md @@ -0,0 +1,94 @@ +--- +layout: model +title: French camembert_base_dataikunlp CamemBertEmbeddings from DataikuNLP +author: John Snow Labs +name: camembert_base_dataikunlp +date: 2024-09-04 +tags: [fr, open_source, onnx, embeddings, camembert] +task: Embeddings +language: fr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`camembert_base_dataikunlp` is a French model originally trained by DataikuNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_base_dataikunlp_fr_5.5.0_3.0_1725408404157.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_base_dataikunlp_fr_5.5.0_3.0_1725408404157.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_base_dataikunlp","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_base_dataikunlp","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
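+
+A brief follow-up for inspecting the resulting vectors; the column names come from the example above, and the annotation fields (`result` for the token text, `embeddings` for the vector) follow the standard Spark NLP annotation schema, so treat this as an illustrative sketch:
+
+```python
+# One row per token: the token text and its CamemBERT embedding vector
+pipelineDF.selectExpr("explode(embeddings) as emb") \
+    .selectExpr("emb.result as token", "emb.embeddings as vector") \
+    .show(truncate = False)
+```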
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_base_dataikunlp| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|fr| +|Size:|264.0 MB| + +## References + +https://huggingface.co/DataikuNLP/camembert-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-camembert_ccnet_classification_analyse_visage_classifier_only_french_lr1e_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-camembert_ccnet_classification_analyse_visage_classifier_only_french_lr1e_3_pipeline_en.md new file mode 100644 index 00000000000000..49f61509509865 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-camembert_ccnet_classification_analyse_visage_classifier_only_french_lr1e_3_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English camembert_ccnet_classification_analyse_visage_classifier_only_french_lr1e_3_pipeline pipeline CamemBertForSequenceClassification from AntoineD +author: John Snow Labs +name: camembert_ccnet_classification_analyse_visage_classifier_only_french_lr1e_3_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`camembert_ccnet_classification_analyse_visage_classifier_only_french_lr1e_3_pipeline` is a English model originally trained by AntoineD. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_ccnet_classification_analyse_visage_classifier_only_french_lr1e_3_pipeline_en_5.5.0_3.0_1725466981504.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_ccnet_classification_analyse_visage_classifier_only_french_lr1e_3_pipeline_en_5.5.0_3.0_1725466981504.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("camembert_ccnet_classification_analyse_visage_classifier_only_french_lr1e_3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("camembert_ccnet_classification_analyse_visage_classifier_only_french_lr1e_3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_ccnet_classification_analyse_visage_classifier_only_french_lr1e_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|265.9 MB| + +## References + +https://huggingface.co/AntoineD/camembert_ccnet_classification_analyse_visage_classifier-only_fr_lr1e-3 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-camembert_mlm_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-camembert_mlm_pipeline_en.md new file mode 100644 index 00000000000000..dfd89cf7d702d9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-camembert_mlm_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English camembert_mlm_pipeline pipeline CamemBertEmbeddings from Jodsa +author: John Snow Labs +name: camembert_mlm_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`camembert_mlm_pipeline` is a English model originally trained by Jodsa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_mlm_pipeline_en_5.5.0_3.0_1725444993662.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_mlm_pipeline_en_5.5.0_3.0_1725444993662.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("camembert_mlm_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("camembert_mlm_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_mlm_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|417.9 MB| + +## References + +https://huggingface.co/Jodsa/camembert_mlm + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-classify_isin_step7_binary_en.md b/docs/_posts/ahmedlone127/2024-09-04-classify_isin_step7_binary_en.md new file mode 100644 index 00000000000000..8e7260a64302c6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-classify_isin_step7_binary_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English classify_isin_step7_binary AlbertForSequenceClassification from calculito +author: John Snow Labs +name: classify_isin_step7_binary +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`classify_isin_step7_binary` is a English model originally trained by calculito. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/classify_isin_step7_binary_en_5.5.0_3.0_1725464461696.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/classify_isin_step7_binary_en_5.5.0_3.0_1725464461696.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Assemble raw text, tokenize, then classify the sequence with the pretrained ALBERT model
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = AlbertForSequenceClassification.pretrained("classify_isin_step7_binary","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// Assemble raw text, tokenize, then classify the sequence with the pretrained ALBERT model
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = AlbertForSequenceClassification.pretrained("classify_isin_step7_binary", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
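+
+The predicted label can then be read from the `class` output column defined above; a minimal, illustrative sketch:
+
+```python
+# Each row holds the sequence-level prediction; `result` contains the predicted label
+pipelineDF.select("text", "class.result").show(truncate = False)
+```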
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|classify_isin_step7_binary| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/calculito/classify-ISIN-STEP7_binary \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-classify_isin_step7_binary_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-classify_isin_step7_binary_pipeline_en.md new file mode 100644 index 00000000000000..fe4dee3e347364 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-classify_isin_step7_binary_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English classify_isin_step7_binary_pipeline pipeline AlbertForSequenceClassification from calculito +author: John Snow Labs +name: classify_isin_step7_binary_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`classify_isin_step7_binary_pipeline` is a English model originally trained by calculito. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/classify_isin_step7_binary_pipeline_en_5.5.0_3.0_1725464464119.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/classify_isin_step7_binary_pipeline_en_5.5.0_3.0_1725464464119.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("classify_isin_step7_binary_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("classify_isin_step7_binary_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|classify_isin_step7_binary_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/calculito/classify-ISIN-STEP7_binary + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-clinicalbert_bionlp13cg_ner_nepal_bhasa_en.md b/docs/_posts/ahmedlone127/2024-09-04-clinicalbert_bionlp13cg_ner_nepal_bhasa_en.md new file mode 100644 index 00000000000000..0491e578cbb169 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-clinicalbert_bionlp13cg_ner_nepal_bhasa_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English clinicalbert_bionlp13cg_ner_nepal_bhasa DistilBertForTokenClassification from judithrosell +author: John Snow Labs +name: clinicalbert_bionlp13cg_ner_nepal_bhasa +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clinicalbert_bionlp13cg_ner_nepal_bhasa` is a English model originally trained by judithrosell. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clinicalbert_bionlp13cg_ner_nepal_bhasa_en_5.5.0_3.0_1725476406337.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clinicalbert_bionlp13cg_ner_nepal_bhasa_en_5.5.0_3.0_1725476406337.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Assemble raw text, tokenize, then tag tokens with the pretrained DistilBERT NER model
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("clinicalbert_bionlp13cg_ner_nepal_bhasa","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// Assemble raw text, tokenize, then tag tokens with the pretrained DistilBERT NER model
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("clinicalbert_bionlp13cg_ner_nepal_bhasa", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clinicalbert_bionlp13cg_ner_nepal_bhasa| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|505.4 MB| + +## References + +https://huggingface.co/judithrosell/ClinicalBERT_BioNLP13CG_NER_new \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-clip_base_patch16_supervised_mulitilingual_1600_en.md b/docs/_posts/ahmedlone127/2024-09-04-clip_base_patch16_supervised_mulitilingual_1600_en.md new file mode 100644 index 00000000000000..c77c8c43d31fc2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-clip_base_patch16_supervised_mulitilingual_1600_en.md @@ -0,0 +1,120 @@ +--- +layout: model +title: English clip_base_patch16_supervised_mulitilingual_1600 CLIPForZeroShotClassification from gowitheflowlab +author: John Snow Labs +name: clip_base_patch16_supervised_mulitilingual_1600 +date: 2024-09-04 +tags: [en, open_source, onnx, zero_shot, clip, image] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CLIPForZeroShotClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clip_base_patch16_supervised_mulitilingual_1600` is a English model originally trained by gowitheflowlab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clip_base_patch16_supervised_mulitilingual_1600_en_5.5.0_3.0_1725490955649.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clip_base_patch16_supervised_mulitilingual_1600_en_5.5.0_3.0_1725490955649.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Load a folder of images, assemble them, and classify each image against the candidate labels
+imageDF = spark.read \
+    .format("image") \
+    .option("dropInvalid", value = True) \
+    .load("src/test/resources/image/")
+
+candidateLabels = [
+    "a photo of a bird",
+    "a photo of a cat",
+    "a photo of a dog",
+    "a photo of a hen",
+    "a photo of a hippo",
+    "a photo of a room",
+    "a photo of a tractor",
+    "a photo of an ostrich",
+    "a photo of an ox"]
+
+imageAssembler = ImageAssembler() \
+    .setInputCol("image") \
+    .setOutputCol("image_assembler")
+
+imageClassifier = CLIPForZeroShotClassification.pretrained("clip_base_patch16_supervised_mulitilingual_1600","en") \
+    .setInputCols(["image_assembler"]) \
+    .setOutputCol("label") \
+    .setCandidateLabels(candidateLabels)
+
+pipeline = Pipeline().setStages([imageAssembler, imageClassifier])
+pipelineModel = pipeline.fit(imageDF)
+pipelineDF = pipelineModel.transform(imageDF)
+
+```
+```scala
+
+// Load a folder of images, assemble them, and classify each image against the candidate labels
+val imageDF = ResourceHelper.spark.read
+  .format("image")
+  .option("dropInvalid", value = true)
+  .load("src/test/resources/image/")
+
+val candidateLabels = Array(
+  "a photo of a bird",
+  "a photo of a cat",
+  "a photo of a dog",
+  "a photo of a hen",
+  "a photo of a hippo",
+  "a photo of a room",
+  "a photo of a tractor",
+  "a photo of an ostrich",
+  "a photo of an ox")
+
+val imageAssembler = new ImageAssembler()
+  .setInputCol("image")
+  .setOutputCol("image_assembler")
+
+val imageClassifier = CLIPForZeroShotClassification.pretrained("clip_base_patch16_supervised_mulitilingual_1600","en")
+  .setInputCols(Array("image_assembler"))
+  .setOutputCol("label")
+  .setCandidateLabels(candidateLabels)
+
+val pipeline = new Pipeline().setStages(Array(imageAssembler, imageClassifier))
+val pipelineModel = pipeline.fit(imageDF)
+val pipelineDF = pipelineModel.transform(imageDF)
+
+```
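+
+After the transform, the best-matching candidate label for each image can be read from the `label` output column; the `image.origin` field comes from Spark's image data source. This is an illustrative sketch based on the standard Spark NLP annotation schema:
+
+```python
+# Show each image path alongside its predicted candidate label
+pipelineDF.select("image.origin", "label.result").show(truncate = False)
+```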
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clip_base_patch16_supervised_mulitilingual_1600| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[label]| +|Language:|en| +|Size:|509.7 MB| + +## References + +https://huggingface.co/gowitheflowlab/clip-base-patch16-supervised-mulitilingual-1600 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-clip_base_patch16_supervised_mulitilingual_1600_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-clip_base_patch16_supervised_mulitilingual_1600_pipeline_en.md new file mode 100644 index 00000000000000..52757d3c7c059d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-clip_base_patch16_supervised_mulitilingual_1600_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English clip_base_patch16_supervised_mulitilingual_1600_pipeline pipeline CLIPForZeroShotClassification from gowitheflowlab +author: John Snow Labs +name: clip_base_patch16_supervised_mulitilingual_1600_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clip_base_patch16_supervised_mulitilingual_1600_pipeline` is a English model originally trained by gowitheflowlab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clip_base_patch16_supervised_mulitilingual_1600_pipeline_en_5.5.0_3.0_1725491002252.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clip_base_patch16_supervised_mulitilingual_1600_pipeline_en_5.5.0_3.0_1725491002252.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("clip_base_patch16_supervised_mulitilingual_1600_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("clip_base_patch16_supervised_mulitilingual_1600_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
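+
+For this zero-shot image classification pipeline, `df` is expected to contain images rather than text. A minimal sketch of loading an image folder is shown below; the path is a placeholder assumption:
+
+```python
+# Load images with Spark's built-in image data source
+df = spark.read.format("image") \
+    .option("dropInvalid", True) \
+    .load("path/to/images/")
+
+pipeline = PretrainedPipeline("clip_base_patch16_supervised_mulitilingual_1600_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+```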
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clip_base_patch16_supervised_mulitilingual_1600_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|509.7 MB| + +## References + +https://huggingface.co/gowitheflowlab/clip-base-patch16-supervised-mulitilingual-1600 + +## Included Models + +- ImageAssembler +- CLIPForZeroShotClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-clip_crop_disease_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-clip_crop_disease_pipeline_en.md new file mode 100644 index 00000000000000..d98f7588f90fc5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-clip_crop_disease_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English clip_crop_disease_pipeline pipeline CLIPForZeroShotClassification from TonyStarkD99 +author: John Snow Labs +name: clip_crop_disease_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clip_crop_disease_pipeline` is a English model originally trained by TonyStarkD99. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clip_crop_disease_pipeline_en_5.5.0_3.0_1725492253886.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clip_crop_disease_pipeline_en_5.5.0_3.0_1725492253886.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("clip_crop_disease_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("clip_crop_disease_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clip_crop_disease_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|400.5 MB| + +## References + +https://huggingface.co/TonyStarkD99/CLIP-Crop_Disease + +## Included Models + +- ImageAssembler +- CLIPForZeroShotClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-clip_demo_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-clip_demo_pipeline_en.md new file mode 100644 index 00000000000000..f3500d914d1725 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-clip_demo_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English clip_demo_pipeline pipeline CLIPForZeroShotClassification from zabir735 +author: John Snow Labs +name: clip_demo_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clip_demo_pipeline` is a English model originally trained by zabir735. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clip_demo_pipeline_en_5.5.0_3.0_1725455250524.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clip_demo_pipeline_en_5.5.0_3.0_1725455250524.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("clip_demo_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("clip_demo_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clip_demo_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|561.2 MB| + +## References + +https://huggingface.co/zabir735/clip-demo + +## Included Models + +- ImageAssembler +- CLIPForZeroShotClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-clip_vit_base_patch16_en.md b/docs/_posts/ahmedlone127/2024-09-04-clip_vit_base_patch16_en.md new file mode 100644 index 00000000000000..d96d0b28cc4faf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-clip_vit_base_patch16_en.md @@ -0,0 +1,120 @@ +--- +layout: model +title: English clip_vit_base_patch16 CLIPForZeroShotClassification from openai +author: John Snow Labs +name: clip_vit_base_patch16 +date: 2024-09-04 +tags: [en, open_source, onnx, zero_shot, clip, image] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CLIPForZeroShotClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clip_vit_base_patch16` is a English model originally trained by openai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clip_vit_base_patch16_en_5.5.0_3.0_1725490970377.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clip_vit_base_patch16_en_5.5.0_3.0_1725490970377.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Load a folder of images, assemble them, and classify each image against the candidate labels
+imageDF = spark.read \
+    .format("image") \
+    .option("dropInvalid", value = True) \
+    .load("src/test/resources/image/")
+
+candidateLabels = [
+    "a photo of a bird",
+    "a photo of a cat",
+    "a photo of a dog",
+    "a photo of a hen",
+    "a photo of a hippo",
+    "a photo of a room",
+    "a photo of a tractor",
+    "a photo of an ostrich",
+    "a photo of an ox"]
+
+imageAssembler = ImageAssembler() \
+    .setInputCol("image") \
+    .setOutputCol("image_assembler")
+
+imageClassifier = CLIPForZeroShotClassification.pretrained("clip_vit_base_patch16","en") \
+    .setInputCols(["image_assembler"]) \
+    .setOutputCol("label") \
+    .setCandidateLabels(candidateLabels)
+
+pipeline = Pipeline().setStages([imageAssembler, imageClassifier])
+pipelineModel = pipeline.fit(imageDF)
+pipelineDF = pipelineModel.transform(imageDF)
+
+```
+```scala
+
+// Load a folder of images, assemble them, and classify each image against the candidate labels
+val imageDF = ResourceHelper.spark.read
+  .format("image")
+  .option("dropInvalid", value = true)
+  .load("src/test/resources/image/")
+
+val candidateLabels = Array(
+  "a photo of a bird",
+  "a photo of a cat",
+  "a photo of a dog",
+  "a photo of a hen",
+  "a photo of a hippo",
+  "a photo of a room",
+  "a photo of a tractor",
+  "a photo of an ostrich",
+  "a photo of an ox")
+
+val imageAssembler = new ImageAssembler()
+  .setInputCol("image")
+  .setOutputCol("image_assembler")
+
+val imageClassifier = CLIPForZeroShotClassification.pretrained("clip_vit_base_patch16","en")
+  .setInputCols(Array("image_assembler"))
+  .setOutputCol("label")
+  .setCandidateLabels(candidateLabels)
+
+val pipeline = new Pipeline().setStages(Array(imageAssembler, imageClassifier))
+val pipelineModel = pipeline.fit(imageDF)
+val pipelineDF = pipelineModel.transform(imageDF)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clip_vit_base_patch16| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[label]| +|Language:|en| +|Size:|393.8 MB| + +## References + +https://huggingface.co/openai/clip-vit-base-patch16 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-clip_vit_base_patch16_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-clip_vit_base_patch16_pipeline_en.md new file mode 100644 index 00000000000000..1f0227960c5eb7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-clip_vit_base_patch16_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English clip_vit_base_patch16_pipeline pipeline CLIPForZeroShotClassification from openai +author: John Snow Labs +name: clip_vit_base_patch16_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clip_vit_base_patch16_pipeline` is a English model originally trained by openai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clip_vit_base_patch16_pipeline_en_5.5.0_3.0_1725491061311.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clip_vit_base_patch16_pipeline_en_5.5.0_3.0_1725491061311.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("clip_vit_base_patch16_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("clip_vit_base_patch16_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clip_vit_base_patch16_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|393.8 MB| + +## References + +https://huggingface.co/openai/clip-vit-base-patch16 + +## Included Models + +- ImageAssembler +- CLIPForZeroShotClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-clip_vit_base_patch322_en.md b/docs/_posts/ahmedlone127/2024-09-04-clip_vit_base_patch322_en.md new file mode 100644 index 00000000000000..798cae585d48a7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-clip_vit_base_patch322_en.md @@ -0,0 +1,120 @@ +--- +layout: model +title: English clip_vit_base_patch322 CLIPForZeroShotClassification from sergioprada +author: John Snow Labs +name: clip_vit_base_patch322 +date: 2024-09-04 +tags: [en, open_source, onnx, zero_shot, clip, image] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CLIPForZeroShotClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clip_vit_base_patch322` is a English model originally trained by sergioprada. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clip_vit_base_patch322_en_5.5.0_3.0_1725456418659.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clip_vit_base_patch322_en_5.5.0_3.0_1725456418659.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Load a folder of images, assemble them, and classify each image against the candidate labels
+imageDF = spark.read \
+    .format("image") \
+    .option("dropInvalid", value = True) \
+    .load("src/test/resources/image/")
+
+candidateLabels = [
+    "a photo of a bird",
+    "a photo of a cat",
+    "a photo of a dog",
+    "a photo of a hen",
+    "a photo of a hippo",
+    "a photo of a room",
+    "a photo of a tractor",
+    "a photo of an ostrich",
+    "a photo of an ox"]
+
+imageAssembler = ImageAssembler() \
+    .setInputCol("image") \
+    .setOutputCol("image_assembler")
+
+imageClassifier = CLIPForZeroShotClassification.pretrained("clip_vit_base_patch322","en") \
+    .setInputCols(["image_assembler"]) \
+    .setOutputCol("label") \
+    .setCandidateLabels(candidateLabels)
+
+pipeline = Pipeline().setStages([imageAssembler, imageClassifier])
+pipelineModel = pipeline.fit(imageDF)
+pipelineDF = pipelineModel.transform(imageDF)
+
+```
+```scala
+
+// Load a folder of images, assemble them, and classify each image against the candidate labels
+val imageDF = ResourceHelper.spark.read
+  .format("image")
+  .option("dropInvalid", value = true)
+  .load("src/test/resources/image/")
+
+val candidateLabels = Array(
+  "a photo of a bird",
+  "a photo of a cat",
+  "a photo of a dog",
+  "a photo of a hen",
+  "a photo of a hippo",
+  "a photo of a room",
+  "a photo of a tractor",
+  "a photo of an ostrich",
+  "a photo of an ox")
+
+val imageAssembler = new ImageAssembler()
+  .setInputCol("image")
+  .setOutputCol("image_assembler")
+
+val imageClassifier = CLIPForZeroShotClassification.pretrained("clip_vit_base_patch322","en")
+  .setInputCols(Array("image_assembler"))
+  .setOutputCol("label")
+  .setCandidateLabels(candidateLabels)
+
+val pipeline = new Pipeline().setStages(Array(imageAssembler, imageClassifier))
+val pipelineModel = pipeline.fit(imageDF)
+val pipelineDF = pipelineModel.transform(imageDF)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clip_vit_base_patch322| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[label]| +|Language:|en| +|Size:|397.5 MB| + +## References + +https://huggingface.co/sergioprada/clip-vit-base-patch322 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-clip_vit_base_patch32_demo_rvignav_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-clip_vit_base_patch32_demo_rvignav_pipeline_en.md new file mode 100644 index 00000000000000..6a75b2407909b4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-clip_vit_base_patch32_demo_rvignav_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English clip_vit_base_patch32_demo_rvignav_pipeline pipeline CLIPForZeroShotClassification from rvignav +author: John Snow Labs +name: clip_vit_base_patch32_demo_rvignav_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clip_vit_base_patch32_demo_rvignav_pipeline` is a English model originally trained by rvignav. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clip_vit_base_patch32_demo_rvignav_pipeline_en_5.5.0_3.0_1725491669942.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clip_vit_base_patch32_demo_rvignav_pipeline_en_5.5.0_3.0_1725491669942.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("clip_vit_base_patch32_demo_rvignav_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("clip_vit_base_patch32_demo_rvignav_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clip_vit_base_patch32_demo_rvignav_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|397.5 MB| + +## References + +https://huggingface.co/rvignav/clip-vit-base-patch32-demo + +## Included Models + +- ImageAssembler +- CLIPForZeroShotClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-clip_vit_l_14_laion2b_s32b_b82k_laion_en.md b/docs/_posts/ahmedlone127/2024-09-04-clip_vit_l_14_laion2b_s32b_b82k_laion_en.md new file mode 100644 index 00000000000000..a81d9e71b00549 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-clip_vit_l_14_laion2b_s32b_b82k_laion_en.md @@ -0,0 +1,120 @@ +--- +layout: model +title: English clip_vit_l_14_laion2b_s32b_b82k_laion CLIPForZeroShotClassification from laion +author: John Snow Labs +name: clip_vit_l_14_laion2b_s32b_b82k_laion +date: 2024-09-04 +tags: [en, open_source, onnx, zero_shot, clip, image] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CLIPForZeroShotClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clip_vit_l_14_laion2b_s32b_b82k_laion` is a English model originally trained by laion. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clip_vit_l_14_laion2b_s32b_b82k_laion_en_5.5.0_3.0_1725456197731.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clip_vit_l_14_laion2b_s32b_b82k_laion_en_5.5.0_3.0_1725456197731.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Load a folder of images, assemble them, and classify each image against the candidate labels
+imageDF = spark.read \
+    .format("image") \
+    .option("dropInvalid", value = True) \
+    .load("src/test/resources/image/")
+
+candidateLabels = [
+    "a photo of a bird",
+    "a photo of a cat",
+    "a photo of a dog",
+    "a photo of a hen",
+    "a photo of a hippo",
+    "a photo of a room",
+    "a photo of a tractor",
+    "a photo of an ostrich",
+    "a photo of an ox"]
+
+imageAssembler = ImageAssembler() \
+    .setInputCol("image") \
+    .setOutputCol("image_assembler")
+
+imageClassifier = CLIPForZeroShotClassification.pretrained("clip_vit_l_14_laion2b_s32b_b82k_laion","en") \
+    .setInputCols(["image_assembler"]) \
+    .setOutputCol("label") \
+    .setCandidateLabels(candidateLabels)
+
+pipeline = Pipeline().setStages([imageAssembler, imageClassifier])
+pipelineModel = pipeline.fit(imageDF)
+pipelineDF = pipelineModel.transform(imageDF)
+
+```
+```scala
+
+// Load a folder of images, assemble them, and classify each image against the candidate labels
+val imageDF = ResourceHelper.spark.read
+  .format("image")
+  .option("dropInvalid", value = true)
+  .load("src/test/resources/image/")
+
+val candidateLabels = Array(
+  "a photo of a bird",
+  "a photo of a cat",
+  "a photo of a dog",
+  "a photo of a hen",
+  "a photo of a hippo",
+  "a photo of a room",
+  "a photo of a tractor",
+  "a photo of an ostrich",
+  "a photo of an ox")
+
+val imageAssembler = new ImageAssembler()
+  .setInputCol("image")
+  .setOutputCol("image_assembler")
+
+val imageClassifier = CLIPForZeroShotClassification.pretrained("clip_vit_l_14_laion2b_s32b_b82k_laion","en")
+  .setInputCols(Array("image_assembler"))
+  .setOutputCol("label")
+  .setCandidateLabels(candidateLabels)
+
+val pipeline = new Pipeline().setStages(Array(imageAssembler, imageClassifier))
+val pipelineModel = pipeline.fit(imageDF)
+val pipelineDF = pipelineModel.transform(imageDF)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clip_vit_l_14_laion2b_s32b_b82k_laion| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[label]| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/laion/CLIP-ViT-L-14-laion2B-s32B-b82K \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-clip_vit_large_patch14_336_q_mm_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-clip_vit_large_patch14_336_q_mm_pipeline_en.md new file mode 100644 index 00000000000000..ece17988ca336d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-clip_vit_large_patch14_336_q_mm_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English clip_vit_large_patch14_336_q_mm_pipeline pipeline CLIPForZeroShotClassification from Q-MM +author: John Snow Labs +name: clip_vit_large_patch14_336_q_mm_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clip_vit_large_patch14_336_q_mm_pipeline` is a English model originally trained by Q-MM. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clip_vit_large_patch14_336_q_mm_pipeline_en_5.5.0_3.0_1725491647136.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clip_vit_large_patch14_336_q_mm_pipeline_en_5.5.0_3.0_1725491647136.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("clip_vit_large_patch14_336_q_mm_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("clip_vit_large_patch14_336_q_mm_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clip_vit_large_patch14_336_q_mm_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.1 GB| + +## References + +https://huggingface.co/Q-MM/clip-vit-large-patch14-336 + +## Included Models + +- ImageAssembler +- CLIPForZeroShotClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-clip_vit_large_patch14_finetuned_dresser_sofas_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-clip_vit_large_patch14_finetuned_dresser_sofas_pipeline_en.md new file mode 100644 index 00000000000000..0301fc9e22bdcd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-clip_vit_large_patch14_finetuned_dresser_sofas_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English clip_vit_large_patch14_finetuned_dresser_sofas_pipeline pipeline CLIPForZeroShotClassification from vinluvie +author: John Snow Labs +name: clip_vit_large_patch14_finetuned_dresser_sofas_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clip_vit_large_patch14_finetuned_dresser_sofas_pipeline` is a English model originally trained by vinluvie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clip_vit_large_patch14_finetuned_dresser_sofas_pipeline_en_5.5.0_3.0_1725491694309.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clip_vit_large_patch14_finetuned_dresser_sofas_pipeline_en_5.5.0_3.0_1725491694309.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("clip_vit_large_patch14_finetuned_dresser_sofas_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("clip_vit_large_patch14_finetuned_dresser_sofas_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clip_vit_large_patch14_finetuned_dresser_sofas_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/vinluvie/clip-vit-large-patch14-finetuned-dresser-sofas + +## Included Models + +- ImageAssembler +- CLIPForZeroShotClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-clip_vit_large_patch14_superlore_en.md b/docs/_posts/ahmedlone127/2024-09-04-clip_vit_large_patch14_superlore_en.md new file mode 100644 index 00000000000000..ee2d1f17cd2162 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-clip_vit_large_patch14_superlore_en.md @@ -0,0 +1,120 @@ +--- +layout: model +title: English clip_vit_large_patch14_superlore CLIPForZeroShotClassification from Superlore +author: John Snow Labs +name: clip_vit_large_patch14_superlore +date: 2024-09-04 +tags: [en, open_source, onnx, zero_shot, clip, image] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CLIPForZeroShotClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clip_vit_large_patch14_superlore` is a English model originally trained by Superlore. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clip_vit_large_patch14_superlore_en_5.5.0_3.0_1725491569044.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clip_vit_large_patch14_superlore_en_5.5.0_3.0_1725491569044.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Load a folder of images, assemble them, and classify each image against the candidate labels
+imageDF = spark.read \
+    .format("image") \
+    .option("dropInvalid", value = True) \
+    .load("src/test/resources/image/")
+
+candidateLabels = [
+    "a photo of a bird",
+    "a photo of a cat",
+    "a photo of a dog",
+    "a photo of a hen",
+    "a photo of a hippo",
+    "a photo of a room",
+    "a photo of a tractor",
+    "a photo of an ostrich",
+    "a photo of an ox"]
+
+imageAssembler = ImageAssembler() \
+    .setInputCol("image") \
+    .setOutputCol("image_assembler")
+
+imageClassifier = CLIPForZeroShotClassification.pretrained("clip_vit_large_patch14_superlore","en") \
+    .setInputCols(["image_assembler"]) \
+    .setOutputCol("label") \
+    .setCandidateLabels(candidateLabels)
+
+pipeline = Pipeline().setStages([imageAssembler, imageClassifier])
+pipelineModel = pipeline.fit(imageDF)
+pipelineDF = pipelineModel.transform(imageDF)
+
+```
+```scala
+
+// Load a folder of images, assemble them, and classify each image against the candidate labels
+val imageDF = ResourceHelper.spark.read
+  .format("image")
+  .option("dropInvalid", value = true)
+  .load("src/test/resources/image/")
+
+val candidateLabels = Array(
+  "a photo of a bird",
+  "a photo of a cat",
+  "a photo of a dog",
+  "a photo of a hen",
+  "a photo of a hippo",
+  "a photo of a room",
+  "a photo of a tractor",
+  "a photo of an ostrich",
+  "a photo of an ox")
+
+val imageAssembler = new ImageAssembler()
+  .setInputCol("image")
+  .setOutputCol("image_assembler")
+
+val imageClassifier = CLIPForZeroShotClassification.pretrained("clip_vit_large_patch14_superlore","en")
+  .setInputCols(Array("image_assembler"))
+  .setOutputCol("label")
+  .setCandidateLabels(candidateLabels)
+
+val pipeline = new Pipeline().setStages(Array(imageAssembler, imageClassifier))
+val pipelineModel = pipeline.fit(imageDF)
+val pipelineDF = pipelineModel.transform(imageDF)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clip_vit_large_patch14_superlore| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[label]| +|Language:|en| +|Size:|1.1 GB| + +## References + +https://huggingface.co/Superlore/clip-vit-large-patch14 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-clip_vit_large_patch14_superlore_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-clip_vit_large_patch14_superlore_pipeline_en.md new file mode 100644 index 00000000000000..167ffb5ed7a640 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-clip_vit_large_patch14_superlore_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English clip_vit_large_patch14_superlore_pipeline pipeline CLIPForZeroShotClassification from Superlore +author: John Snow Labs +name: clip_vit_large_patch14_superlore_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clip_vit_large_patch14_superlore_pipeline` is a English model originally trained by Superlore. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clip_vit_large_patch14_superlore_pipeline_en_5.5.0_3.0_1725491845306.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clip_vit_large_patch14_superlore_pipeline_en_5.5.0_3.0_1725491845306.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("clip_vit_large_patch14_superlore_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("clip_vit_large_patch14_superlore_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clip_vit_large_patch14_superlore_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.1 GB| + +## References + +https://huggingface.co/Superlore/clip-vit-large-patch14 + +## Included Models + +- ImageAssembler +- CLIPForZeroShotClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-clip_vit_large_patch14_trainformeta_en.md b/docs/_posts/ahmedlone127/2024-09-04-clip_vit_large_patch14_trainformeta_en.md new file mode 100644 index 00000000000000..d2b3bcb572a715 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-clip_vit_large_patch14_trainformeta_en.md @@ -0,0 +1,120 @@ +--- +layout: model +title: English clip_vit_large_patch14_trainformeta CLIPForZeroShotClassification from zueskalare +author: John Snow Labs +name: clip_vit_large_patch14_trainformeta +date: 2024-09-04 +tags: [en, open_source, onnx, zero_shot, clip, image] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CLIPForZeroShotClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clip_vit_large_patch14_trainformeta` is a English model originally trained by zueskalare. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clip_vit_large_patch14_trainformeta_en_5.5.0_3.0_1725455511301.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clip_vit_large_patch14_trainformeta_en_5.5.0_3.0_1725455511301.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Load a folder of images, assemble them, and classify each image against the candidate labels
+imageDF = spark.read \
+    .format("image") \
+    .option("dropInvalid", value = True) \
+    .load("src/test/resources/image/")
+
+candidateLabels = [
+    "a photo of a bird",
+    "a photo of a cat",
+    "a photo of a dog",
+    "a photo of a hen",
+    "a photo of a hippo",
+    "a photo of a room",
+    "a photo of a tractor",
+    "a photo of an ostrich",
+    "a photo of an ox"]
+
+imageAssembler = ImageAssembler() \
+    .setInputCol("image") \
+    .setOutputCol("image_assembler")
+
+imageClassifier = CLIPForZeroShotClassification.pretrained("clip_vit_large_patch14_trainformeta","en") \
+    .setInputCols(["image_assembler"]) \
+    .setOutputCol("label") \
+    .setCandidateLabels(candidateLabels)
+
+pipeline = Pipeline().setStages([imageAssembler, imageClassifier])
+pipelineModel = pipeline.fit(imageDF)
+pipelineDF = pipelineModel.transform(imageDF)
+
+```
+```scala
+
+// Load a folder of images, assemble them, and classify each image against the candidate labels
+val imageDF = ResourceHelper.spark.read
+  .format("image")
+  .option("dropInvalid", value = true)
+  .load("src/test/resources/image/")
+
+val candidateLabels = Array(
+  "a photo of a bird",
+  "a photo of a cat",
+  "a photo of a dog",
+  "a photo of a hen",
+  "a photo of a hippo",
+  "a photo of a room",
+  "a photo of a tractor",
+  "a photo of an ostrich",
+  "a photo of an ox")
+
+val imageAssembler = new ImageAssembler()
+  .setInputCol("image")
+  .setOutputCol("image_assembler")
+
+val imageClassifier = CLIPForZeroShotClassification.pretrained("clip_vit_large_patch14_trainformeta","en")
+  .setInputCols(Array("image_assembler"))
+  .setOutputCol("label")
+  .setCandidateLabels(candidateLabels)
+
+val pipeline = new Pipeline().setStages(Array(imageAssembler, imageClassifier))
+val pipelineModel = pipeline.fit(imageDF)
+val pipelineDF = pipelineModel.transform(imageDF)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clip_vit_large_patch14_trainformeta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[label]| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/zueskalare/clip-vit-large-patch14-TrainForMeta \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-clip_vit_large_patch14_trainformeta_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-clip_vit_large_patch14_trainformeta_pipeline_en.md new file mode 100644 index 00000000000000..eaadd068efeb65 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-clip_vit_large_patch14_trainformeta_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English clip_vit_large_patch14_trainformeta_pipeline pipeline CLIPForZeroShotClassification from zueskalare +author: John Snow Labs +name: clip_vit_large_patch14_trainformeta_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clip_vit_large_patch14_trainformeta_pipeline` is a English model originally trained by zueskalare. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clip_vit_large_patch14_trainformeta_pipeline_en_5.5.0_3.0_1725455590092.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clip_vit_large_patch14_trainformeta_pipeline_en_5.5.0_3.0_1725455590092.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("clip_vit_large_patch14_trainformeta_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("clip_vit_large_patch14_trainformeta_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
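+The `df` above is assumed to be a Spark image DataFrame; a minimal sketch for building one (the path is only a placeholder) is: +```python + +df = spark.read \ + .format("image") \ + .option("dropInvalid", True) \ + .load("path/to/your/images/") + +``` +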
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clip_vit_large_patch14_trainformeta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/zueskalare/clip-vit-large-patch14-TrainForMeta + +## Included Models + +- ImageAssembler +- CLIPForZeroShotClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-codebert_python_en.md b/docs/_posts/ahmedlone127/2024-09-04-codebert_python_en.md new file mode 100644 index 00000000000000..b00206cc9f3719 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-codebert_python_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English codebert_python RoBertaEmbeddings from neulab +author: John Snow Labs +name: codebert_python +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`codebert_python` is a English model originally trained by neulab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/codebert_python_en_5.5.0_3.0_1725412748492.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/codebert_python_en_5.5.0_3.0_1725412748492.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("codebert_python","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("codebert_python","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
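+To work with the vectors directly (a sketch over the `pipelineDF` from the Python example above), each annotation in the `embeddings` column can be exploded into one token per row: +```python + +from pyspark.sql.functions import explode + +# result holds the token text, embeddings holds its vector +pipelineDF.select(explode("embeddings").alias("emb")) \ + .selectExpr("emb.result as token", "emb.embeddings as vector") \ + .show(truncate=80) + +``` +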
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|codebert_python| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|466.0 MB| + +## References + +https://huggingface.co/neulab/codebert-python \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-cpegen_pv_en.md b/docs/_posts/ahmedlone127/2024-09-04-cpegen_pv_en.md new file mode 100644 index 00000000000000..853afb4e978632 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-cpegen_pv_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English cpegen_pv DistilBertForTokenClassification from Neurona +author: John Snow Labs +name: cpegen_pv +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cpegen_pv` is a English model originally trained by Neurona. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cpegen_pv_en_5.5.0_3.0_1725492437561.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cpegen_pv_en_5.5.0_3.0_1725492437561.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = DistilBertForTokenClassification.pretrained("cpegen_pv","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = DistilBertForTokenClassification.pretrained("cpegen_pv", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
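+Token-level tags are usually easier to consume as entity chunks; one way to get them (a sketch that extends the Python pipeline above) is to append a `NerConverter` stage: +```python + +nerConverter = NerConverter() \ + .setInputCols(["document", "token", "ner"]) \ + .setOutputCol("ner_chunk") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier, nerConverter]) +pipelineDF = pipeline.fit(data).transform(data) +pipelineDF.select("ner_chunk.result").show(truncate=False) + +``` +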
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cpegen_pv| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Neurona/cpegen_pv \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-cpegen_pv_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-cpegen_pv_pipeline_en.md new file mode 100644 index 00000000000000..6b4c4859031163 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-cpegen_pv_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English cpegen_pv_pipeline pipeline DistilBertForTokenClassification from Neurona +author: John Snow Labs +name: cpegen_pv_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cpegen_pv_pipeline` is a English model originally trained by Neurona. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cpegen_pv_pipeline_en_5.5.0_3.0_1725492457381.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cpegen_pv_pipeline_en_5.5.0_3.0_1725492457381.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("cpegen_pv_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("cpegen_pv_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
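+For ad-hoc strings rather than a DataFrame, the same pretrained pipeline can be called through `annotate` (a sketch; the input sentence is only a placeholder): +```python + +pipeline = PretrainedPipeline("cpegen_pv_pipeline", lang = "en") +result = pipeline.annotate("Replace this with a sentence of your own") +print(result)  # keys correspond to the pipeline's output columns + +``` +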
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cpegen_pv_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Neurona/cpegen_pv + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-cpegen_vv_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-cpegen_vv_pipeline_en.md new file mode 100644 index 00000000000000..c119fe2dcc695d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-cpegen_vv_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English cpegen_vv_pipeline pipeline DistilBertForTokenClassification from Neurona +author: John Snow Labs +name: cpegen_vv_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cpegen_vv_pipeline` is a English model originally trained by Neurona. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cpegen_vv_pipeline_en_5.5.0_3.0_1725449061388.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cpegen_vv_pipeline_en_5.5.0_3.0_1725449061388.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("cpegen_vv_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("cpegen_vv_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cpegen_vv_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Neurona/cpegen_vv + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-craft_clinicalbert_ner_en.md b/docs/_posts/ahmedlone127/2024-09-04-craft_clinicalbert_ner_en.md new file mode 100644 index 00000000000000..2fa57a380c95a0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-craft_clinicalbert_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English craft_clinicalbert_ner DistilBertForTokenClassification from judithrosell +author: John Snow Labs +name: craft_clinicalbert_ner +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`craft_clinicalbert_ner` is a English model originally trained by judithrosell. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/craft_clinicalbert_ner_en_5.5.0_3.0_1725476163467.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/craft_clinicalbert_ner_en_5.5.0_3.0_1725476163467.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = DistilBertForTokenClassification.pretrained("craft_clinicalbert_ner","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = DistilBertForTokenClassification.pretrained("craft_clinicalbert_ner", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|craft_clinicalbert_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|505.4 MB| + +## References + +https://huggingface.co/judithrosell/CRAFT_ClinicalBERT_NER \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-cree_fewshot_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-cree_fewshot_pipeline_en.md new file mode 100644 index 00000000000000..49553da529a708 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-cree_fewshot_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English cree_fewshot_pipeline pipeline MPNetEmbeddings from pig4431 +author: John Snow Labs +name: cree_fewshot_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cree_fewshot_pipeline` is a English model originally trained by pig4431. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cree_fewshot_pipeline_en_5.5.0_3.0_1725470813483.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cree_fewshot_pipeline_en_5.5.0_3.0_1725470813483.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("cree_fewshot_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("cree_fewshot_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cree_fewshot_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/pig4431/CR_fewshot + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-cross_encoder_stsb_deberta_v3_large_en.md b/docs/_posts/ahmedlone127/2024-09-04-cross_encoder_stsb_deberta_v3_large_en.md new file mode 100644 index 00000000000000..7475aff440a3b3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-cross_encoder_stsb_deberta_v3_large_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English cross_encoder_stsb_deberta_v3_large DeBertaForSequenceClassification from yunyu +author: John Snow Labs +name: cross_encoder_stsb_deberta_v3_large +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cross_encoder_stsb_deberta_v3_large` is a English model originally trained by yunyu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cross_encoder_stsb_deberta_v3_large_en_5.5.0_3.0_1725440297580.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cross_encoder_stsb_deberta_v3_large_en_5.5.0_3.0_1725440297580.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DeBertaForSequenceClassification.pretrained("cross_encoder_stsb_deberta_v3_large","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DeBertaForSequenceClassification.pretrained("cross_encoder_stsb_deberta_v3_large", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cross_encoder_stsb_deberta_v3_large| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/yunyu/cross-encoder-stsb-deberta-v3-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-cs4248_roberta_wolof_search_mix_epoch_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-cs4248_roberta_wolof_search_mix_epoch_3_pipeline_en.md new file mode 100644 index 00000000000000..330a3f0fea1038 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-cs4248_roberta_wolof_search_mix_epoch_3_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English cs4248_roberta_wolof_search_mix_epoch_3_pipeline pipeline RoBertaForQuestionAnswering from BenjaminLHR +author: John Snow Labs +name: cs4248_roberta_wolof_search_mix_epoch_3_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cs4248_roberta_wolof_search_mix_epoch_3_pipeline` is a English model originally trained by BenjaminLHR. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cs4248_roberta_wolof_search_mix_epoch_3_pipeline_en_5.5.0_3.0_1725484266762.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cs4248_roberta_wolof_search_mix_epoch_3_pipeline_en_5.5.0_3.0_1725484266762.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("cs4248_roberta_wolof_search_mix_epoch_3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("cs4248_roberta_wolof_search_mix_epoch_3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cs4248_roberta_wolof_search_mix_epoch_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|465.9 MB| + +## References + +https://huggingface.co/BenjaminLHR/cs4248-roberta-wo-search-mix-epoch-3 + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-dagpap24_deberta_base_ft_en.md b/docs/_posts/ahmedlone127/2024-09-04-dagpap24_deberta_base_ft_en.md new file mode 100644 index 00000000000000..6187aa3fd1b7a5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-dagpap24_deberta_base_ft_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dagpap24_deberta_base_ft DeBertaForTokenClassification from swimmingcrab +author: John Snow Labs +name: dagpap24_deberta_base_ft +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, deberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dagpap24_deberta_base_ft` is a English model originally trained by swimmingcrab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dagpap24_deberta_base_ft_en_5.5.0_3.0_1725471684297.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dagpap24_deberta_base_ft_en_5.5.0_3.0_1725471684297.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = DeBertaForTokenClassification.pretrained("dagpap24_deberta_base_ft","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = DeBertaForTokenClassification.pretrained("dagpap24_deberta_base_ft", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dagpap24_deberta_base_ft| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|660.5 MB| + +## References + +https://huggingface.co/swimmingcrab/DAGPap24-deberta-base-ft \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-datasnipper_finerdistilbert_fullsequence_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-datasnipper_finerdistilbert_fullsequence_pipeline_en.md new file mode 100644 index 00000000000000..f8925f33b2a6df --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-datasnipper_finerdistilbert_fullsequence_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English datasnipper_finerdistilbert_fullsequence_pipeline pipeline DistilBertForTokenClassification from gvisser +author: John Snow Labs +name: datasnipper_finerdistilbert_fullsequence_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`datasnipper_finerdistilbert_fullsequence_pipeline` is a English model originally trained by gvisser. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/datasnipper_finerdistilbert_fullsequence_pipeline_en_5.5.0_3.0_1725476292320.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/datasnipper_finerdistilbert_fullsequence_pipeline_en_5.5.0_3.0_1725476292320.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("datasnipper_finerdistilbert_fullsequence_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("datasnipper_finerdistilbert_fullsequence_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|datasnipper_finerdistilbert_fullsequence_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|248.1 MB| + +## References + +https://huggingface.co/gvisser/DataSnipper_FinerDistilBert_FullSequence + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-db_fe_2_1_en.md b/docs/_posts/ahmedlone127/2024-09-04-db_fe_2_1_en.md new file mode 100644 index 00000000000000..761bedec749872 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-db_fe_2_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English db_fe_2_1 DistilBertForSequenceClassification from exala +author: John Snow Labs +name: db_fe_2_1 +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`db_fe_2_1` is a English model originally trained by exala. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/db_fe_2_1_en_5.5.0_3.0_1725489879827.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/db_fe_2_1_en_5.5.0_3.0_1725489879827.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DistilBertForSequenceClassification.pretrained("db_fe_2_1","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DistilBertForSequenceClassification.pretrained("db_fe_2_1", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
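+To read the prediction from the `pipelineDF` built above (a sketch over the Python example; per-class confidences living in the annotation metadata is the usual Spark NLP convention), the label sits in `class.result`: +```python + +pipelineDF.select("text", "class.result").show(truncate=False) + +# per-class confidences are typically stored in the annotation metadata +pipelineDF.selectExpr("`class`[0].metadata as scores").show(truncate=False) + +``` +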
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|db_fe_2_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.6 MB| + +## References + +https://huggingface.co/exala/db_fe_2.1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-db_fe_2_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-db_fe_2_1_pipeline_en.md new file mode 100644 index 00000000000000..e6e0ea4057ba2c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-db_fe_2_1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English db_fe_2_1_pipeline pipeline DistilBertForSequenceClassification from exala +author: John Snow Labs +name: db_fe_2_1_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`db_fe_2_1_pipeline` is a English model originally trained by exala. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/db_fe_2_1_pipeline_en_5.5.0_3.0_1725489891800.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/db_fe_2_1_pipeline_en_5.5.0_3.0_1725489891800.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("db_fe_2_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("db_fe_2_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|db_fe_2_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.6 MB| + +## References + +https://huggingface.co/exala/db_fe_2.1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-dbert_pii_detection_model_omshikhare_en.md b/docs/_posts/ahmedlone127/2024-09-04-dbert_pii_detection_model_omshikhare_en.md new file mode 100644 index 00000000000000..5cbc97d6afd3fa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-dbert_pii_detection_model_omshikhare_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dbert_pii_detection_model_omshikhare DistilBertForTokenClassification from omshikhare +author: John Snow Labs +name: dbert_pii_detection_model_omshikhare +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dbert_pii_detection_model_omshikhare` is a English model originally trained by omshikhare. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dbert_pii_detection_model_omshikhare_en_5.5.0_3.0_1725460389656.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dbert_pii_detection_model_omshikhare_en_5.5.0_3.0_1725460389656.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = DistilBertForTokenClassification.pretrained("dbert_pii_detection_model_omshikhare","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = DistilBertForTokenClassification.pretrained("dbert_pii_detection_model_omshikhare", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dbert_pii_detection_model_omshikhare| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.5 MB| + +## References + +https://huggingface.co/omshikhare/dbert_pii_detection_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_amazon_reviews_v1_patrickvonplaten_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_amazon_reviews_v1_patrickvonplaten_en.md new file mode 100644 index 00000000000000..1af281ce6af67f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_amazon_reviews_v1_patrickvonplaten_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_amazon_reviews_v1_patrickvonplaten DeBertaForSequenceClassification from patrickvonplaten +author: John Snow Labs +name: deberta_amazon_reviews_v1_patrickvonplaten +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_amazon_reviews_v1_patrickvonplaten` is a English model originally trained by patrickvonplaten. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_amazon_reviews_v1_patrickvonplaten_en_5.5.0_3.0_1725462234643.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_amazon_reviews_v1_patrickvonplaten_en_5.5.0_3.0_1725462234643.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_amazon_reviews_v1_patrickvonplaten","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_amazon_reviews_v1_patrickvonplaten", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_amazon_reviews_v1_patrickvonplaten| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|621.3 MB| + +## References + +https://huggingface.co/patrickvonplaten/deberta_amazon_reviews_v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_amazon_reviews_v1_patrickvonplaten_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_amazon_reviews_v1_patrickvonplaten_pipeline_en.md new file mode 100644 index 00000000000000..1754fdeb9208f2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_amazon_reviews_v1_patrickvonplaten_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_amazon_reviews_v1_patrickvonplaten_pipeline pipeline DeBertaForSequenceClassification from patrickvonplaten +author: John Snow Labs +name: deberta_amazon_reviews_v1_patrickvonplaten_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_amazon_reviews_v1_patrickvonplaten_pipeline` is a English model originally trained by patrickvonplaten. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_amazon_reviews_v1_patrickvonplaten_pipeline_en_5.5.0_3.0_1725462296133.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_amazon_reviews_v1_patrickvonplaten_pipeline_en_5.5.0_3.0_1725462296133.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("deberta_amazon_reviews_v1_patrickvonplaten_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("deberta_amazon_reviews_v1_patrickvonplaten_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_amazon_reviews_v1_patrickvonplaten_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|621.3 MB| + +## References + +https://huggingface.co/patrickvonplaten/deberta_amazon_reviews_v1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_base_german_fluency_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_base_german_fluency_en.md new file mode 100644 index 00000000000000..9d6fc4fd49b570 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_base_german_fluency_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_base_german_fluency DeBertaForSequenceClassification from EIStakovskii +author: John Snow Labs +name: deberta_base_german_fluency +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_base_german_fluency` is a English model originally trained by EIStakovskii. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_base_german_fluency_en_5.5.0_3.0_1725439136544.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_base_german_fluency_en_5.5.0_3.0_1725439136544.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_base_german_fluency","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_base_german_fluency", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_base_german_fluency| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|605.0 MB| + +## References + +https://huggingface.co/EIStakovskii/deberta-base-german_fluency \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_base_metaphor_detection_english_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_base_metaphor_detection_english_pipeline_en.md new file mode 100644 index 00000000000000..7f8511e49b1aa8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_base_metaphor_detection_english_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_base_metaphor_detection_english_pipeline pipeline DeBertaForTokenClassification from HiTZ +author: John Snow Labs +name: deberta_base_metaphor_detection_english_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_base_metaphor_detection_english_pipeline` is a English model originally trained by HiTZ. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_base_metaphor_detection_english_pipeline_en_5.5.0_3.0_1725475508097.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_base_metaphor_detection_english_pipeline_en_5.5.0_3.0_1725475508097.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("deberta_base_metaphor_detection_english_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("deberta_base_metaphor_detection_english_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_base_metaphor_detection_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|596.9 MB| + +## References + +https://huggingface.co/HiTZ/deberta-base-metaphor-detection-en + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_classification_base_prompt_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_classification_base_prompt_en.md new file mode 100644 index 00000000000000..4b915556a352e7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_classification_base_prompt_en.md @@ -0,0 +1,111 @@ +--- +layout: model +title: English DeBertaForSequenceClassification (from protectai) +author: John Snow Labs +name: deberta_classification_base_prompt +date: 2024-09-04 +tags: [sequence_classification, deberta, openvino, en, open_source] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: openvino +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +“ + + +DeBERTa v3 model with sequence classification/regression head on top (a linear layer on top of the pooled output) e.g. for multi-class document classification tasks. + +deberta_v3_base_sequence_classifier_imdb is a fine-tuned DeBERTa model that is ready to be used for Sequence Classification tasks such as sentiment analysis or multi-class text classification and it achieves state-of-the-art performance. + +We used TFDebertaV2ForSequenceClassification to train this model and used DeBertaForSequenceClassification annotator in Spark NLP 🚀 for prediction at scale! + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_classification_base_prompt_en_5.5.0_3.0_1725485967923.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_classification_base_prompt_en_5.5.0_3.0_1725485967923.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler()\ +.setInputCol("text")\ +.setOutputCol("document") + +tokenizer = Tokenizer()\ +.setInputCols(['document'])\ +.setOutputCol('token') + +sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_classification_base_prompt", "en")\ +.setInputCols(["document", "token"])\ +.setOutputCol("class")\ +.setCaseSensitive(True)\ +.setMaxSentenceLength(512) + +pipeline = Pipeline(stages=[ +document_assembler, +tokenizer, +sequenceClassifier +]) + +example = spark.createDataFrame([['I really liked that movie!']]).toDF("text") +result = pipeline.fit(example).transform(example) + + +``` +```scala + +val document_assembler = new DocumentAssembler() +.setInputCol("text") +.setOutputCol("document") + +val tokenizer = new Tokenizer() +.setInputCols("document") +.setOutputCol("token") + +val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_classification_base_prompt", "en") +.setInputCols("document", "token") +.setOutputCol("class") +.setCaseSensitive(true) +.setMaxSentenceLength(512) + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val example = Seq("I really liked that movie!").toDS.toDF("text") + +val result = pipeline.fit(example).transform(example) + + + + +``` +
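+For quick single-sentence experiments, the fitted pipeline can also be wrapped in a `LightPipeline` (a sketch reusing the `pipeline` and `example` variables from the Python snippet above): +```python + +from sparknlp.base import LightPipeline + +light_model = LightPipeline(pipeline.fit(example)) +print(light_model.annotate("I really liked that movie!")) + +``` +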
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_classification_base_prompt| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[token, document]| +|Output Labels:|[label]| +|Language:|en| +|Size:|710.8 MB| +|Case sensitive:|true| \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_classifier_feedback_1024_pseudo_final_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_classifier_feedback_1024_pseudo_final_pipeline_en.md new file mode 100644 index 00000000000000..c940f765855358 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_classifier_feedback_1024_pseudo_final_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_classifier_feedback_1024_pseudo_final_pipeline pipeline DeBertaForTokenClassification from TTian +author: John Snow Labs +name: deberta_classifier_feedback_1024_pseudo_final_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_classifier_feedback_1024_pseudo_final_pipeline` is a English model originally trained by TTian. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_classifier_feedback_1024_pseudo_final_pipeline_en_5.5.0_3.0_1725472740332.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_classifier_feedback_1024_pseudo_final_pipeline_en_5.5.0_3.0_1725472740332.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("deberta_classifier_feedback_1024_pseudo_final_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("deberta_classifier_feedback_1024_pseudo_final_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_classifier_feedback_1024_pseudo_final_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/TTian/deberta-classifier-feedback-1024-pseudo-final + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_docnli_sentencelevel_ner_claim_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_docnli_sentencelevel_ner_claim_en.md new file mode 100644 index 00000000000000..a445178e6fcf37 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_docnli_sentencelevel_ner_claim_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_docnli_sentencelevel_ner_claim DeBertaForSequenceClassification from jeffyelson +author: John Snow Labs +name: deberta_docnli_sentencelevel_ner_claim +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_docnli_sentencelevel_ner_claim` is a English model originally trained by jeffyelson. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_docnli_sentencelevel_ner_claim_en_5.5.0_3.0_1725468824220.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_docnli_sentencelevel_ner_claim_en_5.5.0_3.0_1725468824220.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_docnli_sentencelevel_ner_claim","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_docnli_sentencelevel_ner_claim", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_docnli_sentencelevel_ner_claim| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|594.3 MB| + +## References + +https://huggingface.co/jeffyelson/deberta_docnli_sentencelevel_ner_claim \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_docnli_sentencelevel_ner_claim_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_docnli_sentencelevel_ner_claim_pipeline_en.md new file mode 100644 index 00000000000000..90bd6a77511623 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_docnli_sentencelevel_ner_claim_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_docnli_sentencelevel_ner_claim_pipeline pipeline DeBertaForSequenceClassification from jeffyelson +author: John Snow Labs +name: deberta_docnli_sentencelevel_ner_claim_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_docnli_sentencelevel_ner_claim_pipeline` is a English model originally trained by jeffyelson. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_docnli_sentencelevel_ner_claim_pipeline_en_5.5.0_3.0_1725468863047.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_docnli_sentencelevel_ner_claim_pipeline_en_5.5.0_3.0_1725468863047.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("deberta_docnli_sentencelevel_ner_claim_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("deberta_docnli_sentencelevel_ner_claim_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
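+
+For a fully self-contained run, the snippet below is a minimal sketch: it assumes Spark NLP is installed and can start its own session, and that the pipeline exposes a `class` output key matching its classifier stage (adjust the key if the pipeline names its columns differently):
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+pipeline = PretrainedPipeline("deberta_docnli_sentencelevel_ner_claim_pipeline", lang="en")
+
+# annotate() runs the whole pipeline on a single string and returns a dict keyed by output column.
+result = pipeline.annotate("I love spark-nlp")
+print(result.get("class"))
+```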
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_docnli_sentencelevel_ner_claim_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|594.3 MB| + +## References + +https://huggingface.co/jeffyelson/deberta_docnli_sentencelevel_ner_claim + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_finetune_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_finetune_en.md new file mode 100644 index 00000000000000..c7a82c5f268cd5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_finetune_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_finetune DeBertaForSequenceClassification from nc33 +author: John Snow Labs +name: deberta_finetune +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_finetune` is a English model originally trained by nc33. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_finetune_en_5.5.0_3.0_1725463131730.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_finetune_en_5.5.0_3.0_1725463131730.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_finetune","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_finetune", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_finetune| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|665.5 MB| + +## References + +https://huggingface.co/nc33/deberta_finetune \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_finetune_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_finetune_pipeline_en.md new file mode 100644 index 00000000000000..8a77c8ea86a208 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_finetune_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_finetune_pipeline pipeline DeBertaForSequenceClassification from nc33 +author: John Snow Labs +name: deberta_finetune_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_finetune_pipeline` is a English model originally trained by nc33. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_finetune_pipeline_en_5.5.0_3.0_1725463165782.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_finetune_pipeline_en_5.5.0_3.0_1725463165782.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("deberta_finetune_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("deberta_finetune_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_finetune_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|665.6 MB| + +## References + +https://huggingface.co/nc33/deberta_finetune + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_sentencelevel_ner_claim_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_sentencelevel_ner_claim_en.md new file mode 100644 index 00000000000000..afc52ec917d480 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_sentencelevel_ner_claim_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_sentencelevel_ner_claim DeBertaForSequenceClassification from jeffyelson +author: John Snow Labs +name: deberta_sentencelevel_ner_claim +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_sentencelevel_ner_claim` is a English model originally trained by jeffyelson. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_sentencelevel_ner_claim_en_5.5.0_3.0_1725469099385.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_sentencelevel_ner_claim_en_5.5.0_3.0_1725469099385.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_sentencelevel_ner_claim","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_sentencelevel_ner_claim", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_sentencelevel_ner_claim| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|593.5 MB| + +## References + +https://huggingface.co/jeffyelson/deberta_sentencelevel_ner_claim \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_sentencelevel_ner_claim_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_sentencelevel_ner_claim_pipeline_en.md new file mode 100644 index 00000000000000..473e592cbdddb8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_sentencelevel_ner_claim_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_sentencelevel_ner_claim_pipeline pipeline DeBertaForSequenceClassification from jeffyelson +author: John Snow Labs +name: deberta_sentencelevel_ner_claim_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_sentencelevel_ner_claim_pipeline` is a English model originally trained by jeffyelson. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_sentencelevel_ner_claim_pipeline_en_5.5.0_3.0_1725469140590.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_sentencelevel_ner_claim_pipeline_en_5.5.0_3.0_1725469140590.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("deberta_sentencelevel_ner_claim_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("deberta_sentencelevel_ner_claim_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_sentencelevel_ner_claim_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|593.5 MB| + +## References + +https://huggingface.co/jeffyelson/deberta_sentencelevel_ner_claim + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_small_22feb_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_small_22feb_en.md new file mode 100644 index 00000000000000..d521ce43eb1633 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_small_22feb_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_small_22feb DeBertaForTokenClassification from codeaze +author: John Snow Labs +name: deberta_small_22feb +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, deberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_small_22feb` is a English model originally trained by codeaze. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_small_22feb_en_5.5.0_3.0_1725474368681.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_small_22feb_en_5.5.0_3.0_1725474368681.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DeBertaForTokenClassification.pretrained("deberta_small_22feb","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DeBertaForTokenClassification.pretrained("deberta_small_22feb", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
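+
+To inspect the predictions, the following is a minimal sketch that reuses the `pipelineDF` from the example above; `token` and `ner` are the output columns configured there:
+
+```python
+# Each row holds the tokens and their predicted entity tags as parallel arrays.
+pipelineDF.select("token.result", "ner.result").show(truncate=False)
+```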
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_small_22feb| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/codeaze/deberta_small_22feb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_tomatoes_sentiment_voodoo72_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_tomatoes_sentiment_voodoo72_en.md new file mode 100644 index 00000000000000..63deeb531d7447 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_tomatoes_sentiment_voodoo72_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_tomatoes_sentiment_voodoo72 DeBertaForSequenceClassification from voodoo72 +author: John Snow Labs +name: deberta_tomatoes_sentiment_voodoo72 +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_tomatoes_sentiment_voodoo72` is a English model originally trained by voodoo72. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_tomatoes_sentiment_voodoo72_en_5.5.0_3.0_1725438847420.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_tomatoes_sentiment_voodoo72_en_5.5.0_3.0_1725438847420.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_tomatoes_sentiment_voodoo72","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_tomatoes_sentiment_voodoo72", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_tomatoes_sentiment_voodoo72| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|589.9 MB| + +## References + +https://huggingface.co/voodoo72/deberta-tomatoes-sentiment \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_v2_base_japanese_finetuned_emotion_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_v2_base_japanese_finetuned_emotion_pipeline_en.md new file mode 100644 index 00000000000000..3deea31355542e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_v2_base_japanese_finetuned_emotion_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v2_base_japanese_finetuned_emotion_pipeline pipeline DeBertaForSequenceClassification from nasuka +author: John Snow Labs +name: deberta_v2_base_japanese_finetuned_emotion_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v2_base_japanese_finetuned_emotion_pipeline` is a English model originally trained by nasuka. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v2_base_japanese_finetuned_emotion_pipeline_en_5.5.0_3.0_1725439831651.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v2_base_japanese_finetuned_emotion_pipeline_en_5.5.0_3.0_1725439831651.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("deberta_v2_base_japanese_finetuned_emotion_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("deberta_v2_base_japanese_finetuned_emotion_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v2_base_japanese_finetuned_emotion_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|421.2 MB| + +## References + +https://huggingface.co/nasuka/deberta-v2-base-japanese-finetuned-emotion + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_10xp3_10xc4_128_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_10xp3_10xc4_128_pipeline_en.md new file mode 100644 index 00000000000000..8a1644a1b2dd42 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_10xp3_10xc4_128_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_base_10xp3_10xc4_128_pipeline pipeline DeBertaForSequenceClassification from taskydata +author: John Snow Labs +name: deberta_v3_base_10xp3_10xc4_128_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_10xp3_10xc4_128_pipeline` is a English model originally trained by taskydata. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_10xp3_10xc4_128_pipeline_en_5.5.0_3.0_1725462771269.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_10xp3_10xc4_128_pipeline_en_5.5.0_3.0_1725462771269.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("deberta_v3_base_10xp3_10xc4_128_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("deberta_v3_base_10xp3_10xc4_128_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_10xp3_10xc4_128_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|687.3 MB| + +## References + +https://huggingface.co/taskydata/deberta-v3-base_10xp3_10xc4_128 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_1107_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_1107_pipeline_en.md new file mode 100644 index 00000000000000..02bc45379ff66b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_1107_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_base_1107_pipeline pipeline DeBertaForSequenceClassification from xoyeop +author: John Snow Labs +name: deberta_v3_base_1107_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_1107_pipeline` is a English model originally trained by xoyeop. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_1107_pipeline_en_5.5.0_3.0_1725468410273.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_1107_pipeline_en_5.5.0_3.0_1725468410273.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("deberta_v3_base_1107_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("deberta_v3_base_1107_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_1107_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|640.6 MB| + +## References + +https://huggingface.co/xoyeop/deberta-v3-base-1107 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_ai4privacy_english_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_ai4privacy_english_en.md new file mode 100644 index 00000000000000..7f6e4149dafa58 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_ai4privacy_english_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_base_ai4privacy_english DeBertaForTokenClassification from xXiaobuding +author: John Snow Labs +name: deberta_v3_base_ai4privacy_english +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, deberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_ai4privacy_english` is a English model originally trained by xXiaobuding. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_ai4privacy_english_en_5.5.0_3.0_1725473447117.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_ai4privacy_english_en_5.5.0_3.0_1725473447117.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DeBertaForTokenClassification.pretrained("deberta_v3_base_ai4privacy_english","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DeBertaForTokenClassification.pretrained("deberta_v3_base_ai4privacy_english", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_ai4privacy_english| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|596.2 MB| + +## References + +https://huggingface.co/xXiaobuding/deberta-v3-base_ai4privacy_en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_civil_comments_wilds_5k_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_civil_comments_wilds_5k_en.md new file mode 100644 index 00000000000000..2eb16fbd91321a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_civil_comments_wilds_5k_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_base_civil_comments_wilds_5k DeBertaForSequenceClassification from shlomihod +author: John Snow Labs +name: deberta_v3_base_civil_comments_wilds_5k +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_civil_comments_wilds_5k` is a English model originally trained by shlomihod. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_civil_comments_wilds_5k_en_5.5.0_3.0_1725467995593.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_civil_comments_wilds_5k_en_5.5.0_3.0_1725467995593.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_base_civil_comments_wilds_5k","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_base_civil_comments_wilds_5k", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_civil_comments_wilds_5k| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|611.0 MB| + +## References + +https://huggingface.co/shlomihod/deberta-v3-base-civil-comments-wilds-5k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_civil_comments_wilds_5k_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_civil_comments_wilds_5k_pipeline_en.md new file mode 100644 index 00000000000000..73734e62047251 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_civil_comments_wilds_5k_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_base_civil_comments_wilds_5k_pipeline pipeline DeBertaForSequenceClassification from shlomihod +author: John Snow Labs +name: deberta_v3_base_civil_comments_wilds_5k_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_civil_comments_wilds_5k_pipeline` is a English model originally trained by shlomihod. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_civil_comments_wilds_5k_pipeline_en_5.5.0_3.0_1725468041725.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_civil_comments_wilds_5k_pipeline_en_5.5.0_3.0_1725468041725.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("deberta_v3_base_civil_comments_wilds_5k_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("deberta_v3_base_civil_comments_wilds_5k_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_civil_comments_wilds_5k_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|611.0 MB| + +## References + +https://huggingface.co/shlomihod/deberta-v3-base-civil-comments-wilds-5k + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_cola_yevheniimaslov_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_cola_yevheniimaslov_en.md new file mode 100644 index 00000000000000..7d8dd516ebadd6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_cola_yevheniimaslov_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_base_cola_yevheniimaslov DeBertaForSequenceClassification from yevheniimaslov +author: John Snow Labs +name: deberta_v3_base_cola_yevheniimaslov +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_cola_yevheniimaslov` is a English model originally trained by yevheniimaslov. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_cola_yevheniimaslov_en_5.5.0_3.0_1725469284321.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_cola_yevheniimaslov_en_5.5.0_3.0_1725469284321.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_base_cola_yevheniimaslov","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_base_cola_yevheniimaslov", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_cola_yevheniimaslov| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|559.9 MB| + +## References + +https://huggingface.co/yevheniimaslov/deberta-v3-base-cola \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_cola_yevheniimaslov_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_cola_yevheniimaslov_pipeline_en.md new file mode 100644 index 00000000000000..a88b6ce50619fc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_cola_yevheniimaslov_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_base_cola_yevheniimaslov_pipeline pipeline DeBertaForSequenceClassification from yevheniimaslov +author: John Snow Labs +name: deberta_v3_base_cola_yevheniimaslov_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_cola_yevheniimaslov_pipeline` is a English model originally trained by yevheniimaslov. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_cola_yevheniimaslov_pipeline_en_5.5.0_3.0_1725469368380.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_cola_yevheniimaslov_pipeline_en_5.5.0_3.0_1725469368380.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("deberta_v3_base_cola_yevheniimaslov_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("deberta_v3_base_cola_yevheniimaslov_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_cola_yevheniimaslov_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|559.9 MB| + +## References + +https://huggingface.co/yevheniimaslov/deberta-v3-base-cola + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_finetuned_bluegennx_run2_19_5e_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_finetuned_bluegennx_run2_19_5e_en.md new file mode 100644 index 00000000000000..ba1b9079774537 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_finetuned_bluegennx_run2_19_5e_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_base_finetuned_bluegennx_run2_19_5e DeBertaForTokenClassification from C4Scale +author: John Snow Labs +name: deberta_v3_base_finetuned_bluegennx_run2_19_5e +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, deberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_finetuned_bluegennx_run2_19_5e` is a English model originally trained by C4Scale. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_finetuned_bluegennx_run2_19_5e_en_5.5.0_3.0_1725471532525.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_finetuned_bluegennx_run2_19_5e_en_5.5.0_3.0_1725471532525.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DeBertaForTokenClassification.pretrained("deberta_v3_base_finetuned_bluegennx_run2_19_5e","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DeBertaForTokenClassification.pretrained("deberta_v3_base_finetuned_bluegennx_run2_19_5e", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_finetuned_bluegennx_run2_19_5e| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|609.3 MB| + +## References + +https://huggingface.co/C4Scale/deberta-v3-base_finetuned_bluegennx_run2.19_5e \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_finetuned_french_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_finetuned_french_en.md new file mode 100644 index 00000000000000..33439bf07f253d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_finetuned_french_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_base_finetuned_french DeBertaForSequenceClassification from KhawajaAbaid +author: John Snow Labs +name: deberta_v3_base_finetuned_french +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_finetuned_french` is a English model originally trained by KhawajaAbaid. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_finetuned_french_en_5.5.0_3.0_1725462519185.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_finetuned_french_en_5.5.0_3.0_1725462519185.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_base_finetuned_french","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_base_finetuned_french", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_finetuned_french| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|610.1 MB| + +## References + +https://huggingface.co/KhawajaAbaid/deberta-v3-base-finetuned-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_finetuned_mcqa_manyet1k_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_finetuned_mcqa_manyet1k_pipeline_en.md new file mode 100644 index 00000000000000..89db510b69d2cc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_finetuned_mcqa_manyet1k_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_base_finetuned_mcqa_manyet1k_pipeline pipeline DeBertaForSequenceClassification from manyet1k +author: John Snow Labs +name: deberta_v3_base_finetuned_mcqa_manyet1k_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_finetuned_mcqa_manyet1k_pipeline` is a English model originally trained by manyet1k. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_finetuned_mcqa_manyet1k_pipeline_en_5.5.0_3.0_1725461886456.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_finetuned_mcqa_manyet1k_pipeline_en_5.5.0_3.0_1725461886456.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("deberta_v3_base_finetuned_mcqa_manyet1k_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("deberta_v3_base_finetuned_mcqa_manyet1k_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_finetuned_mcqa_manyet1k_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|575.1 MB| + +## References + +https://huggingface.co/manyet1k/deberta-v3-base-finetuned-mcqa + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_qnli_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_qnli_en.md new file mode 100644 index 00000000000000..508deeac4b5db9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_qnli_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_base_qnli DeBertaForSequenceClassification from cliang1453 +author: John Snow Labs +name: deberta_v3_base_qnli +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_qnli` is a English model originally trained by cliang1453. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_qnli_en_5.5.0_3.0_1725468370331.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_qnli_en_5.5.0_3.0_1725468370331.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_base_qnli","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_base_qnli", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_qnli| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|637.9 MB| + +## References + +https://huggingface.co/cliang1453/deberta-v3-base-qnli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_sst2_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_sst2_en.md new file mode 100644 index 00000000000000..7808abf81c73f8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_sst2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_base_sst2 DeBertaForSequenceClassification from cliang1453 +author: John Snow Labs +name: deberta_v3_base_sst2 +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_sst2` is a English model originally trained by cliang1453. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_sst2_en_5.5.0_3.0_1725462870622.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_sst2_en_5.5.0_3.0_1725462870622.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_base_sst2","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_base_sst2", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
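+
+For low-latency scoring of individual sentences, the fitted model can be wrapped in a LightPipeline (a minimal sketch reusing `pipelineModel` from the example above; the `class` key mirrors the classifier's output column):
+
+```python
+from sparknlp.base import LightPipeline
+
+# LightPipeline runs the fitted pipeline on plain Python strings, avoiding DataFrame overhead.
+light = LightPipeline(pipelineModel)
+print(light.annotate("I love spark-nlp").get("class"))
+```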
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_sst2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|574.1 MB| + +## References + +https://huggingface.co/cliang1453/deberta-v3-base-sst2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_whatsapp_ner_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_whatsapp_ner_en.md new file mode 100644 index 00000000000000..2618eb32db3063 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_whatsapp_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_base_whatsapp_ner DeBertaForTokenClassification from Venkatesh4342 +author: John Snow Labs +name: deberta_v3_base_whatsapp_ner +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, deberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_whatsapp_ner` is a English model originally trained by Venkatesh4342. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_whatsapp_ner_en_5.5.0_3.0_1725473698672.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_whatsapp_ner_en_5.5.0_3.0_1725473698672.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DeBertaForTokenClassification.pretrained("deberta_v3_base_whatsapp_ner","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = DeBertaForTokenClassification.pretrained("deberta_v3_base_whatsapp_ner", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
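+
+Once the pipeline above has been applied, the token-level predictions live in the `ner` output column as annotation structs. A small inspection sketch, with column names taken from the `setOutputCol` calls above:
+
+```python
+# Show each document's tokens next to their predicted entity labels.
+# Assumes the `pipelineDF` produced by the example above.
+pipelineDF.select("token.result", "ner.result").show(truncate=False)
+```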
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_whatsapp_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|564.4 MB| + +## References + +https://huggingface.co/Venkatesh4342/deberta-v3-base-Whatsapp-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_whatsapp_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_whatsapp_ner_pipeline_en.md new file mode 100644 index 00000000000000..40a24dc75f212e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_whatsapp_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_base_whatsapp_ner_pipeline pipeline DeBertaForTokenClassification from Venkatesh4342 +author: John Snow Labs +name: deberta_v3_base_whatsapp_ner_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_whatsapp_ner_pipeline` is a English model originally trained by Venkatesh4342. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_whatsapp_ner_pipeline_en_5.5.0_3.0_1725473766933.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_whatsapp_ner_pipeline_en_5.5.0_3.0_1725473766933.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("deberta_v3_base_whatsapp_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("deberta_v3_base_whatsapp_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
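+
+The pretrained-pipeline snippet above assumes an existing DataFrame `df` with a `text` column and an active Spark NLP session. A minimal sketch of that setup (the sample sentence is purely illustrative):
+
+```python
+# Assumed setup for the PretrainedPipeline example above.
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+df = spark.createDataFrame([["John met Sarah in Berlin on Friday."]]).toDF("text")
+
+pipeline = PretrainedPipeline("deberta_v3_base_whatsapp_ner_pipeline", lang="en")
+annotations = pipeline.transform(df)
+annotations.printSchema()  # output column names follow the stages listed under "Included Models"
+```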
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_whatsapp_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|564.4 MB| + +## References + +https://huggingface.co/Venkatesh4342/deberta-v3-base-Whatsapp-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_zeroshot_v2_0_28heldout_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_zeroshot_v2_0_28heldout_en.md new file mode 100644 index 00000000000000..a09e1766a4192d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_base_zeroshot_v2_0_28heldout_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_base_zeroshot_v2_0_28heldout DeBertaForSequenceClassification from MoritzLaurer +author: John Snow Labs +name: deberta_v3_base_zeroshot_v2_0_28heldout +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_zeroshot_v2_0_28heldout` is a English model originally trained by MoritzLaurer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_zeroshot_v2_0_28heldout_en_5.5.0_3.0_1725467700900.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_zeroshot_v2_0_28heldout_en_5.5.0_3.0_1725467700900.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_base_zeroshot_v2_0_28heldout","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_base_zeroshot_v2_0_28heldout", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
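+
+For ad-hoc classification of single strings rather than a DataFrame, Spark NLP's `LightPipeline` wrapper can presumably be placed around the fitted pipeline from the example above; a brief sketch:
+
+```python
+# Sketch only: wraps the fitted PipelineModel for in-memory, single-text inference.
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+result = light.annotate("I love spark-nlp")
+print(result["class"])  # predicted label(s); "class" is the classifier's output column
+```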
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_zeroshot_v2_0_28heldout| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|440.3 MB| + +## References + +https://huggingface.co/MoritzLaurer/deberta-v3-base-zeroshot-v2.0-28heldout \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_ad_opentag_finetuned_ner_5epochs_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_ad_opentag_finetuned_ner_5epochs_en.md new file mode 100644 index 00000000000000..aa2785af09f2ad --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_ad_opentag_finetuned_ner_5epochs_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_large_ad_opentag_finetuned_ner_5epochs DeBertaForTokenClassification from ABrinkmann +author: John Snow Labs +name: deberta_v3_large_ad_opentag_finetuned_ner_5epochs +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, deberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large_ad_opentag_finetuned_ner_5epochs` is a English model originally trained by ABrinkmann. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_ad_opentag_finetuned_ner_5epochs_en_5.5.0_3.0_1725473069031.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_ad_opentag_finetuned_ner_5epochs_en_5.5.0_3.0_1725473069031.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = DeBertaForTokenClassification.pretrained("deberta_v3_large_ad_opentag_finetuned_ner_5epochs","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = DeBertaForTokenClassification.pretrained("deberta_v3_large_ad_opentag_finetuned_ner_5epochs", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_ad_opentag_finetuned_ner_5epochs| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/ABrinkmann/deberta-v3-large-ad-opentag-finetuned-ner-5epochs \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_classifier_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_classifier_en.md new file mode 100644 index 00000000000000..d2aaf6732a5c7a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_classifier_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_large_classifier DeBertaForSequenceClassification from KatoHF +author: John Snow Labs +name: deberta_v3_large_classifier +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large_classifier` is a English model originally trained by KatoHF. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_classifier_en_5.5.0_3.0_1725463902793.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_classifier_en_5.5.0_3.0_1725463902793.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large_classifier","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large_classifier", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
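+
+The Download button above points at a packaged copy of this model. Instead of calling `.pretrained()`, which downloads and caches it, the unzipped folder can presumably be loaded straight from disk; the local path below is an assumption for illustration only:
+
+```python
+# Illustrative offline load; the path is not a documented location.
+from sparknlp.annotator import DeBertaForSequenceClassification
+
+sequenceClassifier = DeBertaForSequenceClassification.load("/models/deberta_v3_large_classifier_en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+```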
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_classifier| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|833.0 MB| + +## References + +https://huggingface.co/KatoHF/deberta-v3-large-classifier \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_fever_pepa_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_fever_pepa_en.md new file mode 100644 index 00000000000000..7dd81e6bbb78be --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_fever_pepa_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_large_fever_pepa DeBertaForSequenceClassification from pepa +author: John Snow Labs +name: deberta_v3_large_fever_pepa +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large_fever_pepa` is a English model originally trained by pepa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_fever_pepa_en_5.5.0_3.0_1725462375747.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_fever_pepa_en_5.5.0_3.0_1725462375747.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large_fever_pepa","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large_fever_pepa", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_fever_pepa| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/pepa/deberta-v3-large-fever \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_finetuned_ner_10epochs_v2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_finetuned_ner_10epochs_v2_pipeline_en.md new file mode 100644 index 00000000000000..566d9edbbd22fb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_finetuned_ner_10epochs_v2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_large_finetuned_ner_10epochs_v2_pipeline pipeline DeBertaForTokenClassification from ABrinkmann +author: John Snow Labs +name: deberta_v3_large_finetuned_ner_10epochs_v2_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large_finetuned_ner_10epochs_v2_pipeline` is a English model originally trained by ABrinkmann. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_finetuned_ner_10epochs_v2_pipeline_en_5.5.0_3.0_1725474522859.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_finetuned_ner_10epochs_v2_pipeline_en_5.5.0_3.0_1725474522859.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("deberta_v3_large_finetuned_ner_10epochs_v2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("deberta_v3_large_finetuned_ner_10epochs_v2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_finetuned_ner_10epochs_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/ABrinkmann/deberta-v3-large-finetuned-ner-10epochs-V2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_finetuned_synthetic_paraphrase_only_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_finetuned_synthetic_paraphrase_only_en.md new file mode 100644 index 00000000000000..21b76bc26b8c60 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_finetuned_synthetic_paraphrase_only_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_large_finetuned_synthetic_paraphrase_only DeBertaForSequenceClassification from domenicrosati +author: John Snow Labs +name: deberta_v3_large_finetuned_synthetic_paraphrase_only +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large_finetuned_synthetic_paraphrase_only` is a English model originally trained by domenicrosati. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_finetuned_synthetic_paraphrase_only_en_5.5.0_3.0_1725468253280.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_finetuned_synthetic_paraphrase_only_en_5.5.0_3.0_1725468253280.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large_finetuned_synthetic_paraphrase_only","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large_finetuned_synthetic_paraphrase_only", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_finetuned_synthetic_paraphrase_only| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/domenicrosati/deberta-v3-large-finetuned-synthetic-paraphrase-only \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_finetuned_synthetic_paraphrase_only_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_finetuned_synthetic_paraphrase_only_pipeline_en.md new file mode 100644 index 00000000000000..6de0124276fc7e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_finetuned_synthetic_paraphrase_only_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_large_finetuned_synthetic_paraphrase_only_pipeline pipeline DeBertaForSequenceClassification from domenicrosati +author: John Snow Labs +name: deberta_v3_large_finetuned_synthetic_paraphrase_only_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large_finetuned_synthetic_paraphrase_only_pipeline` is a English model originally trained by domenicrosati. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_finetuned_synthetic_paraphrase_only_pipeline_en_5.5.0_3.0_1725468330509.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_finetuned_synthetic_paraphrase_only_pipeline_en_5.5.0_3.0_1725468330509.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("deberta_v3_large_finetuned_synthetic_paraphrase_only_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("deberta_v3_large_finetuned_synthetic_paraphrase_only_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_finetuned_synthetic_paraphrase_only_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/domenicrosati/deberta-v3-large-finetuned-synthetic-paraphrase-only + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_sentiment_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_sentiment_pipeline_en.md new file mode 100644 index 00000000000000..379c9441521134 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_sentiment_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_large_sentiment_pipeline pipeline DeBertaForSequenceClassification from Elron +author: John Snow Labs +name: deberta_v3_large_sentiment_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large_sentiment_pipeline` is a English model originally trained by Elron. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_sentiment_pipeline_en_5.5.0_3.0_1725463799971.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_sentiment_pipeline_en_5.5.0_3.0_1725463799971.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("deberta_v3_large_sentiment_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("deberta_v3_large_sentiment_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_sentiment_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/Elron/deberta-v3-large-sentiment + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_survey_main_passage_old_facts_rater_all_gpt4_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_survey_main_passage_old_facts_rater_all_gpt4_en.md new file mode 100644 index 00000000000000..3ea60a04bc2419 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_survey_main_passage_old_facts_rater_all_gpt4_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_large_survey_main_passage_old_facts_rater_all_gpt4 DeBertaForSequenceClassification from domenicrosati +author: John Snow Labs +name: deberta_v3_large_survey_main_passage_old_facts_rater_all_gpt4 +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large_survey_main_passage_old_facts_rater_all_gpt4` is a English model originally trained by domenicrosati. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_survey_main_passage_old_facts_rater_all_gpt4_en_5.5.0_3.0_1725469430551.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_survey_main_passage_old_facts_rater_all_gpt4_en_5.5.0_3.0_1725469430551.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large_survey_main_passage_old_facts_rater_all_gpt4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large_survey_main_passage_old_facts_rater_all_gpt4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_survey_main_passage_old_facts_rater_all_gpt4| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/domenicrosati/deberta-v3-large-survey-main_passage_old_facts-rater-all-gpt4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_survey_related_passage_old_facts_rater_all_gpt4_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_survey_related_passage_old_facts_rater_all_gpt4_en.md new file mode 100644 index 00000000000000..daa1af95049679 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_large_survey_related_passage_old_facts_rater_all_gpt4_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_large_survey_related_passage_old_facts_rater_all_gpt4 DeBertaForSequenceClassification from domenicrosati +author: John Snow Labs +name: deberta_v3_large_survey_related_passage_old_facts_rater_all_gpt4 +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large_survey_related_passage_old_facts_rater_all_gpt4` is a English model originally trained by domenicrosati. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_survey_related_passage_old_facts_rater_all_gpt4_en_5.5.0_3.0_1725463538060.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_survey_related_passage_old_facts_rater_all_gpt4_en_5.5.0_3.0_1725463538060.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large_survey_related_passage_old_facts_rater_all_gpt4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large_survey_related_passage_old_facts_rater_all_gpt4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_survey_related_passage_old_facts_rater_all_gpt4| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/domenicrosati/deberta-v3-large-survey-related_passage_old_facts-rater-all-gpt4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_smallsed_rte_finetuned_rte_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_smallsed_rte_finetuned_rte_pipeline_en.md new file mode 100644 index 00000000000000..745f53d9b7e811 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_smallsed_rte_finetuned_rte_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_smallsed_rte_finetuned_rte_pipeline pipeline DeBertaForSequenceClassification from ZaaCo +author: John Snow Labs +name: deberta_v3_smallsed_rte_finetuned_rte_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_smallsed_rte_finetuned_rte_pipeline` is a English model originally trained by ZaaCo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_smallsed_rte_finetuned_rte_pipeline_en_5.5.0_3.0_1725469113862.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_smallsed_rte_finetuned_rte_pipeline_en_5.5.0_3.0_1725469113862.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("deberta_v3_smallsed_rte_finetuned_rte_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("deberta_v3_smallsed_rte_finetuned_rte_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_smallsed_rte_finetuned_rte_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|417.8 MB| + +## References + +https://huggingface.co/ZaaCo/deberta-v3-smallsed_rte-finetuned-rte + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_xsmall_mnli_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_xsmall_mnli_pipeline_en.md new file mode 100644 index 00000000000000..34c78976c7f362 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_v3_xsmall_mnli_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_xsmall_mnli_pipeline pipeline DeBertaForSequenceClassification from cliang1453 +author: John Snow Labs +name: deberta_v3_xsmall_mnli_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_xsmall_mnli_pipeline` is a English model originally trained by cliang1453. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_xsmall_mnli_pipeline_en_5.5.0_3.0_1725468526808.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_xsmall_mnli_pipeline_en_5.5.0_3.0_1725468526808.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("deberta_v3_xsmall_mnli_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("deberta_v3_xsmall_mnli_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_xsmall_mnli_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|241.3 MB| + +## References + +https://huggingface.co/cliang1453/deberta-v3-xsmall-mnli + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_xlarge_em_abt_buy_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_xlarge_em_abt_buy_en.md new file mode 100644 index 00000000000000..6cf594da6b128a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_xlarge_em_abt_buy_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_xlarge_em_abt_buy DeBertaForSequenceClassification from kerpr +author: John Snow Labs +name: deberta_xlarge_em_abt_buy +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_xlarge_em_abt_buy` is a English model originally trained by kerpr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_xlarge_em_abt_buy_en_5.5.0_3.0_1725462091466.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_xlarge_em_abt_buy_en_5.5.0_3.0_1725462091466.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_xlarge_em_abt_buy","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_xlarge_em_abt_buy", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_xlarge_em_abt_buy| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/kerpr/deberta-xlarge-em-abt-buy \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deberta_xlarge_em_abt_buy_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-deberta_xlarge_em_abt_buy_pipeline_en.md new file mode 100644 index 00000000000000..efb9f3e003415c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deberta_xlarge_em_abt_buy_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_xlarge_em_abt_buy_pipeline pipeline DeBertaForSequenceClassification from kerpr +author: John Snow Labs +name: deberta_xlarge_em_abt_buy_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_xlarge_em_abt_buy_pipeline` is a English model originally trained by kerpr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_xlarge_em_abt_buy_pipeline_en_5.5.0_3.0_1725462235394.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_xlarge_em_abt_buy_pipeline_en_5.5.0_3.0_1725462235394.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("deberta_xlarge_em_abt_buy_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("deberta_xlarge_em_abt_buy_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_xlarge_em_abt_buy_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/kerpr/deberta-xlarge-em-abt-buy + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-debertabaseemotionbalanced_en.md b/docs/_posts/ahmedlone127/2024-09-04-debertabaseemotionbalanced_en.md new file mode 100644 index 00000000000000..0162f24f2a73e6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-debertabaseemotionbalanced_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English debertabaseemotionbalanced DeBertaForSequenceClassification from aliciiavs +author: John Snow Labs +name: debertabaseemotionbalanced +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`debertabaseemotionbalanced` is a English model originally trained by aliciiavs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/debertabaseemotionbalanced_en_5.5.0_3.0_1725469309837.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/debertabaseemotionbalanced_en_5.5.0_3.0_1725469309837.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DeBertaForSequenceClassification.pretrained("debertabaseemotionbalanced","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DeBertaForSequenceClassification.pretrained("debertabaseemotionbalanced", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|debertabaseemotionbalanced| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|593.4 MB| + +## References + +https://huggingface.co/aliciiavs/debertabaseemotionbalanced \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-debertabaseemotionbalanced_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-debertabaseemotionbalanced_pipeline_en.md new file mode 100644 index 00000000000000..fd29176ae049fb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-debertabaseemotionbalanced_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English debertabaseemotionbalanced_pipeline pipeline DeBertaForSequenceClassification from aliciiavs +author: John Snow Labs +name: debertabaseemotionbalanced_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`debertabaseemotionbalanced_pipeline` is a English model originally trained by aliciiavs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/debertabaseemotionbalanced_pipeline_en_5.5.0_3.0_1725469360475.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/debertabaseemotionbalanced_pipeline_en_5.5.0_3.0_1725469360475.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("debertabaseemotionbalanced_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("debertabaseemotionbalanced_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|debertabaseemotionbalanced_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|593.4 MB| + +## References + +https://huggingface.co/aliciiavs/debertabaseemotionbalanced + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-deep_2_en.md b/docs/_posts/ahmedlone127/2024-09-04-deep_2_en.md new file mode 100644 index 00000000000000..6611727d2b7622 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-deep_2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deep_2 RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: deep_2 +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deep_2` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deep_2_en_5.5.0_3.0_1725453403689.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deep_2_en_5.5.0_3.0_1725453403689.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("deep_2","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("deep_2", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
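+
+After `transform`, each row's predicted label sits in the `class` output column, and the annotation metadata typically carries per-label scores as well. A short inspection sketch, assuming the `pipelineDF` from the example above:
+
+```python
+# "class" is the classifier's output column set above; metadata holds the score map.
+pipelineDF.select("text", "class.result", "class.metadata").show(truncate=False)
+```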
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deep_2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/Deep_2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-delivery_balanced_distilbert_base_uncased_v2_en.md b/docs/_posts/ahmedlone127/2024-09-04-delivery_balanced_distilbert_base_uncased_v2_en.md new file mode 100644 index 00000000000000..0ae115661a6471 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-delivery_balanced_distilbert_base_uncased_v2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English delivery_balanced_distilbert_base_uncased_v2 DistilBertForSequenceClassification from chuuhtetnaing +author: John Snow Labs +name: delivery_balanced_distilbert_base_uncased_v2 +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`delivery_balanced_distilbert_base_uncased_v2` is a English model originally trained by chuuhtetnaing. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/delivery_balanced_distilbert_base_uncased_v2_en_5.5.0_3.0_1725489504577.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/delivery_balanced_distilbert_base_uncased_v2_en_5.5.0_3.0_1725489504577.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DistilBertForSequenceClassification.pretrained("delivery_balanced_distilbert_base_uncased_v2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DistilBertForSequenceClassification.pretrained("delivery_balanced_distilbert_base_uncased_v2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|delivery_balanced_distilbert_base_uncased_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/chuuhtetnaing/delivery-balanced-distilbert-base-uncased-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-disbert_finetune_for_gentriple_en.md b/docs/_posts/ahmedlone127/2024-09-04-disbert_finetune_for_gentriple_en.md new file mode 100644 index 00000000000000..a83af98a17010f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-disbert_finetune_for_gentriple_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English disbert_finetune_for_gentriple DistilBertForTokenClassification from Malcolmcjj13 +author: John Snow Labs +name: disbert_finetune_for_gentriple +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`disbert_finetune_for_gentriple` is a English model originally trained by Malcolmcjj13. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/disbert_finetune_for_gentriple_en_5.5.0_3.0_1725475759188.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/disbert_finetune_for_gentriple_en_5.5.0_3.0_1725475759188.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("disbert_finetune_for_gentriple","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("disbert_finetune_for_gentriple", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
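+
+Here the model emits one `ner` annotation per token, so the predicted tag sequence runs parallel to the token sequence. A minimal sketch for viewing both, assuming the Python pipeline above has just been run:
+
+```python
+# Quick check (illustrative): tokens and their predicted NER tags, as parallel arrays.
+pipelineDF.select("token.result", "ner.result").show(truncate=False)
+```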
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|disbert_finetune_for_gentriple| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/Malcolmcjj13/_disbert_finetune_for_gentriple \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distil_bert_docred_ner_en.md b/docs/_posts/ahmedlone127/2024-09-04-distil_bert_docred_ner_en.md new file mode 100644 index 00000000000000..68974eca68516f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distil_bert_docred_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distil_bert_docred_ner DistilBertForTokenClassification from dennishauser +author: John Snow Labs +name: distil_bert_docred_ner +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distil_bert_docred_ner` is a English model originally trained by dennishauser. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distil_bert_docred_ner_en_5.5.0_3.0_1725448688669.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distil_bert_docred_ner_en_5.5.0_3.0_1725448688669.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distil_bert_docred_ner","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distil_bert_docred_ner", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distil_bert_docred_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/dennishauser/distil-bert-docred-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_cased_finetuned_conll2003_english_ner_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_cased_finetuned_conll2003_english_ner_en.md new file mode 100644 index 00000000000000..e52f85604dd8e4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_cased_finetuned_conll2003_english_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_cased_finetuned_conll2003_english_ner DistilBertForTokenClassification from MrRobson9 +author: John Snow Labs +name: distilbert_base_cased_finetuned_conll2003_english_ner +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_cased_finetuned_conll2003_english_ner` is a English model originally trained by MrRobson9. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_finetuned_conll2003_english_ner_en_5.5.0_3.0_1725448198912.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_finetuned_conll2003_english_ner_en_5.5.0_3.0_1725448198912.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_cased_finetuned_conll2003_english_ner","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_cased_finetuned_conll2003_english_ner", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_cased_finetuned_conll2003_english_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/MrRobson9/distilbert-base-cased-finetuned-conll2003-english-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_cased_finetuned_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_cased_finetuned_pipeline_en.md new file mode 100644 index 00000000000000..4039655fc4d041 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_cased_finetuned_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_cased_finetuned_pipeline pipeline DistilBertEmbeddings from GusNicho +author: John Snow Labs +name: distilbert_base_cased_finetuned_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_cased_finetuned_pipeline` is a English model originally trained by GusNicho. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_finetuned_pipeline_en_5.5.0_3.0_1725414412546.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_finetuned_pipeline_en_5.5.0_3.0_1725414412546.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_cased_finetuned_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_cased_finetuned_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
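+
+The snippet above assumes `df` is a Spark DataFrame with a `text` column. A minimal, illustrative way to build one for a quick test:
+
+```python
+# Hypothetical one-row input DataFrame; any DataFrame with a `text` column works.
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+```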
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_cased_finetuned_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/GusNicho/distilbert-base-cased-finetuned + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_cased_pii_english_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_cased_pii_english_pipeline_en.md new file mode 100644 index 00000000000000..360f2b4997fb77 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_cased_pii_english_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_cased_pii_english_pipeline pipeline DistilBertForTokenClassification from yonigo +author: John Snow Labs +name: distilbert_base_cased_pii_english_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_cased_pii_english_pipeline` is a English model originally trained by yonigo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_pii_english_pipeline_en_5.5.0_3.0_1725448451233.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_pii_english_pipeline_en_5.5.0_3.0_1725448451233.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_cased_pii_english_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_cased_pii_english_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_cased_pii_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|244.0 MB| + +## References + +https://huggingface.co/yonigo/distilbert-base-cased-pii-en + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_data_wnut_17_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_data_wnut_17_pipeline_en.md new file mode 100644 index 00000000000000..07fccbc96ff5ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_data_wnut_17_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_data_wnut_17_pipeline pipeline DistilBertForTokenClassification from Pongprecha +author: John Snow Labs +name: distilbert_base_data_wnut_17_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_data_wnut_17_pipeline` is a English model originally trained by Pongprecha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_data_wnut_17_pipeline_en_5.5.0_3.0_1725476530373.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_data_wnut_17_pipeline_en_5.5.0_3.0_1725476530373.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_data_wnut_17_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_data_wnut_17_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_data_wnut_17_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Pongprecha/distilbert_base_data_wnut_17 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_english_greek_modern_cased_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_english_greek_modern_cased_en.md new file mode 100644 index 00000000000000..d6267ab5d1dbae --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_english_greek_modern_cased_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_english_greek_modern_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_greek_modern_cased +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_english_greek_modern_cased` is a English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_greek_modern_cased_en_5.5.0_3.0_1725414396779.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_greek_modern_cased_en_5.5.0_3.0_1725414396779.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_greek_modern_cased","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_greek_modern_cased","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
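+
+The token vectors produced by this model end up in the `embeddings` field of the output annotations. A minimal sketch for pulling the raw vectors out, assuming the Python pipeline above has just been run:
+
+```python
+# Quick check (illustrative): one row per token embedding vector.
+pipelineDF.selectExpr("explode(embeddings.embeddings) as token_vector").show()
+```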
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_greek_modern_cased| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|248.8 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-el-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_multilingual_cased_finetuned_english_portuguese_spanish_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_multilingual_cased_finetuned_english_portuguese_spanish_pipeline_xx.md new file mode 100644 index 00000000000000..d6471746380471 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_multilingual_cased_finetuned_english_portuguese_spanish_pipeline_xx.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Multilingual distilbert_base_multilingual_cased_finetuned_english_portuguese_spanish_pipeline pipeline DistilBertEmbeddings from lusxvr +author: John Snow Labs +name: distilbert_base_multilingual_cased_finetuned_english_portuguese_spanish_pipeline +date: 2024-09-04 +tags: [xx, open_source, pipeline, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_multilingual_cased_finetuned_english_portuguese_spanish_pipeline` is a Multilingual model originally trained by lusxvr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_multilingual_cased_finetuned_english_portuguese_spanish_pipeline_xx_5.5.0_3.0_1725413926804.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_multilingual_cased_finetuned_english_portuguese_spanish_pipeline_xx_5.5.0_3.0_1725413926804.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_multilingual_cased_finetuned_english_portuguese_spanish_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_multilingual_cased_finetuned_english_portuguese_spanish_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_multilingual_cased_finetuned_english_portuguese_spanish_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|505.4 MB| + +## References + +https://huggingface.co/lusxvr/distilbert-base-multilingual-cased-finetuned-en_pt_es + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_adl_hw1_russianroulette_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_adl_hw1_russianroulette_en.md new file mode 100644 index 00000000000000..5726c6b97b24aa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_adl_hw1_russianroulette_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_adl_hw1_russianroulette DistilBertForSequenceClassification from russianroulette +author: John Snow Labs +name: distilbert_base_uncased_finetuned_adl_hw1_russianroulette +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_adl_hw1_russianroulette` is a English model originally trained by russianroulette. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_adl_hw1_russianroulette_en_5.5.0_3.0_1725490152606.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_adl_hw1_russianroulette_en_5.5.0_3.0_1725490152606.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_adl_hw1_russianroulette","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_adl_hw1_russianroulette", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_adl_hw1_russianroulette| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.9 MB| + +## References + +https://huggingface.co/russianroulette/distilbert-base-uncased-finetuned-adl_hw1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_adl_hw1_russianroulette_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_adl_hw1_russianroulette_pipeline_en.md new file mode 100644 index 00000000000000..858c7c5ed911f9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_adl_hw1_russianroulette_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_adl_hw1_russianroulette_pipeline pipeline DistilBertForSequenceClassification from russianroulette +author: John Snow Labs +name: distilbert_base_uncased_finetuned_adl_hw1_russianroulette_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_adl_hw1_russianroulette_pipeline` is a English model originally trained by russianroulette. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_adl_hw1_russianroulette_pipeline_en_5.5.0_3.0_1725490164987.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_adl_hw1_russianroulette_pipeline_en_5.5.0_3.0_1725490164987.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_adl_hw1_russianroulette_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_adl_hw1_russianroulette_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_adl_hw1_russianroulette_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.9 MB| + +## References + +https://huggingface.co/russianroulette/distilbert-base-uncased-finetuned-adl_hw1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_clinc_aicoder009_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_clinc_aicoder009_en.md new file mode 100644 index 00000000000000..e717de4b045c2d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_clinc_aicoder009_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_clinc_aicoder009 DistilBertForSequenceClassification from AICODER009 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_clinc_aicoder009 +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_clinc_aicoder009` is a English model originally trained by AICODER009. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_clinc_aicoder009_en_5.5.0_3.0_1725489699830.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_clinc_aicoder009_en_5.5.0_3.0_1725489699830.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_clinc_aicoder009","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_clinc_aicoder009", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_clinc_aicoder009| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.9 MB| + +## References + +https://huggingface.co/AICODER009/distilbert-base-uncased-finetuned-clinc \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_clinc_aicoder009_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_clinc_aicoder009_pipeline_en.md new file mode 100644 index 00000000000000..424294251e4935 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_clinc_aicoder009_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_clinc_aicoder009_pipeline pipeline DistilBertForSequenceClassification from AICODER009 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_clinc_aicoder009_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_clinc_aicoder009_pipeline` is a English model originally trained by AICODER009. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_clinc_aicoder009_pipeline_en_5.5.0_3.0_1725489711778.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_clinc_aicoder009_pipeline_en_5.5.0_3.0_1725489711778.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_clinc_aicoder009_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_clinc_aicoder009_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_clinc_aicoder009_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.9 MB| + +## References + +https://huggingface.co/AICODER009/distilbert-base-uncased-finetuned-clinc + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_clinc_einsteinkim_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_clinc_einsteinkim_en.md new file mode 100644 index 00000000000000..c469631dcfd4c1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_clinc_einsteinkim_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_clinc_einsteinkim DistilBertForSequenceClassification from EinsteinKim +author: John Snow Labs +name: distilbert_base_uncased_finetuned_clinc_einsteinkim +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_clinc_einsteinkim` is a English model originally trained by EinsteinKim. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_clinc_einsteinkim_en_5.5.0_3.0_1725490064150.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_clinc_einsteinkim_en_5.5.0_3.0_1725490064150.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_clinc_einsteinkim","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_clinc_einsteinkim", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_clinc_einsteinkim| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.9 MB| + +## References + +https://huggingface.co/EinsteinKim/distilbert-base-uncased-finetuned-clinc \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_cola_dev2k_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_cola_dev2k_pipeline_en.md new file mode 100644 index 00000000000000..a316da7250bdce --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_cola_dev2k_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_cola_dev2k_pipeline pipeline DistilBertForSequenceClassification from dev2k +author: John Snow Labs +name: distilbert_base_uncased_finetuned_cola_dev2k_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_cola_dev2k_pipeline` is a English model originally trained by dev2k. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_cola_dev2k_pipeline_en_5.5.0_3.0_1725489821953.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_cola_dev2k_pipeline_en_5.5.0_3.0_1725489821953.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_cola_dev2k_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_cola_dev2k_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_cola_dev2k_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.6 MB| + +## References + +https://huggingface.co/dev2k/distilbert-base-uncased-finetuned-cola + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_emotion_wzy1924561588_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_emotion_wzy1924561588_pipeline_en.md new file mode 100644 index 00000000000000..fd4f3dc869f6e4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_emotion_wzy1924561588_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_emotion_wzy1924561588_pipeline pipeline DistilBertForSequenceClassification from WzY1924561588 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_emotion_wzy1924561588_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_emotion_wzy1924561588_pipeline` is a English model originally trained by WzY1924561588. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_wzy1924561588_pipeline_en_5.5.0_3.0_1725489519494.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_wzy1924561588_pipeline_en_5.5.0_3.0_1725489519494.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_emotion_wzy1924561588_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_emotion_wzy1924561588_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_emotion_wzy1924561588_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/WzY1924561588/distilbert-base-uncased-finetuned-emotion + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_finer_test_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_finer_test_pipeline_en.md new file mode 100644 index 00000000000000..cbd9ddee124cc8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_finer_test_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_finer_test_pipeline pipeline DistilBertForTokenClassification from bodias +author: John Snow Labs +name: distilbert_base_uncased_finetuned_finer_test_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_finer_test_pipeline` is a English model originally trained by bodias. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_finer_test_pipeline_en_5.5.0_3.0_1725492660177.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_finer_test_pipeline_en_5.5.0_3.0_1725492660177.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_finer_test_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_finer_test_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_finer_test_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/bodias/distilbert-base-uncased-finetuned-FiNER_test + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb1004_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb1004_en.md new file mode 100644 index 00000000000000..4a678150c0d737 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb1004_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb1004 DistilBertEmbeddings from Favourphilic +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb1004 +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb1004` is a English model originally trained by Favourphilic. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb1004_en_5.5.0_3.0_1725418555529.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb1004_en_5.5.0_3.0_1725418555529.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb1004","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb1004","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb1004| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Favourphilic/distilbert-base-uncased-finetuned-imdb1004 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_ce_kishi_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_ce_kishi_pipeline_en.md new file mode 100644 index 00000000000000..1fff4f206adf52 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_ce_kishi_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_ce_kishi_pipeline pipeline DistilBertEmbeddings from ce-kishi +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_ce_kishi_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_ce_kishi_pipeline` is a English model originally trained by ce-kishi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_ce_kishi_pipeline_en_5.5.0_3.0_1725418692034.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_ce_kishi_pipeline_en_5.5.0_3.0_1725418692034.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_ce_kishi_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_ce_kishi_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_ce_kishi_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ce-kishi/distilbert-base-uncased-finetuned-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_dvijay_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_dvijay_en.md new file mode 100644 index 00000000000000..a968667d560927 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_dvijay_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_dvijay DistilBertEmbeddings from dvijay +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_dvijay +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_dvijay` is a English model originally trained by dvijay. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_dvijay_en_5.5.0_3.0_1725414229944.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_dvijay_en_5.5.0_3.0_1725414229944.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_dvijay","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_dvijay","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_dvijay| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/dvijay/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_dvijay_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_dvijay_pipeline_en.md new file mode 100644 index 00000000000000..2ef81f3234d042 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_dvijay_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_dvijay_pipeline pipeline DistilBertEmbeddings from dvijay +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_dvijay_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_dvijay_pipeline` is a English model originally trained by dvijay. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_dvijay_pipeline_en_5.5.0_3.0_1725414243423.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_dvijay_pipeline_en_5.5.0_3.0_1725414243423.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_dvijay_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+val df = Seq("I love spark-nlp").toDF("text")
+
+val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_dvijay_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
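+For quick checks on a few strings, `PretrainedPipeline` also exposes an `annotate` helper that avoids building a DataFrame; a minimal sketch, reusing the pipeline name from the example above:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# Downloads the same pipeline as above; assumes a running Spark NLP session.
+pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_dvijay_pipeline", lang="en")
+
+# annotate() runs every stage on a single string and returns a dict keyed by output column.
+result = pipeline.annotate("I love spark-nlp")
+print(list(result.keys()))   # e.g. ['document', 'token', 'embeddings']
+print(result["token"])       # tokens produced by the TokenizerModel stage
+```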
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_dvijay_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/dvijay/distilbert-base-uncased-finetuned-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_greyfoss_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_greyfoss_en.md new file mode 100644 index 00000000000000..3769b83cd2d7cb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_greyfoss_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_greyfoss DistilBertEmbeddings from greyfoss +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_greyfoss +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_greyfoss` is a English model originally trained by greyfoss. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_greyfoss_en_5.5.0_3.0_1725413983227.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_greyfoss_en_5.5.0_3.0_1725413983227.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_greyfoss","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_greyfoss","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_greyfoss| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/greyfoss/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_jaybdev_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_jaybdev_en.md new file mode 100644 index 00000000000000..cac85827acdf2b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_jaybdev_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_jaybdev DistilBertEmbeddings from JayBDev +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_jaybdev +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_jaybdev` is a English model originally trained by JayBDev. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_jaybdev_en_5.5.0_3.0_1725414000081.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_jaybdev_en_5.5.0_3.0_1725414000081.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_jaybdev","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_jaybdev","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_jaybdev| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/JayBDev/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_jaybdev_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_jaybdev_pipeline_en.md new file mode 100644 index 00000000000000..2e52bc3ccc8fbc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_jaybdev_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_jaybdev_pipeline pipeline DistilBertEmbeddings from JayBDev +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_jaybdev_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_jaybdev_pipeline` is a English model originally trained by JayBDev. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_jaybdev_pipeline_en_5.5.0_3.0_1725414014066.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_jaybdev_pipeline_en_5.5.0_3.0_1725414014066.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_jaybdev_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+val df = Seq("I love spark-nlp").toDF("text")
+
+val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_jaybdev_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_jaybdev_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/JayBDev/distilbert-base-uncased-finetuned-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_r0in_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_r0in_en.md new file mode 100644 index 00000000000000..6508f396eb2e6e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_r0in_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_r0in DistilBertEmbeddings from r0in +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_r0in +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_r0in` is a English model originally trained by r0in. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_r0in_en_5.5.0_3.0_1725413765308.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_r0in_en_5.5.0_3.0_1725413765308.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_r0in","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_r0in","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_r0in| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/r0in/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_sbulut_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_sbulut_en.md new file mode 100644 index 00000000000000..ecc65d22a65878 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_sbulut_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_sbulut DistilBertEmbeddings from sbulut +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_sbulut +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_sbulut` is a English model originally trained by sbulut. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_sbulut_en_5.5.0_3.0_1725414400362.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_sbulut_en_5.5.0_3.0_1725414400362.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_sbulut","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_sbulut","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_sbulut| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/sbulut/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_shahzebnaveed_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_shahzebnaveed_pipeline_en.md new file mode 100644 index 00000000000000..b73485de70ef2e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_shahzebnaveed_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_shahzebnaveed_pipeline pipeline DistilBertEmbeddings from shahzebnaveed +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_shahzebnaveed_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_shahzebnaveed_pipeline` is a English model originally trained by shahzebnaveed. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_shahzebnaveed_pipeline_en_5.5.0_3.0_1725418144977.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_shahzebnaveed_pipeline_en_5.5.0_3.0_1725418144977.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_shahzebnaveed_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+val df = Seq("I love spark-nlp").toDF("text")
+
+val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_shahzebnaveed_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_shahzebnaveed_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/shahzebnaveed/distilbert-base-uncased-finetuned-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_thepines_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_thepines_en.md new file mode 100644 index 00000000000000..792629c0f917bb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_thepines_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_thepines DistilBertEmbeddings from ThepineS +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_thepines +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_thepines` is a English model originally trained by ThepineS. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_thepines_en_5.5.0_3.0_1725418460520.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_thepines_en_5.5.0_3.0_1725418460520.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_thepines","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_thepines","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_thepines| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ThepineS/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_thepines_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_thepines_pipeline_en.md new file mode 100644 index 00000000000000..9397f0cd138c98 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_thepines_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_thepines_pipeline pipeline DistilBertEmbeddings from ThepineS +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_thepines_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_thepines_pipeline` is a English model originally trained by ThepineS. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_thepines_pipeline_en_5.5.0_3.0_1725418473410.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_thepines_pipeline_en_5.5.0_3.0_1725418473410.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_thepines_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+val df = Seq("I love spark-nlp").toDF("text")
+
+val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_thepines_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_thepines_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ThepineS/distilbert-base-uncased-finetuned-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_walterg777_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_walterg777_en.md new file mode 100644 index 00000000000000..fa287cd7fe97b0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_imdb_walterg777_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_walterg777 DistilBertEmbeddings from walterg777 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_walterg777 +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_walterg777` is a English model originally trained by walterg777. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_walterg777_en_5.5.0_3.0_1725418761803.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_walterg777_en_5.5.0_3.0_1725418761803.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_walterg777","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_walterg777","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_walterg777| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/walterg777/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_masakhanenews_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_masakhanenews_en.md new file mode 100644 index 00000000000000..93c877108903c6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_masakhanenews_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_masakhanenews DistilBertEmbeddings from Dangurangu +author: John Snow Labs +name: distilbert_base_uncased_finetuned_masakhanenews +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_masakhanenews` is a English model originally trained by Dangurangu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_masakhanenews_en_5.5.0_3.0_1725413763539.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_masakhanenews_en_5.5.0_3.0_1725413763539.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_masakhanenews","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_masakhanenews","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_masakhanenews| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Dangurangu/distilbert-base-uncased-finetuned-masakhaneNews \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_anuroopkeshav_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_anuroopkeshav_pipeline_en.md new file mode 100644 index 00000000000000..86aa450408918b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_anuroopkeshav_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_anuroopkeshav_pipeline pipeline DistilBertForTokenClassification from anuroopkeshav +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_anuroopkeshav_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_anuroopkeshav_pipeline` is a English model originally trained by anuroopkeshav. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_anuroopkeshav_pipeline_en_5.5.0_3.0_1725448335445.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_anuroopkeshav_pipeline_en_5.5.0_3.0_1725448335445.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_ner_anuroopkeshav_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+val df = Seq("I love spark-nlp").toDF("text")
+
+val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_ner_anuroopkeshav_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_anuroopkeshav_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/anuroopkeshav/distilbert-base-uncased-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_cadec_active_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_cadec_active_en.md new file mode 100644 index 00000000000000..8392ccd5a516f2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_cadec_active_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_cadec_active DistilBertForTokenClassification from csNoHug +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_cadec_active +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_cadec_active` is a English model originally trained by csNoHug. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_cadec_active_en_5.5.0_3.0_1725448908332.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_cadec_active_en_5.5.0_3.0_1725448908332.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_cadec_active","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_cadec_active", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
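+A minimal sketch of how the predicted tags can be read back, assuming the Python example above has already produced `pipelineDF`:
+
+```python
+# Assumes `pipelineDF` from the example above; token.result and ner.result are
+# aligned arrays of token texts and predicted labels.
+pipelineDF.selectExpr("token.result as tokens", "ner.result as ner_labels") \
+    .show(truncate=False)
+```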
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_cadec_active| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/csNoHug/distilbert-base-uncased-finetuned-ner-cadec-active \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_chuqiaog_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_chuqiaog_en.md new file mode 100644 index 00000000000000..3df3d63b65d64a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_chuqiaog_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_chuqiaog DistilBertForTokenClassification from chuqiaog +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_chuqiaog +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_chuqiaog` is a English model originally trained by chuqiaog. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_chuqiaog_en_5.5.0_3.0_1725492465592.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_chuqiaog_en_5.5.0_3.0_1725492465592.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_chuqiaog","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_chuqiaog", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_chuqiaog| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/chuqiaog/distilbert-base-uncased-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_chuqiaog_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_chuqiaog_pipeline_en.md new file mode 100644 index 00000000000000..d21616e697e7e7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_chuqiaog_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_chuqiaog_pipeline pipeline DistilBertForTokenClassification from chuqiaog +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_chuqiaog_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_chuqiaog_pipeline` is a English model originally trained by chuqiaog. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_chuqiaog_pipeline_en_5.5.0_3.0_1725492478237.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_chuqiaog_pipeline_en_5.5.0_3.0_1725492478237.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_ner_chuqiaog_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+val df = Seq("I love spark-nlp").toDF("text")
+
+val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_ner_chuqiaog_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_chuqiaog_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/chuqiaog/distilbert-base-uncased-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_emilyblah_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_emilyblah_en.md new file mode 100644 index 00000000000000..52af188556154b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_emilyblah_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_emilyblah DistilBertForTokenClassification from emilyblah +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_emilyblah +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_emilyblah` is a English model originally trained by emilyblah. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_emilyblah_en_5.5.0_3.0_1725460559022.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_emilyblah_en_5.5.0_3.0_1725460559022.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_emilyblah","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_emilyblah", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_emilyblah| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/emilyblah/distilbert-base-uncased-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_mawiwawi_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_mawiwawi_en.md new file mode 100644 index 00000000000000..ed88ff652f09da --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_mawiwawi_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_mawiwawi DistilBertForTokenClassification from Mawiwawi +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_mawiwawi +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_mawiwawi` is a English model originally trained by Mawiwawi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_mawiwawi_en_5.5.0_3.0_1725492437567.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_mawiwawi_en_5.5.0_3.0_1725492437567.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_mawiwawi","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_mawiwawi", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_mawiwawi| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Mawiwawi/distilbert-base-uncased-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_mawiwawi_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_mawiwawi_pipeline_en.md new file mode 100644 index 00000000000000..514a409f06d3ae --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_mawiwawi_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_mawiwawi_pipeline pipeline DistilBertForTokenClassification from Mawiwawi +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_mawiwawi_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_mawiwawi_pipeline` is a English model originally trained by Mawiwawi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_mawiwawi_pipeline_en_5.5.0_3.0_1725492457533.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_mawiwawi_pipeline_en_5.5.0_3.0_1725492457533.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_ner_mawiwawi_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+val df = Seq("I love spark-nlp").toDF("text")
+
+val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_ner_mawiwawi_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_mawiwawi_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.4 MB| + +## References + +https://huggingface.co/Mawiwawi/distilbert-base-uncased-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_misterstino_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_misterstino_en.md new file mode 100644 index 00000000000000..52b0a9337dfc4d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_misterstino_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_misterstino DistilBertForTokenClassification from MisterStino +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_misterstino +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_misterstino` is a English model originally trained by MisterStino. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_misterstino_en_5.5.0_3.0_1725448199322.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_misterstino_en_5.5.0_3.0_1725448199322.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_misterstino","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_misterstino", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_misterstino| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/MisterStino/distilbert-base-uncased-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_polo42_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_polo42_pipeline_en.md new file mode 100644 index 00000000000000..b1fecdb24b80b6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_polo42_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_polo42_pipeline pipeline DistilBertForTokenClassification from polo42 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_polo42_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_polo42_pipeline` is a English model originally trained by polo42. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_polo42_pipeline_en_5.5.0_3.0_1725448424718.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_polo42_pipeline_en_5.5.0_3.0_1725448424718.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df: any Spark DataFrame with a "text" column to annotate
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_ner_polo42_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df: any DataFrame with a "text" column to annotate
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_ner_polo42_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
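+
+For a quick check on a single string, without building a DataFrame, the pipeline's `annotate` method can be used; a minimal sketch, assuming the pipeline exposes its predictions under an `ner` output column:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_ner_polo42_pipeline", lang = "en")
+result = pipeline.annotate("John Snow Labs is based in Delaware.")
+print(result["ner"])  # one predicted tag per token; "ner" is an assumed output column name
+```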
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_polo42_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/polo42/distilbert-base-uncased-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_trubnik1967_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_trubnik1967_en.md new file mode 100644 index 00000000000000..205408220b1354 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_trubnik1967_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_trubnik1967 DistilBertForTokenClassification from Trubnik1967 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_trubnik1967 +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_trubnik1967` is a English model originally trained by Trubnik1967. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_trubnik1967_en_5.5.0_3.0_1725448321961.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_trubnik1967_en_5.5.0_3.0_1725448321961.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active SparkSession with Spark NLP started (e.g. spark = sparknlp.start())
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_trubnik1967", "en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_trubnik1967", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
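+
+Token-level tags can be grouped into entity chunks by appending Spark NLP's `NerConverter` to the pipeline; a sketch reusing the stages defined above:
+
+```python
+from sparknlp.annotator import NerConverter
+
+converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier, converter])
+pipeline.fit(data).transform(data).selectExpr("ner_chunk.result").show(truncate=False)
+```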
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_trubnik1967| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Trubnik1967/distilbert-base-uncased-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_trubnik1967_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_trubnik1967_pipeline_en.md new file mode 100644 index 00000000000000..dd96ecf38fba34 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_trubnik1967_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_trubnik1967_pipeline pipeline DistilBertForTokenClassification from Trubnik1967 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_trubnik1967_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_trubnik1967_pipeline` is a English model originally trained by Trubnik1967. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_trubnik1967_pipeline_en_5.5.0_3.0_1725448336083.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_trubnik1967_pipeline_en_5.5.0_3.0_1725448336083.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df: any Spark DataFrame with a "text" column to annotate
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_ner_trubnik1967_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df: any DataFrame with a "text" column to annotate
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_ner_trubnik1967_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_trubnik1967_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Trubnik1967/distilbert-base-uncased-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_vnear_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_vnear_en.md new file mode 100644 index 00000000000000..ba261f94129b4a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_vnear_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_vnear DistilBertForTokenClassification from VNEar +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_vnear +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_vnear` is a English model originally trained by VNEar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_vnear_en_5.5.0_3.0_1725448900541.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_vnear_en_5.5.0_3.0_1725448900541.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active SparkSession with Spark NLP started (e.g. spark = sparknlp.start())
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_vnear", "en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_vnear", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
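+
+For low-latency inference on raw strings, the fitted pipeline can be wrapped in a `LightPipeline`; a minimal sketch, assuming the `pipelineModel` fitted above:
+
+```python
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+print(light.annotate("I love spark-nlp")["ner"])  # predicted tag for each token
+```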
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_vnear| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/VNEar/distilbert-base-uncased-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_yijingzzz_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_yijingzzz_en.md new file mode 100644 index 00000000000000..a962e2a9637be8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_yijingzzz_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_yijingzzz DistilBertForTokenClassification from YijingZZZ +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_yijingzzz +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_yijingzzz` is a English model originally trained by YijingZZZ. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_yijingzzz_en_5.5.0_3.0_1725476294338.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_yijingzzz_en_5.5.0_3.0_1725476294338.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active SparkSession with Spark NLP started (e.g. spark = sparknlp.start())
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_yijingzzz", "en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_yijingzzz", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
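+
+Once fitted, the pipeline can be saved and reloaded without refetching the pretrained weights; a sketch with an illustrative local path:
+
+```python
+from pyspark.ml import PipelineModel
+
+pipelineModel.write().overwrite().save("/tmp/distilbert_ner_yijingzzz_pipeline")  # path is illustrative
+restored = PipelineModel.load("/tmp/distilbert_ner_yijingzzz_pipeline")
+restored.transform(data).selectExpr("ner.result").show(truncate=False)
+```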
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_yijingzzz| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/YijingZZZ/distilbert-base-uncased-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_yijingzzz_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_yijingzzz_pipeline_en.md new file mode 100644 index 00000000000000..955628661d3ef6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_yijingzzz_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_yijingzzz_pipeline pipeline DistilBertForTokenClassification from YijingZZZ +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_yijingzzz_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_yijingzzz_pipeline` is a English model originally trained by YijingZZZ. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_yijingzzz_pipeline_en_5.5.0_3.0_1725476306633.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_yijingzzz_pipeline_en_5.5.0_3.0_1725476306633.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df: any Spark DataFrame with a "text" column to annotate
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_ner_yijingzzz_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df: any DataFrame with a "text" column to annotate
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_ner_yijingzzz_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_yijingzzz_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/YijingZZZ/distilbert-base-uncased-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_zy666_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_zy666_pipeline_en.md new file mode 100644 index 00000000000000..99b85f8765a93a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_ner_zy666_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_zy666_pipeline pipeline DistilBertForTokenClassification from zy666 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_zy666_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_zy666_pipeline` is a English model originally trained by zy666. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_zy666_pipeline_en_5.5.0_3.0_1725476359399.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_zy666_pipeline_en_5.5.0_3.0_1725476359399.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df: any Spark DataFrame with a "text" column to annotate
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_ner_zy666_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df: any DataFrame with a "text" column to annotate
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_ner_zy666_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_zy666_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/zy666/distilbert-base-uncased-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_news_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_news_pipeline_en.md new file mode 100644 index 00000000000000..fd798d2a43719c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_news_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_news_pipeline pipeline DistilBertEmbeddings from brownnie +author: John Snow Labs +name: distilbert_base_uncased_finetuned_news_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_news_pipeline` is a English model originally trained by brownnie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_news_pipeline_en_5.5.0_3.0_1725418674345.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_news_pipeline_en_5.5.0_3.0_1725418674345.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df: any Spark DataFrame with a "text" column to annotate
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_news_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df: any DataFrame with a "text" column to annotate
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_news_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_news_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/brownnie/distilbert-base-uncased-finetuned-news + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_sayula_popoluca_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_sayula_popoluca_en.md new file mode 100644 index 00000000000000..d57c14816f20d1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_sayula_popoluca_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_sayula_popoluca DistilBertForTokenClassification from Prince6 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_sayula_popoluca +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_sayula_popoluca` is a English model originally trained by Prince6. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_sayula_popoluca_en_5.5.0_3.0_1725460669512.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_sayula_popoluca_en_5.5.0_3.0_1725460669512.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active SparkSession with Spark NLP started (e.g. spark = sparknlp.start())
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_sayula_popoluca", "en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_sayula_popoluca", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
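+
+Because the model predicts one tag per token, pairing tokens with their tags side by side is the easiest way to read the output; a sketch using Spark's `arrays_zip` on the columns produced above:
+
+```python
+from pyspark.sql import functions as F
+
+pipelineDF.select(
+    F.explode(F.arrays_zip(F.col("token.result"), F.col("ner.result"))).alias("token_tag")
+).show(truncate=False)
+```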
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_sayula_popoluca| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Prince6/distilbert-base-uncased-finetuned-pos \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz_en.md new file mode 100644 index 00000000000000..e985585e4d663d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz DistilBertEmbeddings from BatuhanYilmaz +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz +date: 2024-09-04 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz` is a English model originally trained by BatuhanYilmaz. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz_en_5.5.0_3.0_1725465494464.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz_en_5.5.0_3.0_1725465494464.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertEmbeddings
+from pyspark.ml import Pipeline
+
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz", "en") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = DistilBertEmbeddings
+    .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz", "en")
+    .setInputCols(Array("documents", "token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+</div>
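+
+The `embeddings` column produced above holds one vector per token; a quick sketch for checking the token count and embedding dimension per row:
+
+```python
+from pyspark.sql import functions as F
+
+pipelineDF.select(
+    F.size("embeddings.embeddings").alias("num_tokens"),
+    F.size(F.element_at("embeddings.embeddings", 1)).alias("embedding_dim")
+).show()
+```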
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +References + +https://huggingface.co/BatuhanYilmaz/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz_pipeline_en.md new file mode 100644 index 00000000000000..68139958763af2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz_pipeline pipeline DistilBertForQuestionAnswering from BatuhanYilmaz +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz_pipeline` is a English model originally trained by BatuhanYilmaz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz_pipeline_en_5.5.0_3.0_1725465506602.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz_pipeline_en_5.5.0_3.0_1725465506602.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df: a Spark DataFrame with "question" and "context" columns (assumed input column names)
+df = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+
+pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df: a DataFrame with "question" and "context" columns (assumed input column names)
+val df = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+
+val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/BatuhanYilmaz/distilbert-base-uncased-finetuned-squad-d5716d28 + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_d5716d28_physhunter_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_d5716d28_physhunter_en.md new file mode 100644 index 00000000000000..00b091363fab3c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_d5716d28_physhunter_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_physhunter DistilBertEmbeddings from PhysHunter +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_physhunter +date: 2024-09-04 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_d5716d28_physhunter` is a English model originally trained by PhysHunter. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_physhunter_en_5.5.0_3.0_1725465410812.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_physhunter_en_5.5.0_3.0_1725465410812.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertEmbeddings
+from pyspark.ml import Pipeline
+
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_physhunter", "en") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = DistilBertEmbeddings
+    .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_physhunter", "en")
+    .setInputCols(Array("documents", "token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_physhunter| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +References + +https://huggingface.co/PhysHunter/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_d5716d28_physhunter_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_d5716d28_physhunter_pipeline_en.md new file mode 100644 index 00000000000000..374cabce4ffc77 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_d5716d28_physhunter_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_physhunter_pipeline pipeline DistilBertForQuestionAnswering from PhysHunter +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_physhunter_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_d5716d28_physhunter_pipeline` is a English model originally trained by PhysHunter. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_physhunter_pipeline_en_5.5.0_3.0_1725465422625.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_physhunter_pipeline_en_5.5.0_3.0_1725465422625.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df: a Spark DataFrame with "question" and "context" columns (assumed input column names)
+df = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+
+pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_squad_d5716d28_physhunter_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df: a DataFrame with "question" and "context" columns (assumed input column names)
+val df = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+
+val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_squad_d5716d28_physhunter_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_physhunter_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/PhysHunter/distilbert-base-uncased-finetuned-squad-d5716d28 + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_d5716d28_sofa566_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_d5716d28_sofa566_en.md new file mode 100644 index 00000000000000..f8c3a85be98e98 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_d5716d28_sofa566_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_sofa566 DistilBertEmbeddings from sofa566 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_sofa566 +date: 2024-09-04 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_d5716d28_sofa566` is a English model originally trained by sofa566. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_sofa566_en_5.5.0_3.0_1725465690682.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_sofa566_en_5.5.0_3.0_1725465690682.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertEmbeddings
+from pyspark.ml import Pipeline
+
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_sofa566", "en") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = DistilBertEmbeddings
+    .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_sofa566", "en")
+    .setInputCols(Array("documents", "token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+</div>
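+
+To hand the vectors to downstream Spark ML stages, Spark NLP's `EmbeddingsFinisher` can convert the annotation column into plain vector columns; a sketch appended to the pipeline defined above:
+
+```python
+from sparknlp.base import EmbeddingsFinisher
+
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["finished_embeddings"]) \
+    .setOutputAsVector(True)
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings, finisher])
+```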
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_sofa566| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +References + +https://huggingface.co/sofa566/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_d5716d28_sofa566_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_d5716d28_sofa566_pipeline_en.md new file mode 100644 index 00000000000000..e1227f5c7f93d2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_d5716d28_sofa566_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_sofa566_pipeline pipeline DistilBertForQuestionAnswering from sofa566 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_sofa566_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_d5716d28_sofa566_pipeline` is a English model originally trained by sofa566. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_sofa566_pipeline_en_5.5.0_3.0_1725465705199.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_sofa566_pipeline_en_5.5.0_3.0_1725465705199.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df: a Spark DataFrame with "question" and "context" columns (assumed input column names)
+df = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+
+pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_squad_d5716d28_sofa566_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df: a DataFrame with "question" and "context" columns (assumed input column names)
+val df = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+
+val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_squad_d5716d28_sofa566_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_sofa566_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/sofa566/distilbert-base-uncased-finetuned-squad-d5716d28 + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_fattahilmi_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_fattahilmi_en.md new file mode 100644 index 00000000000000..a182734ef72d69 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_fattahilmi_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_fattahilmi DistilBertForQuestionAnswering from fattahilmi +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_fattahilmi +date: 2024-09-04 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_fattahilmi` is a English model originally trained by fattahilmi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_fattahilmi_en_5.5.0_3.0_1725465506146.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_fattahilmi_en_5.5.0_3.0_1725465506146.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import MultiDocumentAssembler
+from sparknlp.annotator import DistilBertForQuestionAnswering
+from pyspark.ml import Pipeline
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_fattahilmi", "en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.MultiDocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_fattahilmi", "en")
+    .setInputCols(Array("document_question", "document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
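+
+The predicted answer span can be read directly from the `answer` column produced above; a minimal sketch:
+
+```python
+pipelineDF.selectExpr("document_question.result as question", "answer.result as answer").show(truncate=False)
+```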
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_fattahilmi| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/fattahilmi/distilbert-base-uncased-finetuned-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_fattahilmi_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_fattahilmi_pipeline_en.md new file mode 100644 index 00000000000000..2e58667f40f59b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_fattahilmi_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_fattahilmi_pipeline pipeline DistilBertForQuestionAnswering from fattahilmi +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_fattahilmi_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_fattahilmi_pipeline` is a English model originally trained by fattahilmi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_fattahilmi_pipeline_en_5.5.0_3.0_1725465523373.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_fattahilmi_pipeline_en_5.5.0_3.0_1725465523373.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df: a Spark DataFrame with "question" and "context" columns (assumed input column names)
+df = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+
+pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_squad_fattahilmi_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df: a DataFrame with "question" and "context" columns (assumed input column names)
+val df = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+
+val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_squad_fattahilmi_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_fattahilmi_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/fattahilmi/distilbert-base-uncased-finetuned-squad + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_tanishq1420_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_tanishq1420_en.md new file mode 100644 index 00000000000000..7ff71037d0436e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_tanishq1420_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_tanishq1420 DistilBertForQuestionAnswering from tanishq1420 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_tanishq1420 +date: 2024-09-04 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_tanishq1420` is a English model originally trained by tanishq1420. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_tanishq1420_en_5.5.0_3.0_1725465417025.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_tanishq1420_en_5.5.0_3.0_1725465417025.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import MultiDocumentAssembler
+from sparknlp.annotator import DistilBertForQuestionAnswering
+from pyspark.ml import Pipeline
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_tanishq1420", "en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.MultiDocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_tanishq1420", "en")
+    .setInputCols(Array("document_question", "document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
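+
+The question-answering annotator exposes a few inference settings worth tuning; a sketch showing case sensitivity and batch size applied to the stage defined above (values are illustrative):
+
+```python
+spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_tanishq1420", "en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer") \
+    .setCaseSensitive(False) \
+    .setBatchSize(8)
+```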
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_tanishq1420| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/tanishq1420/distilbert-base-uncased-finetuned-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_yashaswi0506_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_yashaswi0506_pipeline_en.md new file mode 100644 index 00000000000000..1a6060efa4ca8a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_squad_yashaswi0506_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_yashaswi0506_pipeline pipeline DistilBertForQuestionAnswering from Yashaswi0506 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_yashaswi0506_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_yashaswi0506_pipeline` is a English model originally trained by Yashaswi0506. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_yashaswi0506_pipeline_en_5.5.0_3.0_1725465694182.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_yashaswi0506_pipeline_en_5.5.0_3.0_1725465694182.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df: a Spark DataFrame with "question" and "context" columns (assumed input column names)
+df = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+
+pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_squad_yashaswi0506_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df: a DataFrame with "question" and "context" columns (assumed input column names)
+val df = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+
+val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_squad_yashaswi0506_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_yashaswi0506_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Yashaswi0506/distilbert-base-uncased-finetuned-squad + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_streamers_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_streamers_pipeline_en.md new file mode 100644 index 00000000000000..2818aee1d4b43e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_finetuned_streamers_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_streamers_pipeline pipeline DistilBertEmbeddings from muhbdeir +author: John Snow Labs +name: distilbert_base_uncased_finetuned_streamers_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_streamers_pipeline` is a English model originally trained by muhbdeir. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_streamers_pipeline_en_5.5.0_3.0_1725414111774.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_streamers_pipeline_en_5.5.0_3.0_1725414111774.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df: any Spark DataFrame with a "text" column to annotate
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_streamers_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df: any DataFrame with a "text" column to annotate
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_streamers_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_streamers_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/muhbdeir/distilbert-base-uncased-finetuned-streamers + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_go_emotion_bhadresh_savani_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_go_emotion_bhadresh_savani_en.md new file mode 100644 index 00000000000000..8ea489d01c3c77 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_go_emotion_bhadresh_savani_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_go_emotion_bhadresh_savani DistilBertForSequenceClassification from bhadresh-savani +author: John Snow Labs +name: distilbert_base_uncased_go_emotion_bhadresh_savani +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_go_emotion_bhadresh_savani` is a English model originally trained by bhadresh-savani. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_go_emotion_bhadresh_savani_en_5.5.0_3.0_1725490163736.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_go_emotion_bhadresh_savani_en_5.5.0_3.0_1725490163736.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+import sparknlp
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+# Start (or attach to) a Spark session with Spark NLP on the classpath
+spark = sparknlp.start()
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# Load the fine-tuned DistilBERT classifier and wire it to the document and token columns
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_go_emotion_bhadresh_savani","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+// Load the fine-tuned DistilBERT classifier and wire it to the document and token columns
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_go_emotion_bhadresh_savani", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
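+
+Once `pipelineDF` has been computed, the predicted labels sit in the `class` output column defined above as Spark NLP annotations; a short follow-up in the same session pulls out just the label strings:
+
+```python
+# Show the input text next to the predicted emotion label(s)
+pipelineDF.select("text", "class.result").show(truncate=False)
+```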
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_go_emotion_bhadresh_savani| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/bhadresh-savani/distilbert-base-uncased-go-emotion \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_mluonium_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_mluonium_pipeline_en.md new file mode 100644 index 00000000000000..7d1a3e1cffdfb9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_mluonium_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_mluonium_pipeline pipeline DistilBertForTokenClassification from mluonium +author: John Snow Labs +name: distilbert_base_uncased_mluonium_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_mluonium_pipeline` is a English model originally trained by mluonium. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_mluonium_pipeline_en_5.5.0_3.0_1725461004899.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_mluonium_pipeline_en_5.5.0_3.0_1725461004899.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is assumed to be a Spark DataFrame holding the raw input column(s) this pipeline was saved with
+pipeline = PretrainedPipeline("distilbert_base_uncased_mluonium_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is assumed to be a Spark DataFrame holding the raw input column(s) this pipeline was saved with
+val pipeline = new PretrainedPipeline("distilbert_base_uncased_mluonium_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_mluonium_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/mluonium/distilbert-base-uncased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_mnli_textattack_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_mnli_textattack_pipeline_en.md new file mode 100644 index 00000000000000..93e93a1243345e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_mnli_textattack_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_mnli_textattack_pipeline pipeline DistilBertForSequenceClassification from textattack +author: John Snow Labs +name: distilbert_base_uncased_mnli_textattack_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_mnli_textattack_pipeline` is a English model originally trained by textattack. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_mnli_textattack_pipeline_en_5.5.0_3.0_1725489523087.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_mnli_textattack_pipeline_en_5.5.0_3.0_1725489523087.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is assumed to be a Spark DataFrame holding the raw input column(s) this pipeline was saved with
+pipeline = PretrainedPipeline("distilbert_base_uncased_mnli_textattack_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is assumed to be a Spark DataFrame holding the raw input column(s) this pipeline was saved with
+val pipeline = new PretrainedPipeline("distilbert_base_uncased_mnli_textattack_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_mnli_textattack_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/textattack/distilbert-base-uncased-MNLI + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_norwegian_perturb_bozhidara_pesheva_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_norwegian_perturb_bozhidara_pesheva_en.md new file mode 100644 index 00000000000000..f7ef5bebb708a2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_norwegian_perturb_bozhidara_pesheva_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_norwegian_perturb_bozhidara_pesheva DistilBertForTokenClassification from bozhidara-pesheva +author: John Snow Labs +name: distilbert_base_uncased_norwegian_perturb_bozhidara_pesheva +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_norwegian_perturb_bozhidara_pesheva` is a English model originally trained by bozhidara-pesheva. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_norwegian_perturb_bozhidara_pesheva_en_5.5.0_3.0_1725448503058.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_norwegian_perturb_bozhidara_pesheva_en_5.5.0_3.0_1725448503058.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+import sparknlp
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+# Start (or attach to) a Spark session with Spark NLP on the classpath
+spark = sparknlp.start()
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# Load the fine-tuned DistilBERT NER model and wire it to the document and token columns
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_norwegian_perturb_bozhidara_pesheva","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+// Load the fine-tuned DistilBERT NER model and wire it to the document and token columns
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_norwegian_perturb_bozhidara_pesheva", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
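+
+The `ner` output column holds one tag per token. Continuing from the snippet above, the tags can be lined up with their tokens like this:
+
+```python
+from pyspark.sql import functions as F
+
+# Pair each token with its predicted entity tag
+pipelineDF.select(F.explode(F.arrays_zip("token.result", "ner.result")).alias("token_tag")).show(truncate=False)
+```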
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_norwegian_perturb_bozhidara_pesheva| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/bozhidara-pesheva/distilbert-base-uncased-no-perturb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_travel_zphr_5st_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_travel_zphr_5st_en.md new file mode 100644 index 00000000000000..30cd9d3c7eef5d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_travel_zphr_5st_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_travel_zphr_5st DistilBertForSequenceClassification from tom192180 +author: John Snow Labs +name: distilbert_base_uncased_travel_zphr_5st +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_travel_zphr_5st` is a English model originally trained by tom192180. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_travel_zphr_5st_en_5.5.0_3.0_1725489660476.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_travel_zphr_5st_en_5.5.0_3.0_1725489660476.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+import sparknlp
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+# Start (or attach to) a Spark session with Spark NLP on the classpath
+spark = sparknlp.start()
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# Load the fine-tuned DistilBERT classifier and wire it to the document and token columns
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_travel_zphr_5st","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+// Load the fine-tuned DistilBERT classifier and wire it to the document and token columns
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_travel_zphr_5st", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_travel_zphr_5st| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/tom192180/distilbert-base-uncased_travel_zphr_5st \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_travel_zphr_5st_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_travel_zphr_5st_pipeline_en.md new file mode 100644 index 00000000000000..a369f20319552b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_base_uncased_travel_zphr_5st_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_travel_zphr_5st_pipeline pipeline DistilBertForSequenceClassification from tom192180 +author: John Snow Labs +name: distilbert_base_uncased_travel_zphr_5st_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_travel_zphr_5st_pipeline` is a English model originally trained by tom192180. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_travel_zphr_5st_pipeline_en_5.5.0_3.0_1725489673140.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_travel_zphr_5st_pipeline_en_5.5.0_3.0_1725489673140.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is assumed to be a Spark DataFrame holding the raw input column(s) this pipeline was saved with
+pipeline = PretrainedPipeline("distilbert_base_uncased_travel_zphr_5st_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is assumed to be a Spark DataFrame holding the raw input column(s) this pipeline was saved with
+val pipeline = new PretrainedPipeline("distilbert_base_uncased_travel_zphr_5st_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_travel_zphr_5st_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/tom192180/distilbert-base-uncased_travel_zphr_5st + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_codeslang_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_codeslang_en.md new file mode 100644 index 00000000000000..9ed8df182eae03 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_codeslang_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_codeslang DistilBertForTokenClassification from codeSlang +author: John Snow Labs +name: distilbert_codeslang +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_codeslang` is a English model originally trained by codeSlang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_codeslang_en_5.5.0_3.0_1725476485532.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_codeslang_en_5.5.0_3.0_1725476485532.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+import sparknlp
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+# Start (or attach to) a Spark session with Spark NLP on the classpath
+spark = sparknlp.start()
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# Load the fine-tuned DistilBERT NER model and wire it to the document and token columns
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_codeslang","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+// Load the fine-tuned DistilBERT NER model and wire it to the document and token columns
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_codeslang", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_codeslang| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/codeSlang/distilBert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_codeslang_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_codeslang_pipeline_en.md new file mode 100644 index 00000000000000..0b731eaea4888f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_codeslang_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_codeslang_pipeline pipeline DistilBertForTokenClassification from codeSlang +author: John Snow Labs +name: distilbert_codeslang_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_codeslang_pipeline` is a English model originally trained by codeSlang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_codeslang_pipeline_en_5.5.0_3.0_1725476497668.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_codeslang_pipeline_en_5.5.0_3.0_1725476497668.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is assumed to be a Spark DataFrame holding the raw input column(s) this pipeline was saved with
+pipeline = PretrainedPipeline("distilbert_codeslang_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is assumed to be a Spark DataFrame holding the raw input column(s) this pipeline was saved with
+val pipeline = new PretrainedPipeline("distilbert_codeslang_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_codeslang_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/codeSlang/distilBert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_finetuned_coqa_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_finetuned_coqa_en.md new file mode 100644 index 00000000000000..3fa00aab34939a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_finetuned_coqa_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_finetuned_coqa DistilBertForQuestionAnswering from raghavbali +author: John Snow Labs +name: distilbert_finetuned_coqa +date: 2024-09-04 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_coqa` is a English model originally trained by raghavbali. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_coqa_en_5.5.0_3.0_1725465708961.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_coqa_en_5.5.0_3.0_1725465708961.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+import sparknlp
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+# Start (or attach to) a Spark session with Spark NLP on the classpath
+spark = sparknlp.start()
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+# Load the fine-tuned DistilBERT span classifier and wire it to the assembled question/context columns
+spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_finetuned_coqa","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+// Load the fine-tuned DistilBERT span classifier and wire it to the assembled question/context columns
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_finetuned_coqa", "en")
+  .setInputCols(Array("document_question", "document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
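+
+After the transform above, the extracted answer span is available in the `answer` output column; a one-liner in the same session prints it:
+
+```python
+# Inspect the predicted answer for each question/context pair
+pipelineDF.select("answer.result").show(truncate=False)
+```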
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_coqa| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|243.7 MB| + +## References + +https://huggingface.co/raghavbali/distilbert-finetuned-coqa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_finetuned_coqa_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_finetuned_coqa_pipeline_en.md new file mode 100644 index 00000000000000..9bb265186d6b5f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_finetuned_coqa_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_finetuned_coqa_pipeline pipeline DistilBertForQuestionAnswering from raghavbali +author: John Snow Labs +name: distilbert_finetuned_coqa_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_coqa_pipeline` is a English model originally trained by raghavbali. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_coqa_pipeline_en_5.5.0_3.0_1725465721283.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_coqa_pipeline_en_5.5.0_3.0_1725465721283.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is assumed to be a Spark DataFrame holding the raw input column(s) this pipeline was saved with
+pipeline = PretrainedPipeline("distilbert_finetuned_coqa_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is assumed to be a Spark DataFrame holding the raw input column(s) this pipeline was saved with
+val pipeline = new PretrainedPipeline("distilbert_finetuned_coqa_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_coqa_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.7 MB| + +## References + +https://huggingface.co/raghavbali/distilbert-finetuned-coqa + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_finetuned_finer_4_v2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_finetuned_finer_4_v2_pipeline_en.md new file mode 100644 index 00000000000000..cdc3ecf3a94ea6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_finetuned_finer_4_v2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_finetuned_finer_4_v2_pipeline pipeline DistilBertForTokenClassification from ShadyML +author: John Snow Labs +name: distilbert_finetuned_finer_4_v2_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_finer_4_v2_pipeline` is a English model originally trained by ShadyML. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_finer_4_v2_pipeline_en_5.5.0_3.0_1725460915485.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_finer_4_v2_pipeline_en_5.5.0_3.0_1725460915485.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is assumed to be a Spark DataFrame holding the raw input column(s) this pipeline was saved with
+pipeline = PretrainedPipeline("distilbert_finetuned_finer_4_v2_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is assumed to be a Spark DataFrame holding the raw input column(s) this pipeline was saved with
+val pipeline = new PretrainedPipeline("distilbert_finetuned_finer_4_v2_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_finer_4_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/ShadyML/distilbert-finetuned-finer-4-v2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_finetuned_sayula_popoluca_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_finetuned_sayula_popoluca_pipeline_en.md new file mode 100644 index 00000000000000..6fccbba0a20fd7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_finetuned_sayula_popoluca_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_finetuned_sayula_popoluca_pipeline pipeline DistilBertForTokenClassification from amanpatkar +author: John Snow Labs +name: distilbert_finetuned_sayula_popoluca_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_sayula_popoluca_pipeline` is a English model originally trained by amanpatkar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_sayula_popoluca_pipeline_en_5.5.0_3.0_1725460792081.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_sayula_popoluca_pipeline_en_5.5.0_3.0_1725460792081.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is assumed to be a Spark DataFrame holding the raw input column(s) this pipeline was saved with
+pipeline = PretrainedPipeline("distilbert_finetuned_sayula_popoluca_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is assumed to be a Spark DataFrame holding the raw input column(s) this pipeline was saved with
+val pipeline = new PretrainedPipeline("distilbert_finetuned_sayula_popoluca_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_sayula_popoluca_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.9 MB| + +## References + +https://huggingface.co/amanpatkar/distilbert-finetuned-pos + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_finetuned_squadv2_fuutoru_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_finetuned_squadv2_fuutoru_en.md new file mode 100644 index 00000000000000..d4a52c7f2700d8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_finetuned_squadv2_fuutoru_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_finetuned_squadv2_fuutoru DistilBertForQuestionAnswering from FuuToru +author: John Snow Labs +name: distilbert_finetuned_squadv2_fuutoru +date: 2024-09-04 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_squadv2_fuutoru` is a English model originally trained by FuuToru. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_squadv2_fuutoru_en_5.5.0_3.0_1725465450732.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_squadv2_fuutoru_en_5.5.0_3.0_1725465450732.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+import sparknlp
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+# Start (or attach to) a Spark session with Spark NLP on the classpath
+spark = sparknlp.start()
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+# Load the fine-tuned DistilBERT span classifier and wire it to the assembled question/context columns
+spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_finetuned_squadv2_fuutoru","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+// Load the fine-tuned DistilBERT span classifier and wire it to the assembled question/context columns
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_finetuned_squadv2_fuutoru", "en")
+  .setInputCols(Array("document_question", "document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_squadv2_fuutoru| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/FuuToru/distilbert-finetuned-squadv2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_finetuned_squadv2_fuutoru_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_finetuned_squadv2_fuutoru_pipeline_en.md new file mode 100644 index 00000000000000..8390b89cd945f3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_finetuned_squadv2_fuutoru_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_finetuned_squadv2_fuutoru_pipeline pipeline DistilBertForQuestionAnswering from FuuToru +author: John Snow Labs +name: distilbert_finetuned_squadv2_fuutoru_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_squadv2_fuutoru_pipeline` is a English model originally trained by FuuToru. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_squadv2_fuutoru_pipeline_en_5.5.0_3.0_1725465462685.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_squadv2_fuutoru_pipeline_en_5.5.0_3.0_1725465462685.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is assumed to be a Spark DataFrame holding the raw input column(s) this pipeline was saved with
+pipeline = PretrainedPipeline("distilbert_finetuned_squadv2_fuutoru_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is assumed to be a Spark DataFrame holding the raw input column(s) this pipeline was saved with
+val pipeline = new PretrainedPipeline("distilbert_finetuned_squadv2_fuutoru_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_squadv2_fuutoru_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/FuuToru/distilbert-finetuned-squadv2 + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_finetuned_squadv2_nampham1106_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_finetuned_squadv2_nampham1106_pipeline_en.md new file mode 100644 index 00000000000000..5399ddac7a2297 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_finetuned_squadv2_nampham1106_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_finetuned_squadv2_nampham1106_pipeline pipeline DistilBertForQuestionAnswering from nampham1106 +author: John Snow Labs +name: distilbert_finetuned_squadv2_nampham1106_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_squadv2_nampham1106_pipeline` is a English model originally trained by nampham1106. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_squadv2_nampham1106_pipeline_en_5.5.0_3.0_1725466000144.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_squadv2_nampham1106_pipeline_en_5.5.0_3.0_1725466000144.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is assumed to be a Spark DataFrame holding the raw input column(s) this pipeline was saved with
+pipeline = PretrainedPipeline("distilbert_finetuned_squadv2_nampham1106_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is assumed to be a Spark DataFrame holding the raw input column(s) this pipeline was saved with
+val pipeline = new PretrainedPipeline("distilbert_finetuned_squadv2_nampham1106_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_squadv2_nampham1106_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/nampham1106/distilbert-finetuned-squadv2 + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_finetuned_squadv2_ntn0301_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_finetuned_squadv2_ntn0301_pipeline_en.md new file mode 100644 index 00000000000000..1e6f917970458e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_finetuned_squadv2_ntn0301_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_finetuned_squadv2_ntn0301_pipeline pipeline DistilBertForQuestionAnswering from NTN0301 +author: John Snow Labs +name: distilbert_finetuned_squadv2_ntn0301_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_squadv2_ntn0301_pipeline` is a English model originally trained by NTN0301. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_squadv2_ntn0301_pipeline_en_5.5.0_3.0_1725465303068.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_squadv2_ntn0301_pipeline_en_5.5.0_3.0_1725465303068.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is assumed to be a Spark DataFrame holding the raw input column(s) this pipeline was saved with
+pipeline = PretrainedPipeline("distilbert_finetuned_squadv2_ntn0301_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is assumed to be a Spark DataFrame holding the raw input column(s) this pipeline was saved with
+val pipeline = new PretrainedPipeline("distilbert_finetuned_squadv2_ntn0301_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_squadv2_ntn0301_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/NTN0301/distilbert-finetuned-squadv2 + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_hera_synthetic_pretrain_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_hera_synthetic_pretrain_pipeline_en.md new file mode 100644 index 00000000000000..0d6655db2989ad --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_hera_synthetic_pretrain_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_hera_synthetic_pretrain_pipeline pipeline DistilBertForTokenClassification from mpajas +author: John Snow Labs +name: distilbert_hera_synthetic_pretrain_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_hera_synthetic_pretrain_pipeline` is a English model originally trained by mpajas. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_hera_synthetic_pretrain_pipeline_en_5.5.0_3.0_1725476406767.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_hera_synthetic_pretrain_pipeline_en_5.5.0_3.0_1725476406767.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is assumed to be a Spark DataFrame holding the raw input column(s) this pipeline was saved with
+pipeline = PretrainedPipeline("distilbert_hera_synthetic_pretrain_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is assumed to be a Spark DataFrame holding the raw input column(s) this pipeline was saved with
+val pipeline = new PretrainedPipeline("distilbert_hera_synthetic_pretrain_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_hera_synthetic_pretrain_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|505.4 MB| + +## References + +https://huggingface.co/mpajas/distilbert-hera-synthetic-pretrain + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_imdb_huggingface_cyh002_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_imdb_huggingface_cyh002_en.md new file mode 100644 index 00000000000000..a3afa028cfb7de --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_imdb_huggingface_cyh002_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_imdb_huggingface_cyh002 DistilBertForSequenceClassification from cyh002 +author: John Snow Labs +name: distilbert_imdb_huggingface_cyh002 +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_imdb_huggingface_cyh002` is a English model originally trained by cyh002. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_imdb_huggingface_cyh002_en_5.5.0_3.0_1725490247844.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_imdb_huggingface_cyh002_en_5.5.0_3.0_1725490247844.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+import sparknlp
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+# Start (or attach to) a Spark session with Spark NLP on the classpath
+spark = sparknlp.start()
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# Load the fine-tuned DistilBERT classifier and wire it to the document and token columns
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_imdb_huggingface_cyh002","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+// Load the fine-tuned DistilBERT classifier and wire it to the document and token columns
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_imdb_huggingface_cyh002", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_imdb_huggingface_cyh002| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/cyh002/DISTILBERT-IMDB-HUGGINGFACE \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_imdb_huggingface_cyh002_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_imdb_huggingface_cyh002_pipeline_en.md new file mode 100644 index 00000000000000..975035a787aba4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_imdb_huggingface_cyh002_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_imdb_huggingface_cyh002_pipeline pipeline DistilBertForSequenceClassification from cyh002 +author: John Snow Labs +name: distilbert_imdb_huggingface_cyh002_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_imdb_huggingface_cyh002_pipeline` is a English model originally trained by cyh002. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_imdb_huggingface_cyh002_pipeline_en_5.5.0_3.0_1725490260269.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_imdb_huggingface_cyh002_pipeline_en_5.5.0_3.0_1725490260269.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is assumed to be a Spark DataFrame holding the raw input column(s) this pipeline was saved with
+pipeline = PretrainedPipeline("distilbert_imdb_huggingface_cyh002_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is assumed to be a Spark DataFrame holding the raw input column(s) this pipeline was saved with
+val pipeline = new PretrainedPipeline("distilbert_imdb_huggingface_cyh002_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_imdb_huggingface_cyh002_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/cyh002/DISTILBERT-IMDB-HUGGINGFACE + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_masking_heaps_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_masking_heaps_en.md new file mode 100644 index 00000000000000..0ba38fec5afe3b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_masking_heaps_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_masking_heaps DistilBertEmbeddings from johannes-garstenauer +author: John Snow Labs +name: distilbert_masking_heaps +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_masking_heaps` is a English model originally trained by johannes-garstenauer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_masking_heaps_en_5.5.0_3.0_1725413888409.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_masking_heaps_en_5.5.0_3.0_1725413888409.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+import sparknlp
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+# Start (or attach to) a Spark session with Spark NLP on the classpath
+spark = sparknlp.start()
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols("document") \
+    .setOutputCol("token")
+
+# Load the pretrained DistilBERT embeddings and wire them to the document and token columns
+embeddings = DistilBertEmbeddings.pretrained("distilbert_masking_heaps","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+// Load the pretrained DistilBERT embeddings and wire them to the document and token columns
+val embeddings = DistilBertEmbeddings.pretrained("distilbert_masking_heaps","en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
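+
+The `embeddings` output column defined above contains one vector per token. Continuing from the snippet, the first token vector can be inspected like this:
+
+```python
+# Each exploded row is the embedding vector for one token (768 dimensions for a DistilBERT base model)
+pipelineDF.selectExpr("explode(embeddings.embeddings) as vector").show(1, truncate=False)
+```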
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_masking_heaps| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.5 MB| + +## References + +https://huggingface.co/johannes-garstenauer/distilbert_masking_heaps \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_mlm_practice_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_mlm_practice_en.md new file mode 100644 index 00000000000000..38577464379dda --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_mlm_practice_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_mlm_practice DistilBertEmbeddings from drchandra-code +author: John Snow Labs +name: distilbert_mlm_practice +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_mlm_practice` is a English model originally trained by drchandra-code. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_mlm_practice_en_5.5.0_3.0_1725414029276.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_mlm_practice_en_5.5.0_3.0_1725414029276.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+import sparknlp
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+# Start (or attach to) a Spark session with Spark NLP on the classpath
+spark = sparknlp.start()
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols("document") \
+    .setOutputCol("token")
+
+# Load the pretrained DistilBERT embeddings and wire them to the document and token columns
+embeddings = DistilBertEmbeddings.pretrained("distilbert_mlm_practice","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+// Load the pretrained DistilBERT embeddings and wire them to the document and token columns
+val embeddings = DistilBertEmbeddings.pretrained("distilbert_mlm_practice","en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_mlm_practice| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/drchandra-code/distilbert-mlm-practice \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_mlm_practice_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_mlm_practice_pipeline_en.md new file mode 100644 index 00000000000000..e2403d77f4b253 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_mlm_practice_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_mlm_practice_pipeline pipeline DistilBertEmbeddings from drchandra-code +author: John Snow Labs +name: distilbert_mlm_practice_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_mlm_practice_pipeline` is a English model originally trained by drchandra-code. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_mlm_practice_pipeline_en_5.5.0_3.0_1725414042116.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_mlm_practice_pipeline_en_5.5.0_3.0_1725414042116.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# Download the pretrained pipeline and run it over a DataFrame with a "text" column
+pipeline = PretrainedPipeline("distilbert_mlm_practice_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// Download the pretrained pipeline and run it over a DataFrame with a "text" column
+val pipeline = new PretrainedPipeline("distilbert_mlm_practice_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+</div><div class="h3-box" markdown="1">
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_mlm_practice_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/drchandra-code/distilbert-mlm-practice + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_multilingual_cased_lft_xx.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_multilingual_cased_lft_xx.md new file mode 100644 index 00000000000000..6982555b1b6f89 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_multilingual_cased_lft_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual distilbert_multilingual_cased_lft DistilBertForTokenClassification from praysimanjuntak +author: John Snow Labs +name: distilbert_multilingual_cased_lft +date: 2024-09-04 +tags: [xx, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_multilingual_cased_lft` is a Multilingual model originally trained by praysimanjuntak. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_multilingual_cased_lft_xx_5.5.0_3.0_1725461205364.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_multilingual_cased_lft_xx_5.5.0_3.0_1725461205364.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_multilingual_cased_lft","xx") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_multilingual_cased_lft", "xx")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div><div class="h3-box" markdown="1">
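+
+The token classifier above emits IOB-style tags in the `ner` column. To merge those tags into whole entity chunks, a `NerConverter` stage can be appended; a sketch, assuming the column names used in the example:
+
+```python
+from sparknlp.annotator import NerConverter
+
+# Groups consecutive IOB tags (e.g. B-PER, I-PER) into single entity chunks.
+converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+```
+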
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_multilingual_cased_lft| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|xx| +|Size:|505.4 MB| + +## References + +https://huggingface.co/praysimanjuntak/distilbert-multilingual-cased-lft \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_qa_BERT_ClinicalQA_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_qa_BERT_ClinicalQA_pipeline_en.md new file mode 100644 index 00000000000000..15c09a594383f1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_qa_BERT_ClinicalQA_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_qa_BERT_ClinicalQA_pipeline pipeline DistilBertForQuestionAnswering from exafluence +author: John Snow Labs +name: distilbert_qa_BERT_ClinicalQA_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_qa_BERT_ClinicalQA_pipeline` is a English model originally trained by exafluence. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_qa_BERT_ClinicalQA_pipeline_en_5.5.0_3.0_1725466000076.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_qa_BERT_ClinicalQA_pipeline_en_5.5.0_3.0_1725466000076.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
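+
+The snippet below references a DataFrame `df` that is not defined in the example. For a question-answering pipeline it would typically hold a question and its context; a sketch under that assumption (the exact column names expected by the bundled MultiDocumentAssembler are not stated on this card):
+
+```python
+# Hypothetical input for pipeline.transform(df) below; column names are assumed.
+df = spark.createDataFrame(
+    [["What does the patient report?", "The patient reports mild chest pain."]]
+).toDF("question", "context")
+```
+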
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_qa_BERT_ClinicalQA_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_qa_BERT_ClinicalQA_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_qa_BERT_ClinicalQA_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/exafluence/BERT-ClinicalQA + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_qa_COVID_DistilBERTc_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_qa_COVID_DistilBERTc_en.md new file mode 100644 index 00000000000000..75942a855c46af --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_qa_COVID_DistilBERTc_en.md @@ -0,0 +1,98 @@ +--- +layout: model +title: English DistilBertForQuestionAnswering model (from rahulkuruvilla) C Version +author: John Snow Labs +name: distilbert_qa_COVID_DistilBERTc +date: 2024-09-04 +tags: [en, open_source, distilbert, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `COVID-DistilBERTc` is a English model originally trained by `rahulkuruvilla`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_qa_COVID_DistilBERTc_en_5.5.0_3.0_1725465506512.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_qa_COVID_DistilBERTc_en_5.5.0_3.0_1725465506512.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = MultiDocumentAssembler() \
+.setInputCols(["question", "context"]) \
+.setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_qa_COVID_DistilBERTc","en") \
+.setInputCols(["document_question", "document_context"]) \
+.setOutputCol("answer") \
+.setCaseSensitive(True)
+
+pipeline = Pipeline(stages=[documentAssembler, spanClassifier])
+
+data = spark.createDataFrame([["What is my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+val documentAssembler = new MultiDocumentAssembler()
+.setInputCols(Array("question", "context"))
+.setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_qa_COVID_DistilBERTc","en")
+.setInputCols(Array("document_question", "document_context"))
+.setOutputCol("answer")
+.setCaseSensitive(true)
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+
+val data = Seq(("What is my name?", "My name is Clara and I live in Berkeley.")).toDF("question", "context")
+
+val result = pipeline.fit(data).transform(data)
+```
+
+{:.nlu-block}
+```python
+import nlu
+nlu.load("en.answer_question.covid.distil_bert.c.by_rahulkuruvilla").predict("""What is my name?|||My name is Clara and I live in Berkeley.""")
+```
+</div><div class="h3-box" markdown="1">
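+
+After the pipeline runs, the predicted answer span is stored as an annotation in the `answer` column; a short sketch for reading it back from the `result` DataFrame produced above:
+
+```python
+# Show the extracted answer text for each question/context pair.
+result.select("answer.result").show(truncate=False)
+```
+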
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_qa_COVID_DistilBERTc| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +References + +- https://huggingface.co/rahulkuruvilla/COVID-DistilBERTc \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_qa_checkpoint_500_finetuned_squad_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_qa_checkpoint_500_finetuned_squad_pipeline_en.md new file mode 100644 index 00000000000000..6aba44571e68c8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_qa_checkpoint_500_finetuned_squad_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_qa_checkpoint_500_finetuned_squad_pipeline pipeline DistilBertForQuestionAnswering from tabo +author: John Snow Labs +name: distilbert_qa_checkpoint_500_finetuned_squad_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_qa_checkpoint_500_finetuned_squad_pipeline` is a English model originally trained by tabo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_qa_checkpoint_500_finetuned_squad_pipeline_en_5.5.0_3.0_1725465629307.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_qa_checkpoint_500_finetuned_squad_pipeline_en_5.5.0_3.0_1725465629307.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
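+
+The example below assumes `PretrainedPipeline` is already imported; on first use the pipeline artifacts are downloaded from the public bucket and cached locally (by default under `~/cache_pretrained`). The import it relies on:
+
+```python
+# Import assumed by the snippet below.
+from sparknlp.pretrained import PretrainedPipeline
+```
+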
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_qa_checkpoint_500_finetuned_squad_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_qa_checkpoint_500_finetuned_squad_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_qa_checkpoint_500_finetuned_squad_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/tabo/checkpoint-500-finetuned-squad + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_qa_distilBertABSA_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_qa_distilBertABSA_pipeline_en.md new file mode 100644 index 00000000000000..8879ffb44fcd3b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_qa_distilBertABSA_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_qa_distilBertABSA_pipeline pipeline DistilBertForQuestionAnswering from LucasS +author: John Snow Labs +name: distilbert_qa_distilBertABSA_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_qa_distilBertABSA_pipeline` is a English model originally trained by LucasS. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_qa_distilBertABSA_pipeline_en_5.5.0_3.0_1725465526025.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_qa_distilBertABSA_pipeline_en_5.5.0_3.0_1725465526025.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_qa_distilBertABSA_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_qa_distilBertABSA_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_qa_distilBertABSA_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/LucasS/distilBertABSA + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_qa_eurosmart_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_qa_eurosmart_en.md new file mode 100644 index 00000000000000..cc84c4420e5765 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_qa_eurosmart_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_qa_eurosmart DistilBertForQuestionAnswering from Eurosmart +author: John Snow Labs +name: distilbert_qa_eurosmart +date: 2024-09-04 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_qa_eurosmart` is a English model originally trained by Eurosmart. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_qa_eurosmart_en_5.5.0_3.0_1725465875779.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_qa_eurosmart_en_5.5.0_3.0_1725465875779.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_qa_eurosmart","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_qa_eurosmart", "en")
+    .setInputCols(Array("document_question", "document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div><div class="h3-box" markdown="1">
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_qa_eurosmart| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Eurosmart/distilbert-qa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_qa_test_squad_trained_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_qa_test_squad_trained_en.md new file mode 100644 index 00000000000000..c276108393a6a9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_qa_test_squad_trained_en.md @@ -0,0 +1,98 @@ +--- +layout: model +title: English DistilBertForQuestionAnswering model (from ZYW) +author: John Snow Labs +name: distilbert_qa_test_squad_trained +date: 2024-09-04 +tags: [en, open_source, distilbert, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `test-squad-trained` is a English model originally trained by `ZYW`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_qa_test_squad_trained_en_5.5.0_3.0_1725465307647.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_qa_test_squad_trained_en_5.5.0_3.0_1725465307647.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_qa_test_squad_trained","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer")\ +.setCaseSensitive(True) + +pipeline = Pipeline(stages=[documentAssembler, spanClassifier]) + +data = spark.createDataFrame([["What is my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new MultiDocumentAssembler() +.setInputCols(Array("question", "context")) +.setOutputCols(Array("document_question", "document_context")) + +val spanClassifer = DistilBertForQuestionAnswering.pretrained("distilbert_qa_test_squad_trained","en") +.setInputCols(Array("document", "token")) +.setOutputCol("answer") +.setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) + +val data = Seq("What is my name?", "My name is Clara and I live in Berkeley.").toDF("question", "context") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.squad.distil_bert.by_ZYW").predict("""What is my name?|||"My name is Clara and I live in Berkeley.""") +``` +
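+
+For low-latency inference on single question/context pairs, the fitted model can also be wrapped in a `LightPipeline`; a sketch, assuming the two-argument `fullAnnotate` form used for question answering:
+
+```python
+from sparknlp.base import LightPipeline
+
+# Run the fitted pipeline on the driver without building a DataFrame.
+light = LightPipeline(pipeline.fit(data))
+light.fullAnnotate("What is my name?", "My name is Clara and I live in Berkeley.")
+```
+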
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_qa_test_squad_trained| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|505.4 MB| + +## References + +References + +- https://huggingface.co/ZYW/test-squad-trained \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_qa_test_squad_trained_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_qa_test_squad_trained_pipeline_en.md new file mode 100644 index 00000000000000..b9fa63be0560e7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_qa_test_squad_trained_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_qa_test_squad_trained_pipeline pipeline DistilBertForQuestionAnswering from ZYW +author: John Snow Labs +name: distilbert_qa_test_squad_trained_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_qa_test_squad_trained_pipeline` is a English model originally trained by ZYW. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_qa_test_squad_trained_pipeline_en_5.5.0_3.0_1725465333921.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_qa_test_squad_trained_pipeline_en_5.5.0_3.0_1725465333921.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_qa_test_squad_trained_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_qa_test_squad_trained_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_qa_test_squad_trained_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|505.4 MB| + +## References + +https://huggingface.co/ZYW/test-squad-trained + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_tuned_4labels_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_tuned_4labels_pipeline_en.md new file mode 100644 index 00000000000000..9d5b9a1aee5d40 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_tuned_4labels_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_tuned_4labels_pipeline pipeline DistilBertForTokenClassification from dayannex +author: John Snow Labs +name: distilbert_tuned_4labels_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_tuned_4labels_pipeline` is a English model originally trained by dayannex. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_tuned_4labels_pipeline_en_5.5.0_3.0_1725448617655.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_tuned_4labels_pipeline_en_5.5.0_3.0_1725448617655.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_tuned_4labels_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_tuned_4labels_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_tuned_4labels_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.9 MB| + +## References + +https://huggingface.co/dayannex/distilbert-tuned-4labels + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_turkish_sentiment_analysis2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_turkish_sentiment_analysis2_pipeline_en.md new file mode 100644 index 00000000000000..7ab263f6ef00a1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_turkish_sentiment_analysis2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_turkish_sentiment_analysis2_pipeline pipeline DistilBertForSequenceClassification from balciberin +author: John Snow Labs +name: distilbert_turkish_sentiment_analysis2_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_turkish_sentiment_analysis2_pipeline` is a English model originally trained by balciberin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_turkish_sentiment_analysis2_pipeline_en_5.5.0_3.0_1725490250765.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_turkish_sentiment_analysis2_pipeline_en_5.5.0_3.0_1725490250765.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
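+
+The call below expects a DataFrame `df` with a `text` column, which is what the pipeline's DocumentAssembler stage reads. A sketch of a minimal input, assuming an active Spark session (the review text is illustrative only):
+
+```python
+# Hypothetical input for pipeline.transform(df) below.
+df = spark.createDataFrame([["Bu ürün beklediğimden çok daha iyi çıktı."]]).toDF("text")
+```
+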
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_turkish_sentiment_analysis2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_turkish_sentiment_analysis2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_turkish_sentiment_analysis2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/balciberin/distilbert_turkish_sentiment_analysis2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilbert_word2vec_256k_mlm_best_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilbert_word2vec_256k_mlm_best_pipeline_en.md new file mode 100644 index 00000000000000..fd3172ac54e448 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilbert_word2vec_256k_mlm_best_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_word2vec_256k_mlm_best_pipeline pipeline DistilBertEmbeddings from vocab-transformers +author: John Snow Labs +name: distilbert_word2vec_256k_mlm_best_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_word2vec_256k_mlm_best_pipeline` is a English model originally trained by vocab-transformers. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_word2vec_256k_mlm_best_pipeline_en_5.5.0_3.0_1725413876164.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_word2vec_256k_mlm_best_pipeline_en_5.5.0_3.0_1725413876164.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_word2vec_256k_mlm_best_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_word2vec_256k_mlm_best_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
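+
+Since the included stages end with DistilBertEmbeddings, the transformed DataFrame carries token-level vectors; a sketch for inspecting them, assuming the stage's output column is named `embeddings` (the card does not state the column name explicitly):
+
+```python
+# Explode the token-level annotations and look at the raw vectors.
+annotations.selectExpr("explode(embeddings) as emb") \
+    .selectExpr("emb.result as token", "emb.embeddings as vector") \
+    .show(truncate=80)
+```
+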
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_word2vec_256k_mlm_best_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|902.0 MB| + +## References + +https://huggingface.co/vocab-transformers/distilbert-word2vec_256k-MLM_best + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilcamembert_base_pipeline_fr.md b/docs/_posts/ahmedlone127/2024-09-04-distilcamembert_base_pipeline_fr.md new file mode 100644 index 00000000000000..88244dc18baae7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilcamembert_base_pipeline_fr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: French distilcamembert_base_pipeline pipeline CamemBertEmbeddings from cmarkea +author: John Snow Labs +name: distilcamembert_base_pipeline +date: 2024-09-04 +tags: [fr, open_source, pipeline, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilcamembert_base_pipeline` is a French model originally trained by cmarkea. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilcamembert_base_pipeline_fr_5.5.0_3.0_1725442174055.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilcamembert_base_pipeline_fr_5.5.0_3.0_1725442174055.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilcamembert_base_pipeline", lang = "fr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilcamembert_base_pipeline", lang = "fr") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilcamembert_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fr| +|Size:|253.5 MB| + +## References + +https://huggingface.co/cmarkea/distilcamembert-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distillbert_finetuned_finer_4_en.md b/docs/_posts/ahmedlone127/2024-09-04-distillbert_finetuned_finer_4_en.md new file mode 100644 index 00000000000000..0f08247c63b558 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distillbert_finetuned_finer_4_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distillbert_finetuned_finer_4 DistilBertForTokenClassification from ShadyML +author: John Snow Labs +name: distillbert_finetuned_finer_4 +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distillbert_finetuned_finer_4` is a English model originally trained by ShadyML. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distillbert_finetuned_finer_4_en_5.5.0_3.0_1725460482122.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distillbert_finetuned_finer_4_en_5.5.0_3.0_1725460482122.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = DistilBertForTokenClassification.pretrained("distillbert_finetuned_finer_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = DistilBertForTokenClassification.pretrained("distillbert_finetuned_finer_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
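+
+A quick way to eyeball the predictions from the example above is to print each token next to its predicted tag:
+
+```python
+# Token and tag arrays line up one-to-one per row of pipelineDF.
+pipelineDF.select("token.result", "ner.result").show(truncate=False)
+```
+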
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distillbert_finetuned_finer_4| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/ShadyML/distillbert-finetuned-finer-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distillbert_finetuned_medical_symptoms_en.md b/docs/_posts/ahmedlone127/2024-09-04-distillbert_finetuned_medical_symptoms_en.md new file mode 100644 index 00000000000000..eef82ec9fbccfb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distillbert_finetuned_medical_symptoms_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distillbert_finetuned_medical_symptoms DistilBertForSequenceClassification from BillyTK616 +author: John Snow Labs +name: distillbert_finetuned_medical_symptoms +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distillbert_finetuned_medical_symptoms` is a English model originally trained by BillyTK616. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distillbert_finetuned_medical_symptoms_en_5.5.0_3.0_1725489611489.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distillbert_finetuned_medical_symptoms_en_5.5.0_3.0_1725489611489.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DistilBertForSequenceClassification.pretrained("distillbert_finetuned_medical_symptoms","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distillbert_finetuned_medical_symptoms", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
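+
+The sequence classifier writes one label per input document into the `class` column; reading it back from the `pipelineDF` produced above:
+
+```python
+# Predicted label per input text.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```
+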
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distillbert_finetuned_medical_symptoms| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/BillyTK616/distillbert-finetuned-medical-symptoms \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distillbert_finetuned_medical_symptoms_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distillbert_finetuned_medical_symptoms_pipeline_en.md new file mode 100644 index 00000000000000..b654a4feb437f4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distillbert_finetuned_medical_symptoms_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distillbert_finetuned_medical_symptoms_pipeline pipeline DistilBertForSequenceClassification from BillyTK616 +author: John Snow Labs +name: distillbert_finetuned_medical_symptoms_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distillbert_finetuned_medical_symptoms_pipeline` is a English model originally trained by BillyTK616. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distillbert_finetuned_medical_symptoms_pipeline_en_5.5.0_3.0_1725489625957.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distillbert_finetuned_medical_symptoms_pipeline_en_5.5.0_3.0_1725489625957.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distillbert_finetuned_medical_symptoms_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distillbert_finetuned_medical_symptoms_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distillbert_finetuned_medical_symptoms_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/BillyTK616/distillbert-finetuned-medical-symptoms + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distillbert_political_finetune_en.md b/docs/_posts/ahmedlone127/2024-09-04-distillbert_political_finetune_en.md new file mode 100644 index 00000000000000..ad0a654665e289 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distillbert_political_finetune_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distillbert_political_finetune DistilBertForSequenceClassification from harshal-11 +author: John Snow Labs +name: distillbert_political_finetune +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distillbert_political_finetune` is a English model originally trained by harshal-11. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distillbert_political_finetune_en_5.5.0_3.0_1725489794211.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distillbert_political_finetune_en_5.5.0_3.0_1725489794211.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DistilBertForSequenceClassification.pretrained("distillbert_political_finetune","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distillbert_political_finetune", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distillbert_political_finetune| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/harshal-11/DistillBERT-Political-Finetune \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilroberta_base_catalan_v2_ca.md b/docs/_posts/ahmedlone127/2024-09-04-distilroberta_base_catalan_v2_ca.md new file mode 100644 index 00000000000000..769d487b898b9b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilroberta_base_catalan_v2_ca.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Catalan, Valencian distilroberta_base_catalan_v2 RoBertaEmbeddings from projecte-aina +author: John Snow Labs +name: distilroberta_base_catalan_v2 +date: 2024-09-04 +tags: [ca, open_source, onnx, embeddings, roberta] +task: Embeddings +language: ca +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilroberta_base_catalan_v2` is a Catalan, Valencian model originally trained by projecte-aina. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilroberta_base_catalan_v2_ca_5.5.0_3.0_1725412512592.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilroberta_base_catalan_v2_ca_5.5.0_3.0_1725412512592.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("distilroberta_base_catalan_v2","ca") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("distilroberta_base_catalan_v2","ca") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
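+
+If the vectors are meant to feed a downstream Spark ML stage, an `EmbeddingsFinisher` can be appended to the pipeline above to turn the annotation structs into plain vector columns; a sketch using the column names from the example:
+
+```python
+from sparknlp.base import EmbeddingsFinisher
+
+# Convert embedding annotations into Spark ML vector columns.
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["finished_embeddings"]) \
+    .setOutputAsVector(True)
+```
+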
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilroberta_base_catalan_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|ca| +|Size:|304.1 MB| + +## References + +https://huggingface.co/projecte-aina/distilroberta-base-ca-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilroberta_base_finetuned_wikitext2_squad_qa_wandb2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilroberta_base_finetuned_wikitext2_squad_qa_wandb2_pipeline_en.md new file mode 100644 index 00000000000000..26956a37f1fce6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilroberta_base_finetuned_wikitext2_squad_qa_wandb2_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilroberta_base_finetuned_wikitext2_squad_qa_wandb2_pipeline pipeline RoBertaForQuestionAnswering from Madhana +author: John Snow Labs +name: distilroberta_base_finetuned_wikitext2_squad_qa_wandb2_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilroberta_base_finetuned_wikitext2_squad_qa_wandb2_pipeline` is a English model originally trained by Madhana. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilroberta_base_finetuned_wikitext2_squad_qa_wandb2_pipeline_en_5.5.0_3.0_1725479523889.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilroberta_base_finetuned_wikitext2_squad_qa_wandb2_pipeline_en_5.5.0_3.0_1725479523889.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilroberta_base_finetuned_wikitext2_squad_qa_wandb2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilroberta_base_finetuned_wikitext2_squad_qa_wandb2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilroberta_base_finetuned_wikitext2_squad_qa_wandb2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|306.5 MB| + +## References + +https://huggingface.co/Madhana/distilroberta-base-finetuned-wikitext2-SQuAD-qa-WandB2 + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-distilroberta_base_wandb_week_3_complaints_classifier_512_en.md b/docs/_posts/ahmedlone127/2024-09-04-distilroberta_base_wandb_week_3_complaints_classifier_512_en.md new file mode 100644 index 00000000000000..e0d19fb5f56f68 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-distilroberta_base_wandb_week_3_complaints_classifier_512_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilroberta_base_wandb_week_3_complaints_classifier_512 RoBertaForSequenceClassification from Kayvane +author: John Snow Labs +name: distilroberta_base_wandb_week_3_complaints_classifier_512 +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilroberta_base_wandb_week_3_complaints_classifier_512` is a English model originally trained by Kayvane. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilroberta_base_wandb_week_3_complaints_classifier_512_en_5.5.0_3.0_1725485276178.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilroberta_base_wandb_week_3_complaints_classifier_512_en_5.5.0_3.0_1725485276178.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = RoBertaForSequenceClassification.pretrained("distilroberta_base_wandb_week_3_complaints_classifier_512","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = RoBertaForSequenceClassification.pretrained("distilroberta_base_wandb_week_3_complaints_classifier_512", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
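+
+Besides the winning label, each classification annotation keeps its per-label scores in the annotation metadata, which can be useful for thresholding; a sketch against the `pipelineDF` from the example above:
+
+```python
+# Inspect the predicted label and the raw score map for each row.
+pipelineDF.select("class.result", "class.metadata").show(truncate=False)
+```
+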
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilroberta_base_wandb_week_3_complaints_classifier_512| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|309.1 MB| + +## References + +https://huggingface.co/Kayvane/distilroberta-base-wandb-week-3-complaints-classifier-512 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-dummy_model2_skr3178_en.md b/docs/_posts/ahmedlone127/2024-09-04-dummy_model2_skr3178_en.md new file mode 100644 index 00000000000000..153a49b380cec2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-dummy_model2_skr3178_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model2_skr3178 CamemBertEmbeddings from skr3178 +author: John Snow Labs +name: dummy_model2_skr3178 +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model2_skr3178` is a English model originally trained by skr3178. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model2_skr3178_en_5.5.0_3.0_1725408530553.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model2_skr3178_en_5.5.0_3.0_1725408530553.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model2_skr3178","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model2_skr3178","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
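+
+Throughput and casing behaviour can be tuned on the annotator itself; a sketch of the commonly used knobs (the values shown are illustrative, not recommendations from this card):
+
+```python
+# Optional configuration on the embeddings stage from the example above.
+embeddings = CamemBertEmbeddings.pretrained("dummy_model2_skr3178", "en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("embeddings") \
+    .setCaseSensitive(True) \
+    .setBatchSize(8)
+```
+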
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model2_skr3178| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/skr3178/dummy-model2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-dummy_model2_tiffanytiffany_en.md b/docs/_posts/ahmedlone127/2024-09-04-dummy_model2_tiffanytiffany_en.md new file mode 100644 index 00000000000000..f12bfdc2863be1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-dummy_model2_tiffanytiffany_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model2_tiffanytiffany CamemBertEmbeddings from TiffanyTiffany +author: John Snow Labs +name: dummy_model2_tiffanytiffany +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model2_tiffanytiffany` is a English model originally trained by TiffanyTiffany. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model2_tiffanytiffany_en_5.5.0_3.0_1725442278632.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model2_tiffanytiffany_en_5.5.0_3.0_1725442278632.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model2_tiffanytiffany","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model2_tiffanytiffany","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model2_tiffanytiffany| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/TiffanyTiffany/dummy-model2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-dummy_model2_tiffanytiffany_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-dummy_model2_tiffanytiffany_pipeline_en.md new file mode 100644 index 00000000000000..ef7ce417c2e537 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-dummy_model2_tiffanytiffany_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model2_tiffanytiffany_pipeline pipeline CamemBertEmbeddings from TiffanyTiffany +author: John Snow Labs +name: dummy_model2_tiffanytiffany_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model2_tiffanytiffany_pipeline` is a English model originally trained by TiffanyTiffany. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model2_tiffanytiffany_pipeline_en_5.5.0_3.0_1725442355650.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model2_tiffanytiffany_pipeline_en_5.5.0_3.0_1725442355650.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model2_tiffanytiffany_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model2_tiffanytiffany_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
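+
+The snippet above assumes an input DataFrame `df` with a `text` column. A minimal, self-contained Python sketch, where the session setup via `sparknlp.start()` and the example sentence are illustrative assumptions rather than part of the original card:
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+# Start a Spark NLP session and build a one-row DataFrame with the expected "text" column.
+spark = sparknlp.start()
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("dummy_model2_tiffanytiffany_pipeline", lang="en")
+annotations = pipeline.transform(df)
+annotations.show(truncate=False)
+```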
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model2_tiffanytiffany_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/TiffanyTiffany/dummy-model2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-dummy_model_7_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_7_pipeline_en.md new file mode 100644 index 00000000000000..844ab00c968cb0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_7_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_7_pipeline pipeline CamemBertEmbeddings from diegoref +author: John Snow Labs +name: dummy_model_7_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_7_pipeline` is a English model originally trained by diegoref. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_7_pipeline_en_5.5.0_3.0_1725442311949.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_7_pipeline_en_5.5.0_3.0_1725442311949.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_7_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_7_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_7_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/diegoref/dummy-model-7 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-dummy_model_ainullbabystep_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_ainullbabystep_pipeline_en.md new file mode 100644 index 00000000000000..9ec0152509df75 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_ainullbabystep_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_ainullbabystep_pipeline pipeline CamemBertEmbeddings from AINullBabystep +author: John Snow Labs +name: dummy_model_ainullbabystep_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_ainullbabystep_pipeline` is a English model originally trained by AINullBabystep. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_ainullbabystep_pipeline_en_5.5.0_3.0_1725408384492.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_ainullbabystep_pipeline_en_5.5.0_3.0_1725408384492.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_ainullbabystep_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_ainullbabystep_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_ainullbabystep_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/AINullBabystep/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-dummy_model_benchan79_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_benchan79_pipeline_en.md new file mode 100644 index 00000000000000..bcd513ef872390 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_benchan79_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_benchan79_pipeline pipeline CamemBertEmbeddings from benchan79 +author: John Snow Labs +name: dummy_model_benchan79_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_benchan79_pipeline` is a English model originally trained by benchan79. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_benchan79_pipeline_en_5.5.0_3.0_1725409409248.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_benchan79_pipeline_en_5.5.0_3.0_1725409409248.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_benchan79_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_benchan79_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_benchan79_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/benchan79/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-dummy_model_binitha_en.md b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_binitha_en.md new file mode 100644 index 00000000000000..440af0d7e72c09 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_binitha_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_binitha CamemBertEmbeddings from Binitha +author: John Snow Labs +name: dummy_model_binitha +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_binitha` is a English model originally trained by Binitha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_binitha_en_5.5.0_3.0_1725444817374.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_binitha_en_5.5.0_3.0_1725444817374.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_binitha","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_binitha","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_binitha| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/Binitha/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-dummy_model_dry_en.md b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_dry_en.md new file mode 100644 index 00000000000000..003c11fe52745a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_dry_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_dry CamemBertEmbeddings from DrY +author: John Snow Labs +name: dummy_model_dry +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_dry` is a English model originally trained by DrY. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_dry_en_5.5.0_3.0_1725444577140.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_dry_en_5.5.0_3.0_1725444577140.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_dry","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_dry","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_dry| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/DrY/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-dummy_model_dry_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_dry_pipeline_en.md new file mode 100644 index 00000000000000..914c86ab625a76 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_dry_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_dry_pipeline pipeline CamemBertEmbeddings from DrY +author: John Snow Labs +name: dummy_model_dry_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_dry_pipeline` is a English model originally trained by DrY. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_dry_pipeline_en_5.5.0_3.0_1725444653667.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_dry_pipeline_en_5.5.0_3.0_1725444653667.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_dry_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_dry_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_dry_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/DrY/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-dummy_model_ffleming_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_ffleming_pipeline_en.md new file mode 100644 index 00000000000000..9a1ae39d503439 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_ffleming_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_ffleming_pipeline pipeline CamemBertEmbeddings from ffleming +author: John Snow Labs +name: dummy_model_ffleming_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_ffleming_pipeline` is a English model originally trained by ffleming. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_ffleming_pipeline_en_5.5.0_3.0_1725442331497.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_ffleming_pipeline_en_5.5.0_3.0_1725442331497.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_ffleming_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_ffleming_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_ffleming_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/ffleming/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-dummy_model_jianfeng777_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_jianfeng777_pipeline_en.md new file mode 100644 index 00000000000000..6d5ba68f15bb17 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_jianfeng777_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_jianfeng777_pipeline pipeline CamemBertEmbeddings from Jianfeng777 +author: John Snow Labs +name: dummy_model_jianfeng777_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_jianfeng777_pipeline` is a English model originally trained by Jianfeng777. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_jianfeng777_pipeline_en_5.5.0_3.0_1725408004082.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_jianfeng777_pipeline_en_5.5.0_3.0_1725408004082.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_jianfeng777_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_jianfeng777_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_jianfeng777_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/Jianfeng777/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-dummy_model_jonathansum_en.md b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_jonathansum_en.md new file mode 100644 index 00000000000000..86b951d8f21f42 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_jonathansum_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_jonathansum CamemBertEmbeddings from JonathanSum +author: John Snow Labs +name: dummy_model_jonathansum +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_jonathansum` is a English model originally trained by JonathanSum. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_jonathansum_en_5.5.0_3.0_1725443150664.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_jonathansum_en_5.5.0_3.0_1725443150664.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_jonathansum","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_jonathansum","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_jonathansum| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/JonathanSum/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-dummy_model_maxcarduner_en.md b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_maxcarduner_en.md new file mode 100644 index 00000000000000..27c3e48a76afaa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_maxcarduner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_maxcarduner CamemBertEmbeddings from maxcarduner +author: John Snow Labs +name: dummy_model_maxcarduner +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_maxcarduner` is a English model originally trained by maxcarduner. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_maxcarduner_en_5.5.0_3.0_1725444728660.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_maxcarduner_en_5.5.0_3.0_1725444728660.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_maxcarduner","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_maxcarduner","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_maxcarduner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/maxcarduner/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-dummy_model_raphgg_en.md b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_raphgg_en.md new file mode 100644 index 00000000000000..9a7a43bcdf9ea7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_raphgg_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_raphgg CamemBertEmbeddings from raphgg +author: John Snow Labs +name: dummy_model_raphgg +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_raphgg` is a English model originally trained by raphgg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_raphgg_en_5.5.0_3.0_1725408777476.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_raphgg_en_5.5.0_3.0_1725408777476.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_raphgg","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_raphgg","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_raphgg| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/raphgg/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-dummy_model_sunilpinnamaneni_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_sunilpinnamaneni_pipeline_en.md new file mode 100644 index 00000000000000..44befcc5a7c4d4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_sunilpinnamaneni_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_sunilpinnamaneni_pipeline pipeline CamemBertEmbeddings from sunilpinnamaneni +author: John Snow Labs +name: dummy_model_sunilpinnamaneni_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_sunilpinnamaneni_pipeline` is a English model originally trained by sunilpinnamaneni. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_sunilpinnamaneni_pipeline_en_5.5.0_3.0_1725443020100.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_sunilpinnamaneni_pipeline_en_5.5.0_3.0_1725443020100.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_sunilpinnamaneni_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_sunilpinnamaneni_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_sunilpinnamaneni_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/sunilpinnamaneni/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-dummy_model_tanu09_en.md b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_tanu09_en.md new file mode 100644 index 00000000000000..e1bbff7fb3c9b6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_tanu09_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_tanu09 CamemBertEmbeddings from tanu09 +author: John Snow Labs +name: dummy_model_tanu09 +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_tanu09` is a English model originally trained by tanu09. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_tanu09_en_5.5.0_3.0_1725442043667.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_tanu09_en_5.5.0_3.0_1725442043667.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_tanu09","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_tanu09","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_tanu09| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/tanu09/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-dummy_model_tanu09_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_tanu09_pipeline_en.md new file mode 100644 index 00000000000000..ea27860d809ab7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_tanu09_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_tanu09_pipeline pipeline CamemBertEmbeddings from tanu09 +author: John Snow Labs +name: dummy_model_tanu09_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_tanu09_pipeline` is a English model originally trained by tanu09. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_tanu09_pipeline_en_5.5.0_3.0_1725442122825.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_tanu09_pipeline_en_5.5.0_3.0_1725442122825.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_tanu09_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_tanu09_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_tanu09_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/tanu09/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-dummy_model_tpanda09_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_tpanda09_pipeline_en.md new file mode 100644 index 00000000000000..60634a03a4997d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_tpanda09_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_tpanda09_pipeline pipeline CamemBertEmbeddings from tpanda09 +author: John Snow Labs +name: dummy_model_tpanda09_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_tpanda09_pipeline` is a English model originally trained by tpanda09. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_tpanda09_pipeline_en_5.5.0_3.0_1725443988388.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_tpanda09_pipeline_en_5.5.0_3.0_1725443988388.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_tpanda09_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_tpanda09_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_tpanda09_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/tpanda09/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-dummy_model_umalakshmi07_en.md b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_umalakshmi07_en.md new file mode 100644 index 00000000000000..04fef5bc0c2039 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_umalakshmi07_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_umalakshmi07 CamemBertEmbeddings from Umalakshmi07 +author: John Snow Labs +name: dummy_model_umalakshmi07 +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_umalakshmi07` is a English model originally trained by Umalakshmi07. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_umalakshmi07_en_5.5.0_3.0_1725409032010.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_umalakshmi07_en_5.5.0_3.0_1725409032010.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_umalakshmi07","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_umalakshmi07","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_umalakshmi07| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/Umalakshmi07/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-dummy_model_vickysirwani_en.md b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_vickysirwani_en.md new file mode 100644 index 00000000000000..654f9741f4d27a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_vickysirwani_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_vickysirwani CamemBertEmbeddings from vickysirwani +author: John Snow Labs +name: dummy_model_vickysirwani +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_vickysirwani` is a English model originally trained by vickysirwani. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_vickysirwani_en_5.5.0_3.0_1725409150670.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_vickysirwani_en_5.5.0_3.0_1725409150670.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_vickysirwani","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_vickysirwani","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_vickysirwani| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/vickysirwani/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-dummy_model_viraal_en.md b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_viraal_en.md new file mode 100644 index 00000000000000..045bc1132432e6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_viraal_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_viraal CamemBertEmbeddings from Viraal +author: John Snow Labs +name: dummy_model_viraal +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_viraal` is a English model originally trained by Viraal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_viraal_en_5.5.0_3.0_1725444022029.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_viraal_en_5.5.0_3.0_1725444022029.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_viraal","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_viraal","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_viraal| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/Viraal/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-dummy_model_viraal_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_viraal_pipeline_en.md new file mode 100644 index 00000000000000..3050c94133df56 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-dummy_model_viraal_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_viraal_pipeline pipeline CamemBertEmbeddings from Viraal +author: John Snow Labs +name: dummy_model_viraal_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_viraal_pipeline` is a English model originally trained by Viraal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_viraal_pipeline_en_5.5.0_3.0_1725444098024.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_viraal_pipeline_en_5.5.0_3.0_1725444098024.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_viraal_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_viraal_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_viraal_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/Viraal/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-emotion_text_classifier_on_dd_v1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-emotion_text_classifier_on_dd_v1_pipeline_en.md new file mode 100644 index 00000000000000..ab6c2bc0f8d119 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-emotion_text_classifier_on_dd_v1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English emotion_text_classifier_on_dd_v1_pipeline pipeline RoBertaForSequenceClassification from Shotaro30678 +author: John Snow Labs +name: emotion_text_classifier_on_dd_v1_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`emotion_text_classifier_on_dd_v1_pipeline` is a English model originally trained by Shotaro30678. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/emotion_text_classifier_on_dd_v1_pipeline_en_5.5.0_3.0_1725486119465.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/emotion_text_classifier_on_dd_v1_pipeline_en_5.5.0_3.0_1725486119465.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("emotion_text_classifier_on_dd_v1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("emotion_text_classifier_on_dd_v1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|emotion_text_classifier_on_dd_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|308.9 MB| + +## References + +https://huggingface.co/Shotaro30678/emotion_text_classifier_on_dd_v1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-environmentalbert_base_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-environmentalbert_base_pipeline_en.md new file mode 100644 index 00000000000000..2d68da72211209 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-environmentalbert_base_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English environmentalbert_base_pipeline pipeline RoBertaEmbeddings from ESGBERT +author: John Snow Labs +name: environmentalbert_base_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`environmentalbert_base_pipeline` is a English model originally trained by ESGBERT. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/environmentalbert_base_pipeline_en_5.5.0_3.0_1725412664939.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/environmentalbert_base_pipeline_en_5.5.0_3.0_1725412664939.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("environmentalbert_base_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("environmentalbert_base_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|environmentalbert_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|306.8 MB| + +## References + +https://huggingface.co/ESGBERT/EnvironmentalBERT-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-esg_classification_english_en.md b/docs/_posts/ahmedlone127/2024-09-04-esg_classification_english_en.md new file mode 100644 index 00000000000000..1f2530cee844fe --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-esg_classification_english_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English esg_classification_english DistilBertForSequenceClassification from cea-list-lasti +author: John Snow Labs +name: esg_classification_english +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`esg_classification_english` is a English model originally trained by cea-list-lasti. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/esg_classification_english_en_5.5.0_3.0_1725489693132.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/esg_classification_english_en_5.5.0_3.0_1725489693132.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("esg_classification_english","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("esg_classification_english", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
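+
+After `transform` runs, the predicted labels sit in the `class` annotation column configured above; a short sketch of reading them back out (column names follow the example, the exact label strings depend on the model):
+
+```python
+# Show the input text next to the predicted label(s)
+pipelineDF.select("text", "class.result").show(truncate=False)
+```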
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|esg_classification_english| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|246.3 MB| + +## References + +https://huggingface.co/cea-list-lasti/ESG-classification-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-esg_classification_english_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-esg_classification_english_pipeline_en.md new file mode 100644 index 00000000000000..369104b2ac2487 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-esg_classification_english_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English esg_classification_english_pipeline pipeline DistilBertForSequenceClassification from cea-list-lasti +author: John Snow Labs +name: esg_classification_english_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`esg_classification_english_pipeline` is a English model originally trained by cea-list-lasti. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/esg_classification_english_pipeline_en_5.5.0_3.0_1725489705060.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/esg_classification_english_pipeline_en_5.5.0_3.0_1725489705060.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("esg_classification_english_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("esg_classification_english_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|esg_classification_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|246.4 MB| + +## References + +https://huggingface.co/cea-list-lasti/ESG-classification-en + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-esg_sentiment_prediction_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-esg_sentiment_prediction_pipeline_en.md new file mode 100644 index 00000000000000..e91b0e32fc82d6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-esg_sentiment_prediction_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English esg_sentiment_prediction_pipeline pipeline CamemBertForSequenceClassification from Katkatkuu +author: John Snow Labs +name: esg_sentiment_prediction_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`esg_sentiment_prediction_pipeline` is a English model originally trained by Katkatkuu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/esg_sentiment_prediction_pipeline_en_5.5.0_3.0_1725480381205.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/esg_sentiment_prediction_pipeline_en_5.5.0_3.0_1725480381205.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("esg_sentiment_prediction_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("esg_sentiment_prediction_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|esg_sentiment_prediction_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|394.4 MB| + +## References + +https://huggingface.co/Katkatkuu/ESG_Sentiment_Prediction + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-facets_gpt_77_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-facets_gpt_77_pipeline_en.md new file mode 100644 index 00000000000000..bae91e631b714c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-facets_gpt_77_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English facets_gpt_77_pipeline pipeline MPNetEmbeddings from ingeol +author: John Snow Labs +name: facets_gpt_77_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`facets_gpt_77_pipeline` is a English model originally trained by ingeol. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/facets_gpt_77_pipeline_en_5.5.0_3.0_1725470029719.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/facets_gpt_77_pipeline_en_5.5.0_3.0_1725470029719.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("facets_gpt_77_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("facets_gpt_77_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|facets_gpt_77_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/ingeol/facets_gpt_77 + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-facets_gpt_expanswer_35_en.md b/docs/_posts/ahmedlone127/2024-09-04-facets_gpt_expanswer_35_en.md new file mode 100644 index 00000000000000..c90e8d28083bdc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-facets_gpt_expanswer_35_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English facets_gpt_expanswer_35 MPNetEmbeddings from ingeol +author: John Snow Labs +name: facets_gpt_expanswer_35 +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`facets_gpt_expanswer_35` is a English model originally trained by ingeol. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/facets_gpt_expanswer_35_en_5.5.0_3.0_1725470914953.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/facets_gpt_expanswer_35_en_5.5.0_3.0_1725470914953.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("facets_gpt_expanswer_35","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("facets_gpt_expanswer_35","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
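+
+The `embeddings` column produced above holds Spark NLP annotations rather than plain vectors. If the embeddings are needed as Spark ML vectors (for clustering, similarity, or a downstream estimator), an `EmbeddingsFinisher` stage can be appended; this is a sketch under the assumption that the stage and column names from the Python example above are kept:
+
+```python
+from sparknlp.base import EmbeddingsFinisher
+
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["finished_embeddings"]) \
+    .setOutputAsVector(True)
+
+# Reuse the assembler and embeddings stages defined above and add the finisher
+pipeline = Pipeline().setStages([documentAssembler, embeddings, finisher])
+result = pipeline.fit(data).transform(data)
+result.select("finished_embeddings").show(truncate=False)
+```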
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|facets_gpt_expanswer_35| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/ingeol/facets_gpt_expanswer_35 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-fine_tuned_model_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-fine_tuned_model_1_pipeline_en.md new file mode 100644 index 00000000000000..fbd4e642408b14 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-fine_tuned_model_1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English fine_tuned_model_1_pipeline pipeline AlbertForSequenceClassification from KalaiselvanD +author: John Snow Labs +name: fine_tuned_model_1_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fine_tuned_model_1_pipeline` is a English model originally trained by KalaiselvanD. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tuned_model_1_pipeline_en_5.5.0_3.0_1725464892026.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tuned_model_1_pipeline_en_5.5.0_3.0_1725464892026.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("fine_tuned_model_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("fine_tuned_model_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tuned_model_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/KalaiselvanD/fine_tuned_model_1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-finer_distillbert_v2_en.md b/docs/_posts/ahmedlone127/2024-09-04-finer_distillbert_v2_en.md new file mode 100644 index 00000000000000..86feac68764fb3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-finer_distillbert_v2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English finer_distillbert_v2 DistilBertForTokenClassification from HariLuru +author: John Snow Labs +name: finer_distillbert_v2 +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finer_distillbert_v2` is a English model originally trained by HariLuru. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finer_distillbert_v2_en_5.5.0_3.0_1725492666612.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finer_distillbert_v2_en_5.5.0_3.0_1725492666612.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("finer_distillbert_v2","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("finer_distillbert_v2", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
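+
+The `ner` column above contains one tag per token. To merge consecutive tags into entity chunks, a `NerConverter` stage is commonly appended after the classifier; a sketch that reuses the column names from the Python example (the chunk column name is an assumption):
+
+```python
+from sparknlp.annotator import NerConverter
+
+nerConverter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier, nerConverter])
+result = pipeline.fit(data).transform(data)
+result.select("ner_chunk.result").show(truncate=False)
+```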
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finer_distillbert_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/HariLuru/finer_distillbert_v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-finetuned_dscs24_mitre_distilbert_base_uncased_fill_mask_en.md b/docs/_posts/ahmedlone127/2024-09-04-finetuned_dscs24_mitre_distilbert_base_uncased_fill_mask_en.md new file mode 100644 index 00000000000000..94b0db276068d6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-finetuned_dscs24_mitre_distilbert_base_uncased_fill_mask_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English finetuned_dscs24_mitre_distilbert_base_uncased_fill_mask DistilBertEmbeddings from hanyuany14 +author: John Snow Labs +name: finetuned_dscs24_mitre_distilbert_base_uncased_fill_mask +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_dscs24_mitre_distilbert_base_uncased_fill_mask` is a English model originally trained by hanyuany14. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_dscs24_mitre_distilbert_base_uncased_fill_mask_en_5.5.0_3.0_1725413763673.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_dscs24_mitre_distilbert_base_uncased_fill_mask_en_5.5.0_3.0_1725413763673.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("finetuned_dscs24_mitre_distilbert_base_uncased_fill_mask","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("finetuned_dscs24_mitre_distilbert_base_uncased_fill_mask","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_dscs24_mitre_distilbert_base_uncased_fill_mask| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/hanyuany14/finetuned-DSCS24-mitre-distilbert-base-uncased-fill-mask \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-finetuned_sail2017_indic_bert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-finetuned_sail2017_indic_bert_pipeline_en.md new file mode 100644 index 00000000000000..f43f81cabc2822 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-finetuned_sail2017_indic_bert_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English finetuned_sail2017_indic_bert_pipeline pipeline AlbertForSequenceClassification from aditeyabaral +author: John Snow Labs +name: finetuned_sail2017_indic_bert_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_sail2017_indic_bert_pipeline` is a English model originally trained by aditeyabaral. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_sail2017_indic_bert_pipeline_en_5.5.0_3.0_1725488283184.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_sail2017_indic_bert_pipeline_en_5.5.0_3.0_1725488283184.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("finetuned_sail2017_indic_bert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("finetuned_sail2017_indic_bert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_sail2017_indic_bert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|127.8 MB| + +## References + +https://huggingface.co/aditeyabaral/finetuned-sail2017-indic-bert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-finetunedclip_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-finetunedclip_pipeline_en.md new file mode 100644 index 00000000000000..2012494a4b8996 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-finetunedclip_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English finetunedclip_pipeline pipeline CLIPForZeroShotClassification from homiehari +author: John Snow Labs +name: finetunedclip_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetunedclip_pipeline` is a English model originally trained by homiehari. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetunedclip_pipeline_en_5.5.0_3.0_1725456499288.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetunedclip_pipeline_en_5.5.0_3.0_1725456499288.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("finetunedclip_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("finetunedclip_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
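+
+Unlike the text pipelines on this page, this pipeline starts with an `ImageAssembler`, so `df` must carry images rather than text. A hedged sketch of feeding it a local image folder (the path is a placeholder, and the candidate labels come from the original model):
+
+```python
+# Read images with Spark's built-in image data source; invalid files are dropped
+image_df = spark.read.format("image").option("dropInvalid", True).load("path/to/images")
+
+pipeline = PretrainedPipeline("finetunedclip_pipeline", lang="en")
+result = pipeline.transform(image_df)
+```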
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetunedclip_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|567.3 MB| + +## References + +https://huggingface.co/homiehari/finetunedCLIP + +## Included Models + +- ImageAssembler +- CLIPForZeroShotClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-finetuning_sentiment_model_3000_samples_benjihearhear_en.md b/docs/_posts/ahmedlone127/2024-09-04-finetuning_sentiment_model_3000_samples_benjihearhear_en.md new file mode 100644 index 00000000000000..c667d9f586b3d7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-finetuning_sentiment_model_3000_samples_benjihearhear_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English finetuning_sentiment_model_3000_samples_benjihearhear DistilBertForSequenceClassification from BenjiHearHear +author: John Snow Labs +name: finetuning_sentiment_model_3000_samples_benjihearhear +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuning_sentiment_model_3000_samples_benjihearhear` is a English model originally trained by BenjiHearHear. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuning_sentiment_model_3000_samples_benjihearhear_en_5.5.0_3.0_1725490186136.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuning_sentiment_model_3000_samples_benjihearhear_en_5.5.0_3.0_1725490186136.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("finetuning_sentiment_model_3000_samples_benjihearhear","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("finetuning_sentiment_model_3000_samples_benjihearhear", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuning_sentiment_model_3000_samples_benjihearhear| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/BenjiHearHear/finetuning-sentiment-model-3000-samples \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-finetuning_sentiment_model_3000_samples_benjihearhear_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-finetuning_sentiment_model_3000_samples_benjihearhear_pipeline_en.md new file mode 100644 index 00000000000000..f88222209b79d8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-finetuning_sentiment_model_3000_samples_benjihearhear_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English finetuning_sentiment_model_3000_samples_benjihearhear_pipeline pipeline DistilBertForSequenceClassification from BenjiHearHear +author: John Snow Labs +name: finetuning_sentiment_model_3000_samples_benjihearhear_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuning_sentiment_model_3000_samples_benjihearhear_pipeline` is a English model originally trained by BenjiHearHear. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuning_sentiment_model_3000_samples_benjihearhear_pipeline_en_5.5.0_3.0_1725490199007.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuning_sentiment_model_3000_samples_benjihearhear_pipeline_en_5.5.0_3.0_1725490199007.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("finetuning_sentiment_model_3000_samples_benjihearhear_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("finetuning_sentiment_model_3000_samples_benjihearhear_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuning_sentiment_model_3000_samples_benjihearhear_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/BenjiHearHear/finetuning-sentiment-model-3000-samples + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-finetuning_sentiment_model_3000_samples_carlodallaquercia_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-finetuning_sentiment_model_3000_samples_carlodallaquercia_pipeline_en.md new file mode 100644 index 00000000000000..005ce092bca796 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-finetuning_sentiment_model_3000_samples_carlodallaquercia_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English finetuning_sentiment_model_3000_samples_carlodallaquercia_pipeline pipeline DistilBertForSequenceClassification from carlodallaquercia +author: John Snow Labs +name: finetuning_sentiment_model_3000_samples_carlodallaquercia_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuning_sentiment_model_3000_samples_carlodallaquercia_pipeline` is a English model originally trained by carlodallaquercia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuning_sentiment_model_3000_samples_carlodallaquercia_pipeline_en_5.5.0_3.0_1725490094219.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuning_sentiment_model_3000_samples_carlodallaquercia_pipeline_en_5.5.0_3.0_1725490094219.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("finetuning_sentiment_model_3000_samples_carlodallaquercia_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("finetuning_sentiment_model_3000_samples_carlodallaquercia_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuning_sentiment_model_3000_samples_carlodallaquercia_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/carlodallaquercia/finetuning-sentiment-model-3000-samples + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-first_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-first_model_pipeline_en.md new file mode 100644 index 00000000000000..1c18ae798ab9c2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-first_model_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English first_model_pipeline pipeline DistilBertForTokenClassification from Harini2506 +author: John Snow Labs +name: first_model_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`first_model_pipeline` is a English model originally trained by Harini2506. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/first_model_pipeline_en_5.5.0_3.0_1725492908536.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/first_model_pipeline_en_5.5.0_3.0_1725492908536.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("first_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("first_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|first_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Harini2506/first_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-fnctech_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-fnctech_pipeline_en.md new file mode 100644 index 00000000000000..a5df1dbd348df2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-fnctech_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English fnctech_pipeline pipeline MPNetEmbeddings from bchan007 +author: John Snow Labs +name: fnctech_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fnctech_pipeline` is a English model originally trained by bchan007. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fnctech_pipeline_en_5.5.0_3.0_1725470510623.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fnctech_pipeline_en_5.5.0_3.0_1725470510623.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("fnctech_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("fnctech_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fnctech_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|408.2 MB| + +## References + +https://huggingface.co/bchan007/fnctech + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-furina_with_transliteration_minangkabau_en.md b/docs/_posts/ahmedlone127/2024-09-04-furina_with_transliteration_minangkabau_en.md new file mode 100644 index 00000000000000..a128361d0e5c6c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-furina_with_transliteration_minangkabau_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English furina_with_transliteration_minangkabau XlmRoBertaEmbeddings from yihongLiu +author: John Snow Labs +name: furina_with_transliteration_minangkabau +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`furina_with_transliteration_minangkabau` is a English model originally trained by yihongLiu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/furina_with_transliteration_minangkabau_en_5.5.0_3.0_1725417766684.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/furina_with_transliteration_minangkabau_en_5.5.0_3.0_1725417766684.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("furina_with_transliteration_minangkabau","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("furina_with_transliteration_minangkabau","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|furina_with_transliteration_minangkabau| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[xlm_roberta]| +|Language:|en| +|Size:|1.9 GB| + +## References + +https://huggingface.co/yihongLiu/furina-with-transliteration-min \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-gdpr_anonymiseingsmodel_ganm_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-gdpr_anonymiseingsmodel_ganm_pipeline_en.md new file mode 100644 index 00000000000000..e1beb5e4423a50 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-gdpr_anonymiseingsmodel_ganm_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English gdpr_anonymiseingsmodel_ganm_pipeline pipeline BertForTokenClassification from AI-aktindsigt +author: John Snow Labs +name: gdpr_anonymiseingsmodel_ganm_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`gdpr_anonymiseingsmodel_ganm_pipeline` is a English model originally trained by AI-aktindsigt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gdpr_anonymiseingsmodel_ganm_pipeline_en_5.5.0_3.0_1725477972628.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gdpr_anonymiseingsmodel_ganm_pipeline_en_5.5.0_3.0_1725477972628.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("gdpr_anonymiseingsmodel_ganm_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("gdpr_anonymiseingsmodel_ganm_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
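+
+For quick, single-document checks a `PretrainedPipeline` can also be called through its `annotate` method instead of building a DataFrame. The snippet below is a sketch; the Danish sample sentence and the `ner` result key are assumptions based on the token-classification stages listed below:
+
+```python
+pipeline = PretrainedPipeline("gdpr_anonymiseingsmodel_ganm_pipeline", lang="en")
+
+# annotate() returns a dict keyed by the pipeline's output column names
+result = pipeline.annotate("Jens Jensen bor i København og arbejder hos kommunen.")
+print(result["ner"])
+```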
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gdpr_anonymiseingsmodel_ganm_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|666.0 MB| + +## References + +https://huggingface.co/AI-aktindsigt/gdpr_anonymiseingsmodel_ganm + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-geolm_base_toponym_recognition_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-geolm_base_toponym_recognition_pipeline_en.md new file mode 100644 index 00000000000000..f447c4e3aa89d1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-geolm_base_toponym_recognition_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English geolm_base_toponym_recognition_pipeline pipeline BertForTokenClassification from zekun-li +author: John Snow Labs +name: geolm_base_toponym_recognition_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`geolm_base_toponym_recognition_pipeline` is a English model originally trained by zekun-li. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/geolm_base_toponym_recognition_pipeline_en_5.5.0_3.0_1725449997846.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/geolm_base_toponym_recognition_pipeline_en_5.5.0_3.0_1725449997846.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("geolm_base_toponym_recognition_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("geolm_base_toponym_recognition_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|geolm_base_toponym_recognition_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/zekun-li/geolm-base-toponym-recognition + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-google_play_sentiment_analysis_danielribeiro_en.md b/docs/_posts/ahmedlone127/2024-09-04-google_play_sentiment_analysis_danielribeiro_en.md new file mode 100644 index 00000000000000..0f114e644f92c4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-google_play_sentiment_analysis_danielribeiro_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English google_play_sentiment_analysis_danielribeiro BertForSequenceClassification from danielribeiro +author: John Snow Labs +name: google_play_sentiment_analysis_danielribeiro +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, bert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`google_play_sentiment_analysis_danielribeiro` is a English model originally trained by danielribeiro. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/google_play_sentiment_analysis_danielribeiro_en_5.5.0_3.0_1725432960121.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/google_play_sentiment_analysis_danielribeiro_en_5.5.0_3.0_1725432960121.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = BertForSequenceClassification.pretrained("google_play_sentiment_analysis_danielribeiro","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = BertForSequenceClassification.pretrained("google_play_sentiment_analysis_danielribeiro", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|google_play_sentiment_analysis_danielribeiro| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/danielribeiro/google-play-sentiment-analysis \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-google_play_sentiment_analysis_danielribeiro_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-google_play_sentiment_analysis_danielribeiro_pipeline_en.md new file mode 100644 index 00000000000000..e1e933daac8838 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-google_play_sentiment_analysis_danielribeiro_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English google_play_sentiment_analysis_danielribeiro_pipeline pipeline BertForSequenceClassification from danielribeiro +author: John Snow Labs +name: google_play_sentiment_analysis_danielribeiro_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`google_play_sentiment_analysis_danielribeiro_pipeline` is a English model originally trained by danielribeiro. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/google_play_sentiment_analysis_danielribeiro_pipeline_en_5.5.0_3.0_1725433022533.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/google_play_sentiment_analysis_danielribeiro_pipeline_en_5.5.0_3.0_1725433022533.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("google_play_sentiment_analysis_danielribeiro_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("google_play_sentiment_analysis_danielribeiro_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|google_play_sentiment_analysis_danielribeiro_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/danielribeiro/google-play-sentiment-analysis + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-gqa_roberta_german_legal_squad_2000_de.md b/docs/_posts/ahmedlone127/2024-09-04-gqa_roberta_german_legal_squad_2000_de.md new file mode 100644 index 00000000000000..6bed11faab0133 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-gqa_roberta_german_legal_squad_2000_de.md @@ -0,0 +1,86 @@ +--- +layout: model +title: German gqa_roberta_german_legal_squad_2000 RoBertaForQuestionAnswering from farid1088 +author: John Snow Labs +name: gqa_roberta_german_legal_squad_2000 +date: 2024-09-04 +tags: [de, open_source, onnx, question_answering, roberta] +task: Question Answering +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`gqa_roberta_german_legal_squad_2000` is a German model originally trained by farid1088. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gqa_roberta_german_legal_squad_2000_de_5.5.0_3.0_1725479738226.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gqa_roberta_german_legal_squad_2000_de_5.5.0_3.0_1725479738226.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = RoBertaForQuestionAnswering.pretrained("gqa_roberta_german_legal_squad_2000","de") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = RoBertaForQuestionAnswering.pretrained("gqa_roberta_german_legal_squad_2000", "de")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
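+
+Once the pipeline has been fitted and applied, the predicted answer span is stored in the `answer` column configured above; a short sketch of reading it out next to the assembled question (column names follow the example):
+
+```python
+pipelineDF.select("document_question.result", "answer.result").show(truncate=False)
+```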
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gqa_roberta_german_legal_squad_2000| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|de| +|Size:|465.8 MB| + +## References + +https://huggingface.co/farid1088/GQA_RoBERTa_German_legal_SQuAD_2000 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-hasoc19_microsoft_mdeberta_v3_base_sentiment_nepal_bhasa_en.md b/docs/_posts/ahmedlone127/2024-09-04-hasoc19_microsoft_mdeberta_v3_base_sentiment_nepal_bhasa_en.md new file mode 100644 index 00000000000000..6ed9a369a5d7ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-hasoc19_microsoft_mdeberta_v3_base_sentiment_nepal_bhasa_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English hasoc19_microsoft_mdeberta_v3_base_sentiment_nepal_bhasa DeBertaForSequenceClassification from SiddharthaM +author: John Snow Labs +name: hasoc19_microsoft_mdeberta_v3_base_sentiment_nepal_bhasa +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hasoc19_microsoft_mdeberta_v3_base_sentiment_nepal_bhasa` is a English model originally trained by SiddharthaM. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hasoc19_microsoft_mdeberta_v3_base_sentiment_nepal_bhasa_en_5.5.0_3.0_1725463265879.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hasoc19_microsoft_mdeberta_v3_base_sentiment_nepal_bhasa_en_5.5.0_3.0_1725463265879.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("hasoc19_microsoft_mdeberta_v3_base_sentiment_nepal_bhasa","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("hasoc19_microsoft_mdeberta_v3_base_sentiment_nepal_bhasa", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
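+To see which label the classifier assigned, one option is to select the `result` (and, where exposed, `metadata`) fields of the `class` annotations. A sketch continuing the Python snippet above; the presence of per-class scores in `metadata` is an assumption, not a guarantee of this model:
+
+```python
+# Sketch: read the predicted label and any per-class scores from the output column.
+from pyspark.sql import functions as F
+
+pipelineDF.select(
+    F.col("text"),
+    F.col("class.result").alias("predicted_label"),
+    F.col("class.metadata").alias("scores")  # per-class confidences, if the model exposes them
+).show(truncate=False)
+```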
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hasoc19_microsoft_mdeberta_v3_base_sentiment_nepal_bhasa| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|815.4 MB| + +## References + +https://huggingface.co/SiddharthaM/hasoc19-microsoft-mdeberta-v3-base-sentiment-new \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-hw01_hamsty_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-hw01_hamsty_pipeline_en.md new file mode 100644 index 00000000000000..37f2ab59192ed1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-hw01_hamsty_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English hw01_hamsty_pipeline pipeline DistilBertForSequenceClassification from hamsty +author: John Snow Labs +name: hw01_hamsty_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hw01_hamsty_pipeline` is a English model originally trained by hamsty. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hw01_hamsty_pipeline_en_5.5.0_3.0_1725490185912.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hw01_hamsty_pipeline_en_5.5.0_3.0_1725490185912.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("hw01_hamsty_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("hw01_hamsty_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
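+The `df` above is any Spark DataFrame with a `text` column. A minimal sketch of building one, plus the `annotate()` shortcut for a single string, continuing the Python snippet above (the sample sentence is made up for illustration):
+
+```python
+# Sketch: one way to prepare `df`, assuming a running Spark NLP session (`spark`)
+# and the pretrained pipeline loaded as shown above.
+df = spark.createDataFrame([["This assignment write-up reads as clearly positive."]]).toDF("text")
+annotations = pipeline.transform(df)
+
+# For a single string, annotate() returns a plain Python dict of annotator outputs.
+single_result = pipeline.annotate("This assignment write-up reads as clearly positive.")
+# NOTE: the "class" key is an assumption; inspect single_result.keys() for the real names.
+print(single_result["class"])
+```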
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hw01_hamsty_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/hamsty/HW01 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-ibert_roberta_base_abusive_oriya_threatening_speech_en.md b/docs/_posts/ahmedlone127/2024-09-04-ibert_roberta_base_abusive_oriya_threatening_speech_en.md new file mode 100644 index 00000000000000..42db99e7af7aef --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-ibert_roberta_base_abusive_oriya_threatening_speech_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ibert_roberta_base_abusive_oriya_threatening_speech RoBertaForSequenceClassification from DunnBC22 +author: John Snow Labs +name: ibert_roberta_base_abusive_oriya_threatening_speech +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ibert_roberta_base_abusive_oriya_threatening_speech` is a English model originally trained by DunnBC22. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ibert_roberta_base_abusive_oriya_threatening_speech_en_5.5.0_3.0_1725485652260.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ibert_roberta_base_abusive_oriya_threatening_speech_en_5.5.0_3.0_1725485652260.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("ibert_roberta_base_abusive_oriya_threatening_speech","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("ibert_roberta_base_abusive_oriya_threatening_speech", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ibert_roberta_base_abusive_oriya_threatening_speech| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|467.1 MB| + +## References + +https://huggingface.co/DunnBC22/ibert-roberta-base-Abusive_Or_Threatening_Speech \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-ibert_roberta_base_abusive_oriya_threatening_speech_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-ibert_roberta_base_abusive_oriya_threatening_speech_pipeline_en.md new file mode 100644 index 00000000000000..c5aef41e2704bd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-ibert_roberta_base_abusive_oriya_threatening_speech_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English ibert_roberta_base_abusive_oriya_threatening_speech_pipeline pipeline RoBertaForSequenceClassification from DunnBC22 +author: John Snow Labs +name: ibert_roberta_base_abusive_oriya_threatening_speech_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ibert_roberta_base_abusive_oriya_threatening_speech_pipeline` is a English model originally trained by DunnBC22. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ibert_roberta_base_abusive_oriya_threatening_speech_pipeline_en_5.5.0_3.0_1725485674868.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ibert_roberta_base_abusive_oriya_threatening_speech_pipeline_en_5.5.0_3.0_1725485674868.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ibert_roberta_base_abusive_oriya_threatening_speech_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ibert_roberta_base_abusive_oriya_threatening_speech_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ibert_roberta_base_abusive_oriya_threatening_speech_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|467.1 MB| + +## References + +https://huggingface.co/DunnBC22/ibert-roberta-base-Abusive_Or_Threatening_Speech + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-icelandic_title_setfit_en.md b/docs/_posts/ahmedlone127/2024-09-04-icelandic_title_setfit_en.md new file mode 100644 index 00000000000000..74b6f7ccc4756a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-icelandic_title_setfit_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English icelandic_title_setfit MPNetEmbeddings from AlekseyKorshuk +author: John Snow Labs +name: icelandic_title_setfit +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`icelandic_title_setfit` is a English model originally trained by AlekseyKorshuk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/icelandic_title_setfit_en_5.5.0_3.0_1725470397331.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/icelandic_title_setfit_en_5.5.0_3.0_1725470397331.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("icelandic_title_setfit","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("icelandic_title_setfit","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
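+A sketch of getting the sentence vectors out of the `embeddings` column above, for example to feed a downstream similarity search. It continues the Python snippet and uses the generic `EmbeddingsFinisher` helper; nothing here is specific to this model:
+
+```python
+# Sketch: each row's `embeddings` annotation carries the sentence vector in its
+# `embeddings` field; EmbeddingsFinisher turns it into a Spark ML vector column.
+from sparknlp.base import EmbeddingsFinisher
+
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["sentence_vector"]) \
+    .setOutputAsVector(True)
+
+finisher.transform(pipelineDF).select("sentence_vector").show(truncate=False)
+```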
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|icelandic_title_setfit| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/AlekseyKorshuk/is-title-setfit \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-imdb_review_sentiement_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-imdb_review_sentiement_pipeline_en.md new file mode 100644 index 00000000000000..a7744af13f7051 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-imdb_review_sentiement_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English imdb_review_sentiement_pipeline pipeline DistilBertForSequenceClassification from santiadavani +author: John Snow Labs +name: imdb_review_sentiement_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`imdb_review_sentiement_pipeline` is a English model originally trained by santiadavani. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/imdb_review_sentiement_pipeline_en_5.5.0_3.0_1725490028144.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/imdb_review_sentiement_pipeline_en_5.5.0_3.0_1725490028144.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("imdb_review_sentiement_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("imdb_review_sentiement_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
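+For ad-hoc checks on a single review, `fullAnnotate()` avoids building a DataFrame and also returns annotation metadata. A sketch building on the pipeline loaded above; the review text and the `"class"` output key are illustrative assumptions:
+
+```python
+# Sketch: quick single-document inference with the pretrained pipeline loaded above.
+result = pipeline.fullAnnotate("The plot was thin, but the performances kept me watching.")[0]
+
+# fullAnnotate returns Annotation objects, including metadata such as class scores.
+# NOTE: the "class" key is an assumption; print(result.keys()) to see the real names.
+for annotation in result["class"]:
+    print(annotation.result, annotation.metadata)
+```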
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|imdb_review_sentiement_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/santiadavani/imdb_review_sentiement + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-incremental_semi_supervised_training_1mln_downsampled_en.md b/docs/_posts/ahmedlone127/2024-09-04-incremental_semi_supervised_training_1mln_downsampled_en.md new file mode 100644 index 00000000000000..53ea1576e0a201 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-incremental_semi_supervised_training_1mln_downsampled_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English incremental_semi_supervised_training_1mln_downsampled RoBertaForSequenceClassification from bitsanlp +author: John Snow Labs +name: incremental_semi_supervised_training_1mln_downsampled +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`incremental_semi_supervised_training_1mln_downsampled` is a English model originally trained by bitsanlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/incremental_semi_supervised_training_1mln_downsampled_en_5.5.0_3.0_1725485634376.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/incremental_semi_supervised_training_1mln_downsampled_en_5.5.0_3.0_1725485634376.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("incremental_semi_supervised_training_1mln_downsampled","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("incremental_semi_supervised_training_1mln_downsampled", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
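+When only a handful of texts need scoring, wrapping the fitted model above in a `LightPipeline` skips the DataFrame round trip. A sketch that builds on `pipelineModel` from the snippet above:
+
+```python
+# Sketch: low-latency inference on plain Python strings with LightPipeline.
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+print(light.annotate("I love spark-nlp"))             # dict of output column -> results
+print(light.annotate(["first text", "second text"]))  # also accepts a list of strings
+```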
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|incremental_semi_supervised_training_1mln_downsampled| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/bitsanlp/incremental-semi-supervised-training-1mln-downsampled \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-incremental_semi_supervised_training_1mln_downsampled_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-incremental_semi_supervised_training_1mln_downsampled_pipeline_en.md new file mode 100644 index 00000000000000..b1517d3ad08df0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-incremental_semi_supervised_training_1mln_downsampled_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English incremental_semi_supervised_training_1mln_downsampled_pipeline pipeline RoBertaForSequenceClassification from bitsanlp +author: John Snow Labs +name: incremental_semi_supervised_training_1mln_downsampled_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`incremental_semi_supervised_training_1mln_downsampled_pipeline` is a English model originally trained by bitsanlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/incremental_semi_supervised_training_1mln_downsampled_pipeline_en_5.5.0_3.0_1725485702615.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/incremental_semi_supervised_training_1mln_downsampled_pipeline_en_5.5.0_3.0_1725485702615.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("incremental_semi_supervised_training_1mln_downsampled_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("incremental_semi_supervised_training_1mln_downsampled_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|incremental_semi_supervised_training_1mln_downsampled_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/bitsanlp/incremental-semi-supervised-training-1mln-downsampled + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-indic_bert_finetuned_trac_ds_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-indic_bert_finetuned_trac_ds_pipeline_en.md new file mode 100644 index 00000000000000..6df35c01f67814 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-indic_bert_finetuned_trac_ds_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English indic_bert_finetuned_trac_ds_pipeline pipeline AlbertForSequenceClassification from IIIT-L +author: John Snow Labs +name: indic_bert_finetuned_trac_ds_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`indic_bert_finetuned_trac_ds_pipeline` is a English model originally trained by IIIT-L. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/indic_bert_finetuned_trac_ds_pipeline_en_5.5.0_3.0_1725488514620.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/indic_bert_finetuned_trac_ds_pipeline_en_5.5.0_3.0_1725488514620.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("indic_bert_finetuned_trac_ds_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("indic_bert_finetuned_trac_ds_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|indic_bert_finetuned_trac_ds_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|127.8 MB| + +## References + +https://huggingface.co/IIIT-L/indic-bert-finetuned-TRAC-DS + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-indicbert_hindi_urdu_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-indicbert_hindi_urdu_pipeline_en.md new file mode 100644 index 00000000000000..7e2ecae76030d7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-indicbert_hindi_urdu_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English indicbert_hindi_urdu_pipeline pipeline AlbertForTokenClassification from anwesham +author: John Snow Labs +name: indicbert_hindi_urdu_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`indicbert_hindi_urdu_pipeline` is a English model originally trained by anwesham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/indicbert_hindi_urdu_pipeline_en_5.5.0_3.0_1725486920499.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/indicbert_hindi_urdu_pipeline_en_5.5.0_3.0_1725486920499.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("indicbert_hindi_urdu_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("indicbert_hindi_urdu_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|indicbert_hindi_urdu_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|125.6 MB| + +## References + +https://huggingface.co/anwesham/indicbert_hi_ur + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-indicbert_urdu_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-indicbert_urdu_pipeline_en.md new file mode 100644 index 00000000000000..8946f60f2f6bb5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-indicbert_urdu_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English indicbert_urdu_pipeline pipeline AlbertForTokenClassification from anwesham +author: John Snow Labs +name: indicbert_urdu_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`indicbert_urdu_pipeline` is a English model originally trained by anwesham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/indicbert_urdu_pipeline_en_5.5.0_3.0_1725486628645.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/indicbert_urdu_pipeline_en_5.5.0_3.0_1725486628645.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("indicbert_urdu_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("indicbert_urdu_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|indicbert_urdu_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|125.6 MB| + +## References + +https://huggingface.co/anwesham/indicbert_ur + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-indicner_oriya_finetuned_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-indicner_oriya_finetuned_pipeline_en.md new file mode 100644 index 00000000000000..7faadc5ef93b7e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-indicner_oriya_finetuned_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English indicner_oriya_finetuned_pipeline pipeline AlbertForTokenClassification from dheerajpai +author: John Snow Labs +name: indicner_oriya_finetuned_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`indicner_oriya_finetuned_pipeline` is a English model originally trained by dheerajpai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/indicner_oriya_finetuned_pipeline_en_5.5.0_3.0_1725486628758.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/indicner_oriya_finetuned_pipeline_en_5.5.0_3.0_1725486628758.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("indicner_oriya_finetuned_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("indicner_oriya_finetuned_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|indicner_oriya_finetuned_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|125.6 MB| + +## References + +https://huggingface.co/dheerajpai/indicner-oriya-finetuned + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-indojave_codemixed_roberta_base_pipeline_id.md b/docs/_posts/ahmedlone127/2024-09-04-indojave_codemixed_roberta_base_pipeline_id.md new file mode 100644 index 00000000000000..7d9791dd94ea0a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-indojave_codemixed_roberta_base_pipeline_id.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Indonesian indojave_codemixed_roberta_base_pipeline pipeline RoBertaEmbeddings from fathan +author: John Snow Labs +name: indojave_codemixed_roberta_base_pipeline +date: 2024-09-04 +tags: [id, open_source, pipeline, onnx] +task: Embeddings +language: id +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`indojave_codemixed_roberta_base_pipeline` is a Indonesian model originally trained by fathan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/indojave_codemixed_roberta_base_pipeline_id_5.5.0_3.0_1725412558568.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/indojave_codemixed_roberta_base_pipeline_id_5.5.0_3.0_1725412558568.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("indojave_codemixed_roberta_base_pipeline", lang = "id") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("indojave_codemixed_roberta_base_pipeline", lang = "id") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|indojave_codemixed_roberta_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|id| +|Size:|471.1 MB| + +## References + +https://huggingface.co/fathan/indojave-codemixed-roberta-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-indonesian_punctuation_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-indonesian_punctuation_pipeline_en.md new file mode 100644 index 00000000000000..747d06fc5866fa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-indonesian_punctuation_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English indonesian_punctuation_pipeline pipeline AlbertForTokenClassification from Wikidepia +author: John Snow Labs +name: indonesian_punctuation_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`indonesian_punctuation_pipeline` is a English model originally trained by Wikidepia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/indonesian_punctuation_pipeline_en_5.5.0_3.0_1725486985329.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/indonesian_punctuation_pipeline_en_5.5.0_3.0_1725486985329.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("indonesian_punctuation_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("indonesian_punctuation_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|indonesian_punctuation_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|21.3 MB| + +## References + +https://huggingface.co/Wikidepia/indonesian-punctuation + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-intent_xl_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-intent_xl_pipeline_en.md new file mode 100644 index 00000000000000..52e33ddbe161c7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-intent_xl_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English intent_xl_pipeline pipeline AlbertForSequenceClassification from dejanseo +author: John Snow Labs +name: intent_xl_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`intent_xl_pipeline` is a English model originally trained by dejanseo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/intent_xl_pipeline_en_5.5.0_3.0_1725488411481.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/intent_xl_pipeline_en_5.5.0_3.0_1725488411481.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("intent_xl_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("intent_xl_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|intent_xl_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|834.0 MB| + +## References + +https://huggingface.co/dejanseo/Intent-XL + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-iotnation_companyname_extraction_qa_model_1_2_roberta_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-iotnation_companyname_extraction_qa_model_1_2_roberta_pipeline_en.md new file mode 100644 index 00000000000000..90f079d424f638 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-iotnation_companyname_extraction_qa_model_1_2_roberta_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English iotnation_companyname_extraction_qa_model_1_2_roberta_pipeline pipeline RoBertaForQuestionAnswering from chriskim2273 +author: John Snow Labs +name: iotnation_companyname_extraction_qa_model_1_2_roberta_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`iotnation_companyname_extraction_qa_model_1_2_roberta_pipeline` is a English model originally trained by chriskim2273. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/iotnation_companyname_extraction_qa_model_1_2_roberta_pipeline_en_5.5.0_3.0_1725484108697.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/iotnation_companyname_extraction_qa_model_1_2_roberta_pipeline_en_5.5.0_3.0_1725484108697.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("iotnation_companyname_extraction_qa_model_1_2_roberta_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("iotnation_companyname_extraction_qa_model_1_2_roberta_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
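+Because this pretrained pipeline starts with a MultiDocumentAssembler, the input DataFrame presumably needs both a question column and a context column rather than a single `text` column. A sketch continuing the Python snippet above; the column names, the example texts, and the `answer` output column are assumptions, so it is worth inspecting the downloaded pipeline's stages before relying on them:
+
+```python
+# Sketch: probable input shape for this QA pipeline. The column names below
+# ("question", "context") and the "answer" output column are assumptions; check
+# the pipeline's stages (e.g. pipeline.model.stages, if available) to confirm.
+df = spark.createDataFrame(
+    [["Which company was funded?", "Acme Robotics raised a Series A this quarter."]]
+).toDF("question", "context")
+
+annotations = pipeline.transform(df)
+annotations.selectExpr("answer.result").show(truncate=False)
+```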
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|iotnation_companyname_extraction_qa_model_1_2_roberta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|463.7 MB| + +## References + +https://huggingface.co/chriskim2273/IOTNation_CompanyName_Extraction_QA_Model_1.2_Roberta + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-kalbert_en.md b/docs/_posts/ahmedlone127/2024-09-04-kalbert_en.md new file mode 100644 index 00000000000000..73a9b371df7a86 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-kalbert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English kalbert AlbertEmbeddings from Chakita +author: John Snow Labs +name: kalbert +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, albert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`kalbert` is a English model originally trained by Chakita. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kalbert_en_5.5.0_3.0_1725435320060.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kalbert_en_5.5.0_3.0_1725435320060.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = AlbertEmbeddings.pretrained("kalbert","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = AlbertEmbeddings.pretrained("kalbert","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
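+These are token-level embeddings, so each token in a row gets its own vector. A sketch of pairing tokens with their vectors, continuing the Python snippet above:
+
+```python
+# Sketch: pair each token with its AlbertEmbeddings vector (continues the example above).
+from pyspark.sql import functions as F
+
+tokens_df = pipelineDF.select(
+    F.col("token.result").alias("tokens"),
+    F.col("embeddings.embeddings").alias("vectors")
+)
+
+row = tokens_df.first()
+for token, vector in zip(row["tokens"], row["vectors"]):
+    print(token, vector[:5])  # first few dimensions only
+```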
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kalbert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[albert]| +|Language:|en| +|Size:|125.5 MB| + +## References + +https://huggingface.co/Chakita/Kalbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-kalematech_arabic_stt_asr_based_on_whisper_small_ar.md b/docs/_posts/ahmedlone127/2024-09-04-kalematech_arabic_stt_asr_based_on_whisper_small_ar.md new file mode 100644 index 00000000000000..b06df88c93c1e9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-kalematech_arabic_stt_asr_based_on_whisper_small_ar.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Arabic kalematech_arabic_stt_asr_based_on_whisper_small WhisperForCTC from Salama1429 +author: John Snow Labs +name: kalematech_arabic_stt_asr_based_on_whisper_small +date: 2024-09-04 +tags: [ar, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: ar +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`kalematech_arabic_stt_asr_based_on_whisper_small` is a Arabic model originally trained by Salama1429. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kalematech_arabic_stt_asr_based_on_whisper_small_ar_5.5.0_3.0_1725429225048.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kalematech_arabic_stt_asr_based_on_whisper_small_ar_5.5.0_3.0_1725429225048.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("kalematech_arabic_stt_asr_based_on_whisper_small","ar") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+  .setInputCol("audio_content")
+  .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("kalematech_arabic_stt_asr_based_on_whisper_small", "ar")
+  .setInputCols(Array("audio_assembler"))
+  .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
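+The snippets above assume a DataFrame `data` with an `audio_content` column already exists. A sketch of building one from a local WAV file; `librosa` and the file path are illustrative assumptions, not part of the model card, and any 16 kHz float array in that column should work:
+
+```python
+# Sketch: load a waveform and wrap it in the column the AudioAssembler expects.
+import librosa
+
+waveform, sampling_rate = librosa.load("recording.wav", sr=16000)  # hypothetical local file
+data = spark.createDataFrame([[waveform.tolist()]]).toDF("audio_content")
+```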
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kalematech_arabic_stt_asr_based_on_whisper_small| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|ar| +|Size:|1.7 GB| + +## References + +https://huggingface.co/Salama1429/KalemaTech-Arabic-STT-ASR-based-on-Whisper-Small \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-kanberto_pipeline_kn.md b/docs/_posts/ahmedlone127/2024-09-04-kanberto_pipeline_kn.md new file mode 100644 index 00000000000000..59b922015f2abc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-kanberto_pipeline_kn.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Kannada kanberto_pipeline pipeline RoBertaEmbeddings from Naveen-k +author: John Snow Labs +name: kanberto_pipeline +date: 2024-09-04 +tags: [kn, open_source, pipeline, onnx] +task: Embeddings +language: kn +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`kanberto_pipeline` is a Kannada model originally trained by Naveen-k. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kanberto_pipeline_kn_5.5.0_3.0_1725412329196.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kanberto_pipeline_kn_5.5.0_3.0_1725412329196.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("kanberto_pipeline", lang = "kn") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("kanberto_pipeline", lang = "kn") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kanberto_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|kn| +|Size:|311.8 MB| + +## References + +https://huggingface.co/Naveen-k/KanBERTo + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-kaviel_threat_text_classifier_en.md b/docs/_posts/ahmedlone127/2024-09-04-kaviel_threat_text_classifier_en.md new file mode 100644 index 00000000000000..0ae3d9f1bc3ad1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-kaviel_threat_text_classifier_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English kaviel_threat_text_classifier RoBertaForSequenceClassification from HiddenKise +author: John Snow Labs +name: kaviel_threat_text_classifier +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`kaviel_threat_text_classifier` is a English model originally trained by HiddenKise. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kaviel_threat_text_classifier_en_5.5.0_3.0_1725485888437.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kaviel_threat_text_classifier_en_5.5.0_3.0_1725485888437.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("kaviel_threat_text_classifier","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("kaviel_threat_text_classifier", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kaviel_threat_text_classifier| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|416.1 MB| + +## References + +https://huggingface.co/HiddenKise/Kaviel-threat-text-classifier \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-keyphrase_extraction_distilbert_inspec_finetuned_ner_en.md b/docs/_posts/ahmedlone127/2024-09-04-keyphrase_extraction_distilbert_inspec_finetuned_ner_en.md new file mode 100644 index 00000000000000..351d6043527f94 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-keyphrase_extraction_distilbert_inspec_finetuned_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English keyphrase_extraction_distilbert_inspec_finetuned_ner DistilBertForTokenClassification from jaggernaut007 +author: John Snow Labs +name: keyphrase_extraction_distilbert_inspec_finetuned_ner +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`keyphrase_extraction_distilbert_inspec_finetuned_ner` is a English model originally trained by jaggernaut007. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/keyphrase_extraction_distilbert_inspec_finetuned_ner_en_5.5.0_3.0_1725492561339.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/keyphrase_extraction_distilbert_inspec_finetuned_ner_en_5.5.0_3.0_1725492561339.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("keyphrase_extraction_distilbert_inspec_finetuned_ner","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("keyphrase_extraction_distilbert_inspec_finetuned_ner", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
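+The `ner` column holds one tag per token. A sketch of lining tokens up with their predicted keyphrase tags, continuing the Python snippet above:
+
+```python
+# Sketch: print each token next to its predicted tag for the first row.
+row = pipelineDF.select("token.result", "ner.result").first()
+for token, tag in zip(row[0], row[1]):
+    print(f"{token}\t{tag}")
+```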
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|keyphrase_extraction_distilbert_inspec_finetuned_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|246.7 MB| + +## References + +https://huggingface.co/jaggernaut007/keyphrase-extraction-distilbert-inspec-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-legal_roberta_large_en.md b/docs/_posts/ahmedlone127/2024-09-04-legal_roberta_large_en.md new file mode 100644 index 00000000000000..ee0c1ffeef5652 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-legal_roberta_large_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English legal_roberta_large RoBertaEmbeddings from lexlms +author: John Snow Labs +name: legal_roberta_large +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`legal_roberta_large` is a English model originally trained by lexlms. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/legal_roberta_large_en_5.5.0_3.0_1725412321582.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/legal_roberta_large_en_5.5.0_3.0_1725412321582.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("legal_roberta_large","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("legal_roberta_large","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|legal_roberta_large| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/lexlms/legal-roberta-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-linkbert_mini_en.md b/docs/_posts/ahmedlone127/2024-09-04-linkbert_mini_en.md new file mode 100644 index 00000000000000..49d9dfdb54b38a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-linkbert_mini_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English linkbert_mini AlbertForTokenClassification from dejanseo +author: John Snow Labs +name: linkbert_mini +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, albert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`linkbert_mini` is a English model originally trained by dejanseo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/linkbert_mini_en_5.5.0_3.0_1725487007045.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/linkbert_mini_en_5.5.0_3.0_1725487007045.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, AlbertForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = AlbertForTokenClassification.pretrained("linkbert_mini","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = AlbertForTokenClassification.pretrained("linkbert_mini", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|linkbert_mini| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|42.0 MB| + +## References + +https://huggingface.co/dejanseo/LinkBERT-mini \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-linkbert_mini_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-linkbert_mini_pipeline_en.md new file mode 100644 index 00000000000000..6a83e945dadfd1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-linkbert_mini_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English linkbert_mini_pipeline pipeline AlbertForTokenClassification from dejanseo +author: John Snow Labs +name: linkbert_mini_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`linkbert_mini_pipeline` is a English model originally trained by dejanseo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/linkbert_mini_pipeline_en_5.5.0_3.0_1725487009327.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/linkbert_mini_pipeline_en_5.5.0_3.0_1725487009327.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("linkbert_mini_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("linkbert_mini_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
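+
+For completeness, a minimal sketch (not part of the original card) of the input expected by the snippet above: `df` can be any Spark DataFrame with a `text` column, which is the column name assumed throughout these cards.
+
+```python
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+annotations.show(truncate=False)
+```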
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|linkbert_mini_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|42.0 MB| + +## References + +https://huggingface.co/dejanseo/LinkBERT-mini + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-lithuanian_hansardmatch_en.md b/docs/_posts/ahmedlone127/2024-09-04-lithuanian_hansardmatch_en.md new file mode 100644 index 00000000000000..5809660247af71 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-lithuanian_hansardmatch_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English lithuanian_hansardmatch MPNetEmbeddings from matthewleechen +author: John Snow Labs +name: lithuanian_hansardmatch +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lithuanian_hansardmatch` is a English model originally trained by matthewleechen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lithuanian_hansardmatch_en_5.5.0_3.0_1725470647147.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lithuanian_hansardmatch_en_5.5.0_3.0_1725470647147.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("lithuanian_hansardmatch","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("lithuanian_hansardmatch","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lithuanian_hansardmatch| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/matthewleechen/lt_hansardmatch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-lithuanian_hansardmatch_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-lithuanian_hansardmatch_pipeline_en.md new file mode 100644 index 00000000000000..f6a25c80fbfca8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-lithuanian_hansardmatch_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English lithuanian_hansardmatch_pipeline pipeline MPNetEmbeddings from matthewleechen +author: John Snow Labs +name: lithuanian_hansardmatch_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lithuanian_hansardmatch_pipeline` is a English model originally trained by matthewleechen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lithuanian_hansardmatch_pipeline_en_5.5.0_3.0_1725470667892.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lithuanian_hansardmatch_pipeline_en_5.5.0_3.0_1725470667892.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("lithuanian_hansardmatch_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("lithuanian_hansardmatch_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lithuanian_hansardmatch_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/matthewleechen/lt_hansardmatch + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-lithuanian_namesonly_humancapital_en.md b/docs/_posts/ahmedlone127/2024-09-04-lithuanian_namesonly_humancapital_en.md new file mode 100644 index 00000000000000..8fa8c9dd83d080 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-lithuanian_namesonly_humancapital_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English lithuanian_namesonly_humancapital MPNetEmbeddings from matthewleechen +author: John Snow Labs +name: lithuanian_namesonly_humancapital +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lithuanian_namesonly_humancapital` is a English model originally trained by matthewleechen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lithuanian_namesonly_humancapital_en_5.5.0_3.0_1725470532101.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lithuanian_namesonly_humancapital_en_5.5.0_3.0_1725470532101.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("lithuanian_namesonly_humancapital","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("lithuanian_namesonly_humancapital","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lithuanian_namesonly_humancapital| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/matthewleechen/lt_namesonly_humancapital \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-lithuanian_namesonly_humancapital_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-lithuanian_namesonly_humancapital_pipeline_en.md new file mode 100644 index 00000000000000..429a72ff822045 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-lithuanian_namesonly_humancapital_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English lithuanian_namesonly_humancapital_pipeline pipeline MPNetEmbeddings from matthewleechen +author: John Snow Labs +name: lithuanian_namesonly_humancapital_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lithuanian_namesonly_humancapital_pipeline` is a English model originally trained by matthewleechen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lithuanian_namesonly_humancapital_pipeline_en_5.5.0_3.0_1725470552498.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lithuanian_namesonly_humancapital_pipeline_en_5.5.0_3.0_1725470552498.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("lithuanian_namesonly_humancapital_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("lithuanian_namesonly_humancapital_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lithuanian_namesonly_humancapital_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/matthewleechen/lt_namesonly_humancapital + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-lithuanian_un_data_fine_coarse_english_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-lithuanian_un_data_fine_coarse_english_pipeline_en.md new file mode 100644 index 00000000000000..3c288121149d99 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-lithuanian_un_data_fine_coarse_english_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English lithuanian_un_data_fine_coarse_english_pipeline pipeline MPNetEmbeddings from dell-research-harvard +author: John Snow Labs +name: lithuanian_un_data_fine_coarse_english_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lithuanian_un_data_fine_coarse_english_pipeline` is a English model originally trained by dell-research-harvard. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lithuanian_un_data_fine_coarse_english_pipeline_en_5.5.0_3.0_1725470030880.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lithuanian_un_data_fine_coarse_english_pipeline_en_5.5.0_3.0_1725470030880.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("lithuanian_un_data_fine_coarse_english_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("lithuanian_un_data_fine_coarse_english_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lithuanian_un_data_fine_coarse_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/dell-research-harvard/lt-un-data-fine-coarse-en + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-lm_ner_skills_recognition_en.md b/docs/_posts/ahmedlone127/2024-09-04-lm_ner_skills_recognition_en.md new file mode 100644 index 00000000000000..9efc562e8c2b82 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-lm_ner_skills_recognition_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English lm_ner_skills_recognition DistilBertForTokenClassification from GalalEwida +author: John Snow Labs +name: lm_ner_skills_recognition +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lm_ner_skills_recognition` is a English model originally trained by GalalEwida. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lm_ner_skills_recognition_en_5.5.0_3.0_1725492659944.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lm_ner_skills_recognition_en_5.5.0_3.0_1725492659944.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("lm_ner_skills_recognition","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("lm_ner_skills_recognition", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lm_ner_skills_recognition| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/GalalEwida/lm-ner-skills-recognition \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-lm_ner_skills_recognition_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-lm_ner_skills_recognition_pipeline_en.md new file mode 100644 index 00000000000000..4d2d4aade8141a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-lm_ner_skills_recognition_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English lm_ner_skills_recognition_pipeline pipeline DistilBertForTokenClassification from GalalEwida +author: John Snow Labs +name: lm_ner_skills_recognition_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lm_ner_skills_recognition_pipeline` is a English model originally trained by GalalEwida. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lm_ner_skills_recognition_pipeline_en_5.5.0_3.0_1725492673809.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lm_ner_skills_recognition_pipeline_en_5.5.0_3.0_1725492673809.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("lm_ner_skills_recognition_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("lm_ner_skills_recognition_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lm_ner_skills_recognition_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/GalalEwida/lm-ner-skills-recognition + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-mach_1_en.md b/docs/_posts/ahmedlone127/2024-09-04-mach_1_en.md new file mode 100644 index 00000000000000..8377e61b96e3ec --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-mach_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mach_1 RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: mach_1 +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mach_1` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mach_1_en_5.5.0_3.0_1725485268756.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mach_1_en_5.5.0_3.0_1725485268756.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, RoBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("mach_1","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("mach_1", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
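+
+Not part of the original example, but as a quick sketch of how the prediction can be read back: the `class` column produced above holds one annotation per row, whose `result` field is the predicted label.
+
+```python
+pipelineDF.selectExpr("text", "`class`.result as prediction") \
+    .show(truncate=False)
+```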
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mach_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/Mach_1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-magbert_lm_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-magbert_lm_pipeline_en.md new file mode 100644 index 00000000000000..8f9f2df9ba82fc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-magbert_lm_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English magbert_lm_pipeline pipeline CamemBertEmbeddings from TypicaAI +author: John Snow Labs +name: magbert_lm_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`magbert_lm_pipeline` is a English model originally trained by TypicaAI. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/magbert_lm_pipeline_en_5.5.0_3.0_1725442082587.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/magbert_lm_pipeline_en_5.5.0_3.0_1725442082587.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("magbert_lm_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("magbert_lm_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|magbert_lm_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|412.5 MB| + +## References + +https://huggingface.co/TypicaAI/magbert-lm + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-marian_finetuned_kde4_english_tonga_tonga_islands_french_lingrui1_en.md b/docs/_posts/ahmedlone127/2024-09-04-marian_finetuned_kde4_english_tonga_tonga_islands_french_lingrui1_en.md new file mode 100644 index 00000000000000..de8cce558492bf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-marian_finetuned_kde4_english_tonga_tonga_islands_french_lingrui1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English marian_finetuned_kde4_english_tonga_tonga_islands_french_lingrui1 MarianTransformer from Lingrui1 +author: John Snow Labs +name: marian_finetuned_kde4_english_tonga_tonga_islands_french_lingrui1 +date: 2024-09-04 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kde4_english_tonga_tonga_islands_french_lingrui1` is a English model originally trained by Lingrui1. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_lingrui1_en_5.5.0_3.0_1725493751865.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_lingrui1_en_5.5.0_3.0_1725493751865.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, MarianTransformer
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_french_lingrui1","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_french_lingrui1","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
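+
+As an illustrative addition (not in the original card), the translated sentences can be read from the `translation` column produced by the pipeline above:
+
+```python
+pipelineDF.selectExpr("explode(translation.result) as translated_text") \
+    .show(truncate=False)
+```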
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kde4_english_tonga_tonga_islands_french_lingrui1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.1 MB| + +## References + +https://huggingface.co/Lingrui1/marian-finetuned-kde4-en-to-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-marian_finetuned_kde4_english_tonga_tonga_islands_french_sooh098_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-marian_finetuned_kde4_english_tonga_tonga_islands_french_sooh098_pipeline_en.md new file mode 100644 index 00000000000000..e26dbbeba70194 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-marian_finetuned_kde4_english_tonga_tonga_islands_french_sooh098_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English marian_finetuned_kde4_english_tonga_tonga_islands_french_sooh098_pipeline pipeline MarianTransformer from sooh098 +author: John Snow Labs +name: marian_finetuned_kde4_english_tonga_tonga_islands_french_sooh098_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kde4_english_tonga_tonga_islands_french_sooh098_pipeline` is a English model originally trained by sooh098. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_sooh098_pipeline_en_5.5.0_3.0_1725493933643.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_sooh098_pipeline_en_5.5.0_3.0_1725493933643.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("marian_finetuned_kde4_english_tonga_tonga_islands_french_sooh098_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("marian_finetuned_kde4_english_tonga_tonga_islands_french_sooh098_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kde4_english_tonga_tonga_islands_french_sooh098_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|508.7 MB| + +## References + +https://huggingface.co/sooh098/marian-finetuned-kde4-en-to-fr + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-marianmt_igbo_best_18_10_23_pipeline_ig.md b/docs/_posts/ahmedlone127/2024-09-04-marianmt_igbo_best_18_10_23_pipeline_ig.md new file mode 100644 index 00000000000000..79e5ee30eac2e0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-marianmt_igbo_best_18_10_23_pipeline_ig.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Igbo marianmt_igbo_best_18_10_23_pipeline pipeline MarianTransformer from Sunbird +author: John Snow Labs +name: marianmt_igbo_best_18_10_23_pipeline +date: 2024-09-04 +tags: [ig, open_source, pipeline, onnx] +task: Translation +language: ig +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marianmt_igbo_best_18_10_23_pipeline` is a Igbo model originally trained by Sunbird. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marianmt_igbo_best_18_10_23_pipeline_ig_5.5.0_3.0_1725493946083.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marianmt_igbo_best_18_10_23_pipeline_ig_5.5.0_3.0_1725493946083.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("marianmt_igbo_best_18_10_23_pipeline", lang = "ig") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("marianmt_igbo_best_18_10_23_pipeline", lang = "ig") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marianmt_igbo_best_18_10_23_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ig| +|Size:|532.4 MB| + +## References + +https://huggingface.co/Sunbird/MarianMT_Igbo_best_18_10_23 + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-masking_heaps_distilbert_en.md b/docs/_posts/ahmedlone127/2024-09-04-masking_heaps_distilbert_en.md new file mode 100644 index 00000000000000..a811ca4175b62d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-masking_heaps_distilbert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English masking_heaps_distilbert DistilBertEmbeddings from johannes-garstenauer +author: John Snow Labs +name: masking_heaps_distilbert +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`masking_heaps_distilbert` is a English model originally trained by johannes-garstenauer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/masking_heaps_distilbert_en_5.5.0_3.0_1725418281727.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/masking_heaps_distilbert_en_5.5.0_3.0_1725418281727.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("masking_heaps_distilbert","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("masking_heaps_distilbert","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|masking_heaps_distilbert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/johannes-garstenauer/masking-heaps-distilbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-mdeberta_base_metaphor_detection_spanish_es.md b/docs/_posts/ahmedlone127/2024-09-04-mdeberta_base_metaphor_detection_spanish_es.md new file mode 100644 index 00000000000000..c9a99b4fc97652 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-mdeberta_base_metaphor_detection_spanish_es.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Castilian, Spanish mdeberta_base_metaphor_detection_spanish DeBertaForTokenClassification from HiTZ +author: John Snow Labs +name: mdeberta_base_metaphor_detection_spanish +date: 2024-09-04 +tags: [es, open_source, onnx, token_classification, deberta, ner] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_base_metaphor_detection_spanish` is a Castilian, Spanish model originally trained by HiTZ. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_base_metaphor_detection_spanish_es_5.5.0_3.0_1725472444469.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_base_metaphor_detection_spanish_es_5.5.0_3.0_1725472444469.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DeBertaForTokenClassification.pretrained("mdeberta_base_metaphor_detection_spanish","es") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DeBertaForTokenClassification.pretrained("mdeberta_base_metaphor_detection_spanish", "es")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_base_metaphor_detection_spanish| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|es| +|Size:|846.8 MB| + +## References + +https://huggingface.co/HiTZ/mdeberta-base-metaphor-detection-es \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-mdeberta_base_v3_4_en.md b/docs/_posts/ahmedlone127/2024-09-04-mdeberta_base_v3_4_en.md new file mode 100644 index 00000000000000..bd80f3ff562f38 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-mdeberta_base_v3_4_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mdeberta_base_v3_4 DeBertaForSequenceClassification from alyazharr +author: John Snow Labs +name: mdeberta_base_v3_4 +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_base_v3_4` is a English model originally trained by alyazharr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_base_v3_4_en_5.5.0_3.0_1725438535286.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_base_v3_4_en_5.5.0_3.0_1725438535286.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("mdeberta_base_v3_4","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("mdeberta_base_v3_4", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_base_v3_4| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|831.7 MB| + +## References + +https://huggingface.co/alyazharr/mdeberta_base_v3_4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-mdeberta_base_v3_5_en.md b/docs/_posts/ahmedlone127/2024-09-04-mdeberta_base_v3_5_en.md new file mode 100644 index 00000000000000..c06dad73352f20 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-mdeberta_base_v3_5_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mdeberta_base_v3_5 DeBertaForSequenceClassification from alyazharr +author: John Snow Labs +name: mdeberta_base_v3_5 +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_base_v3_5` is a English model originally trained by alyazharr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_base_v3_5_en_5.5.0_3.0_1725438792110.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_base_v3_5_en_5.5.0_3.0_1725438792110.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("mdeberta_base_v3_5","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("mdeberta_base_v3_5", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_base_v3_5| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|832.7 MB| + +## References + +https://huggingface.co/alyazharr/mdeberta_base_v3_5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-mdeberta_base_v3_5_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-mdeberta_base_v3_5_pipeline_en.md new file mode 100644 index 00000000000000..2e4444909c4178 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-mdeberta_base_v3_5_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mdeberta_base_v3_5_pipeline pipeline DeBertaForSequenceClassification from alyazharr +author: John Snow Labs +name: mdeberta_base_v3_5_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_base_v3_5_pipeline` is a English model originally trained by alyazharr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_base_v3_5_pipeline_en_5.5.0_3.0_1725438868240.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_base_v3_5_pipeline_en_5.5.0_3.0_1725438868240.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mdeberta_base_v3_5_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mdeberta_base_v3_5_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_base_v3_5_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|832.7 MB| + +## References + +https://huggingface.co/alyazharr/mdeberta_base_v3_5 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-mdeberta_profane_final_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-mdeberta_profane_final_pipeline_en.md new file mode 100644 index 00000000000000..646ab6fc7055a0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-mdeberta_profane_final_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mdeberta_profane_final_pipeline pipeline DeBertaForSequenceClassification from SiddharthaM +author: John Snow Labs +name: mdeberta_profane_final_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_profane_final_pipeline` is a English model originally trained by SiddharthaM. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_profane_final_pipeline_en_5.5.0_3.0_1725468832932.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_profane_final_pipeline_en_5.5.0_3.0_1725468832932.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mdeberta_profane_final_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mdeberta_profane_final_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_profane_final_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|816.6 MB| + +## References + +https://huggingface.co/SiddharthaM/mdeberta-profane-final + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-mdeberta_v3_base_finetuded_porttagger_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-mdeberta_v3_base_finetuded_porttagger_pipeline_en.md new file mode 100644 index 00000000000000..a3d9785fd33472 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-mdeberta_v3_base_finetuded_porttagger_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mdeberta_v3_base_finetuded_porttagger_pipeline pipeline DeBertaForTokenClassification from Emanuel +author: John Snow Labs +name: mdeberta_v3_base_finetuded_porttagger_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_v3_base_finetuded_porttagger_pipeline` is a English model originally trained by Emanuel. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_finetuded_porttagger_pipeline_en_5.5.0_3.0_1725472109970.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_finetuded_porttagger_pipeline_en_5.5.0_3.0_1725472109970.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mdeberta_v3_base_finetuded_porttagger_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mdeberta_v3_base_finetuded_porttagger_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_v3_base_finetuded_porttagger_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|830.4 MB| + +## References + +https://huggingface.co/Emanuel/mdeberta-v3-base-finetuded-porttagger + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-mdeberta_v3_base_finetuned_ai4privacy_v2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-mdeberta_v3_base_finetuned_ai4privacy_v2_pipeline_en.md new file mode 100644 index 00000000000000..1a4b6229d0af34 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-mdeberta_v3_base_finetuned_ai4privacy_v2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mdeberta_v3_base_finetuned_ai4privacy_v2_pipeline pipeline DeBertaForTokenClassification from Isotonic +author: John Snow Labs +name: mdeberta_v3_base_finetuned_ai4privacy_v2_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_v3_base_finetuned_ai4privacy_v2_pipeline` is a English model originally trained by Isotonic. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_finetuned_ai4privacy_v2_pipeline_en_5.5.0_3.0_1725472931903.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_finetuned_ai4privacy_v2_pipeline_en_5.5.0_3.0_1725472931903.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mdeberta_v3_base_finetuned_ai4privacy_v2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mdeberta_v3_base_finetuned_ai4privacy_v2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_v3_base_finetuned_ai4privacy_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|890.8 MB| + +## References + +https://huggingface.co/Isotonic/mdeberta-v3-base_finetuned_ai4privacy_v2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-mdeberta_v3_base_open_ner_en.md b/docs/_posts/ahmedlone127/2024-09-04-mdeberta_v3_base_open_ner_en.md new file mode 100644 index 00000000000000..548f2e733c09e1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-mdeberta_v3_base_open_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mdeberta_v3_base_open_ner DeBertaForTokenClassification from yongsun-yoon +author: John Snow Labs +name: mdeberta_v3_base_open_ner +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, deberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_v3_base_open_ner` is a English model originally trained by yongsun-yoon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_open_ner_en_5.5.0_3.0_1725473637948.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_open_ner_en_5.5.0_3.0_1725473637948.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DeBertaForTokenClassification.pretrained("mdeberta_v3_base_open_ner","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DeBertaForTokenClassification.pretrained("mdeberta_v3_base_open_ner", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
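+
+If entity chunks rather than per-token tags are needed, a `NerConverter` stage can be appended to the pipeline defined above. This is a sketch, not part of the original card; it assumes the model emits IOB-style labels and reuses `documentAssembler`, `tokenizer`, `tokenClassifier` and `data` from the Python example.
+
+```python
+# Hedged sketch: group token-level tags into entity chunks with NerConverter.
+from sparknlp.annotator import NerConverter
+
+nerConverter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+chunkPipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier, nerConverter])
+chunkPipeline.fit(data).transform(data) \
+    .select("ner_chunk.result") \
+    .show(truncate=False)
+```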
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_v3_base_open_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|910.8 MB| + +## References + +https://huggingface.co/yongsun-yoon/mdeberta-v3-base-open-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-mdeberta_v3_base_sst2_100_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-mdeberta_v3_base_sst2_100_pipeline_en.md new file mode 100644 index 00000000000000..7fa1cfc43e2b67 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-mdeberta_v3_base_sst2_100_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mdeberta_v3_base_sst2_100_pipeline pipeline DeBertaForSequenceClassification from tmnam20 +author: John Snow Labs +name: mdeberta_v3_base_sst2_100_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_v3_base_sst2_100_pipeline` is a English model originally trained by tmnam20. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_sst2_100_pipeline_en_5.5.0_3.0_1725440831209.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_sst2_100_pipeline_en_5.5.0_3.0_1725440831209.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mdeberta_v3_base_sst2_100_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mdeberta_v3_base_sst2_100_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_v3_base_sst2_100_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|785.6 MB| + +## References + +https://huggingface.co/tmnam20/mdeberta-v3-base-sst2-100 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-mdeberta_v3_base_vnrte_100_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-mdeberta_v3_base_vnrte_100_pipeline_en.md new file mode 100644 index 00000000000000..817774b5953251 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-mdeberta_v3_base_vnrte_100_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mdeberta_v3_base_vnrte_100_pipeline pipeline DeBertaForSequenceClassification from tmnam20 +author: John Snow Labs +name: mdeberta_v3_base_vnrte_100_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_v3_base_vnrte_100_pipeline` is a English model originally trained by tmnam20. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_vnrte_100_pipeline_en_5.5.0_3.0_1725469411230.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_vnrte_100_pipeline_en_5.5.0_3.0_1725469411230.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mdeberta_v3_base_vnrte_100_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mdeberta_v3_base_vnrte_100_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_v3_base_vnrte_100_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|794.8 MB| + +## References + +https://huggingface.co/tmnam20/mdeberta-v3-base-vnrte-100 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-mdeberta_v3_emo_multilabel_en.md b/docs/_posts/ahmedlone127/2024-09-04-mdeberta_v3_emo_multilabel_en.md new file mode 100644 index 00000000000000..58d1fb7f5d6bcb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-mdeberta_v3_emo_multilabel_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mdeberta_v3_emo_multilabel DeBertaForSequenceClassification from RolMax +author: John Snow Labs +name: mdeberta_v3_emo_multilabel +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_v3_emo_multilabel` is a English model originally trained by RolMax. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_v3_emo_multilabel_en_5.5.0_3.0_1725467767833.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_v3_emo_multilabel_en_5.5.0_3.0_1725467767833.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("mdeberta_v3_emo_multilabel","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("mdeberta_v3_emo_multilabel", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
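+
+To read the predictions back out of `pipelineDF` from the example above, the label strings are in the `result` field of the `class` column, and per-label scores are typically available in its `metadata` field; a short sketch:
+
+```python
+# Hedged sketch: inspect predicted labels and (when exposed) their scores.
+pipelineDF.selectExpr(
+    "text",
+    "class.result as predicted_labels",   # label strings chosen by the classifier
+    "class.metadata as label_scores"      # per-class scores, when the annotator exposes them
+).show(truncate=False)
+```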
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_v3_emo_multilabel| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|865.0 MB| + +## References + +https://huggingface.co/RolMax/mdeberta-v3-emo-multilabel \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-mdeberta_v3_emo_multilabel_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-mdeberta_v3_emo_multilabel_pipeline_en.md new file mode 100644 index 00000000000000..06fcadc95d3674 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-mdeberta_v3_emo_multilabel_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mdeberta_v3_emo_multilabel_pipeline pipeline DeBertaForSequenceClassification from RolMax +author: John Snow Labs +name: mdeberta_v3_emo_multilabel_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_v3_emo_multilabel_pipeline` is a English model originally trained by RolMax. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_v3_emo_multilabel_pipeline_en_5.5.0_3.0_1725467818347.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_v3_emo_multilabel_pipeline_en_5.5.0_3.0_1725467818347.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mdeberta_v3_emo_multilabel_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mdeberta_v3_emo_multilabel_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_v3_emo_multilabel_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|865.0 MB| + +## References + +https://huggingface.co/RolMax/mdeberta-v3-emo-multilabel + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-microsoft_deberta_v3_large_cls_sst2_gladiator_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-microsoft_deberta_v3_large_cls_sst2_gladiator_pipeline_en.md new file mode 100644 index 00000000000000..bd273528d51b2b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-microsoft_deberta_v3_large_cls_sst2_gladiator_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English microsoft_deberta_v3_large_cls_sst2_gladiator_pipeline pipeline DeBertaForSequenceClassification from Gladiator +author: John Snow Labs +name: microsoft_deberta_v3_large_cls_sst2_gladiator_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`microsoft_deberta_v3_large_cls_sst2_gladiator_pipeline` is a English model originally trained by Gladiator. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/microsoft_deberta_v3_large_cls_sst2_gladiator_pipeline_en_5.5.0_3.0_1725462336034.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/microsoft_deberta_v3_large_cls_sst2_gladiator_pipeline_en_5.5.0_3.0_1725462336034.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("microsoft_deberta_v3_large_cls_sst2_gladiator_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("microsoft_deberta_v3_large_cls_sst2_gladiator_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|microsoft_deberta_v3_large_cls_sst2_gladiator_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/Gladiator/microsoft-deberta-v3-large_cls_sst2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-microsoft_deberta_v3_large_ner_conll2003_general_model_v1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-microsoft_deberta_v3_large_ner_conll2003_general_model_v1_pipeline_en.md new file mode 100644 index 00000000000000..408b5287af3754 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-microsoft_deberta_v3_large_ner_conll2003_general_model_v1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English microsoft_deberta_v3_large_ner_conll2003_general_model_v1_pipeline pipeline DeBertaForTokenClassification from Yanis +author: John Snow Labs +name: microsoft_deberta_v3_large_ner_conll2003_general_model_v1_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`microsoft_deberta_v3_large_ner_conll2003_general_model_v1_pipeline` is a English model originally trained by Yanis. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/microsoft_deberta_v3_large_ner_conll2003_general_model_v1_pipeline_en_5.5.0_3.0_1725472236166.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/microsoft_deberta_v3_large_ner_conll2003_general_model_v1_pipeline_en_5.5.0_3.0_1725472236166.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("microsoft_deberta_v3_large_ner_conll2003_general_model_v1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("microsoft_deberta_v3_large_ner_conll2003_general_model_v1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
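+
+If the archive behind the Download button above is fetched and unzipped manually (for example on an air-gapped cluster), the pipeline can usually be loaded straight from the extracted folder instead of being pulled from the model hub; the local path below is only an assumed placeholder.
+
+```python
+# Hedged sketch: load the unzipped pretrained pipeline from a local path.
+from pyspark.ml import PipelineModel
+
+local_pipeline = PipelineModel.load("/models/microsoft_deberta_v3_large_ner_conll2003_general_model_v1_pipeline_en")
+annotations = local_pipeline.transform(df)  # df must contain a "text" column
+```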
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|microsoft_deberta_v3_large_ner_conll2003_general_model_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/Yanis/microsoft-deberta-v3-large_ner_conll2003-general-model-v1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-mix4_japanese_english_fugumt_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-mix4_japanese_english_fugumt_pipeline_en.md new file mode 100644 index 00000000000000..ea944e9e4c0248 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-mix4_japanese_english_fugumt_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mix4_japanese_english_fugumt_pipeline pipeline MarianTransformer from twieland +author: John Snow Labs +name: mix4_japanese_english_fugumt_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mix4_japanese_english_fugumt_pipeline` is a English model originally trained by twieland. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mix4_japanese_english_fugumt_pipeline_en_5.5.0_3.0_1725494024365.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mix4_japanese_english_fugumt_pipeline_en_5.5.0_3.0_1725494024365.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mix4_japanese_english_fugumt_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mix4_japanese_english_fugumt_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
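+
+Because this pipeline translates Japanese into English, a quick way to try it is `annotate()` on a raw Japanese string; the sample sentence below is illustrative and the exact output key depends on the column name of the MarianTransformer stage.
+
+```python
+# Hedged sketch: translate a single Japanese sentence with annotate().
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("mix4_japanese_english_fugumt_pipeline", lang="en")
+result = pipeline.annotate("今日は天気がとても良いです。")
+print(result)  # the English translation appears under the transformer's output key
+```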
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mix4_japanese_english_fugumt_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|352.9 MB| + +## References + +https://huggingface.co/twieland/MIX4_ja-en_fugumt + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-mnli_microsoft_deberta_v3_large_seed_2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-mnli_microsoft_deberta_v3_large_seed_2_pipeline_en.md new file mode 100644 index 00000000000000..85b6d4bda9714f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-mnli_microsoft_deberta_v3_large_seed_2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mnli_microsoft_deberta_v3_large_seed_2_pipeline pipeline DeBertaForSequenceClassification from utahnlp +author: John Snow Labs +name: mnli_microsoft_deberta_v3_large_seed_2_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mnli_microsoft_deberta_v3_large_seed_2_pipeline` is a English model originally trained by utahnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mnli_microsoft_deberta_v3_large_seed_2_pipeline_en_5.5.0_3.0_1725468034669.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mnli_microsoft_deberta_v3_large_seed_2_pipeline_en_5.5.0_3.0_1725468034669.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mnli_microsoft_deberta_v3_large_seed_2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mnli_microsoft_deberta_v3_large_seed_2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mnli_microsoft_deberta_v3_large_seed_2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/utahnlp/mnli_microsoft_deberta-v3-large_seed-2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-model_albert_512_token_classification_en.md b/docs/_posts/ahmedlone127/2024-09-04-model_albert_512_token_classification_en.md new file mode 100644 index 00000000000000..456cc72d8e3b8c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-model_albert_512_token_classification_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English model_albert_512_token_classification AlbertForTokenClassification from adhi29 +author: John Snow Labs +name: model_albert_512_token_classification +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, albert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`model_albert_512_token_classification` is a English model originally trained by adhi29. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/model_albert_512_token_classification_en_5.5.0_3.0_1725486604360.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/model_albert_512_token_classification_en_5.5.0_3.0_1725486604360.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = AlbertForTokenClassification.pretrained("model_albert_512_token_classification","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = AlbertForTokenClassification.pretrained("model_albert_512_token_classification", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
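+
+For ad-hoc, single-sentence calls it is often more convenient to wrap the fitted `pipelineModel` from the Python example above in a `LightPipeline`, which skips DataFrame construction; a brief sketch:
+
+```python
+# Hedged sketch: low-latency inference on raw strings with LightPipeline.
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+print(light.annotate("I love spark-nlp"))        # dict of output column -> results
+print(light.fullAnnotate("I love spark-nlp"))    # full annotations with begin/end offsets
+```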
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|model_albert_512_token_classification| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|42.0 MB| + +## References + +https://huggingface.co/adhi29/model_albert_512_token_classification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-model_albert_512_token_classification_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-model_albert_512_token_classification_pipeline_en.md new file mode 100644 index 00000000000000..9f43be2a4736a3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-model_albert_512_token_classification_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English model_albert_512_token_classification_pipeline pipeline AlbertForTokenClassification from adhi29 +author: John Snow Labs +name: model_albert_512_token_classification_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`model_albert_512_token_classification_pipeline` is a English model originally trained by adhi29. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/model_albert_512_token_classification_pipeline_en_5.5.0_3.0_1725486606753.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/model_albert_512_token_classification_pipeline_en_5.5.0_3.0_1725486606753.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("model_albert_512_token_classification_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("model_albert_512_token_classification_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|model_albert_512_token_classification_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|42.1 MB| + +## References + +https://huggingface.co/adhi29/model_albert_512_token_classification + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-mother_tongue_model_v3_sn.md b/docs/_posts/ahmedlone127/2024-09-04-mother_tongue_model_v3_sn.md new file mode 100644 index 00000000000000..10697518380f2d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-mother_tongue_model_v3_sn.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Shona mother_tongue_model_v3 WhisperForCTC from MothersTongue +author: John Snow Labs +name: mother_tongue_model_v3 +date: 2024-09-04 +tags: [sn, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: sn +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mother_tongue_model_v3` is a Shona model originally trained by MothersTongue. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mother_tongue_model_v3_sn_5.5.0_3.0_1725430106079.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mother_tongue_model_v3_sn_5.5.0_3.0_1725430106079.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("mother_tongue_model_v3","sn") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("mother_tongue_model_v3", "sn")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
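+
+The generic snippet above assumes a DataFrame `data` with an `audio_content` column of floating-point samples, which is what `AudioAssembler` expects. One way to build it, sketched below, uses librosa (an external library, not part of Spark NLP) to load a local WAV file resampled to 16 kHz, the rate Whisper models expect; the file name is a placeholder.
+
+```python
+# Hedged sketch: turn a local audio file into the `data` DataFrame used above.
+import librosa
+
+waveform, _ = librosa.load("sample.wav", sr=16000)      # mono float samples at 16 kHz
+data = spark.createDataFrame([(waveform.tolist(),)], ["audio_content"])
+
+transcripts = pipelineModel.transform(data)
+transcripts.select("text.result").show(truncate=False)  # the recognized text
+```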
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mother_tongue_model_v3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|sn| +|Size:|1.7 GB| + +## References + +https://huggingface.co/MothersTongue/mother_tongue_model_v3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-mount2_model_en.md b/docs/_posts/ahmedlone127/2024-09-04-mount2_model_en.md new file mode 100644 index 00000000000000..2e337118515b31 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-mount2_model_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English mount2_model MPNetEmbeddings from ilhkn +author: John Snow Labs +name: mount2_model +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mount2_model` is a English model originally trained by ilhkn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mount2_model_en_5.5.0_3.0_1725470526835.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mount2_model_en_5.5.0_3.0_1725470526835.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("mount2_model","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("mount2_model","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
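+
+The vectors land in the `embeddings` field of the annotation column created above; a small sketch (assuming NumPy is available) of pulling two sentence vectors out of `pipelineModel` and comparing them with cosine similarity:
+
+```python
+# Hedged sketch: extract sentence vectors and compute a cosine similarity.
+import numpy as np
+
+pairs = spark.createDataFrame([["I love spark-nlp"], ["Spark NLP is great"]]).toDF("text")
+rows = pipelineModel.transform(pairs).select("embeddings.embeddings").collect()
+
+v1 = np.array(rows[0][0][0])   # first row, first annotation's vector
+v2 = np.array(rows[1][0][0])
+print(float(v1 @ v2 / (np.linalg.norm(v1) * np.linalg.norm(v2))))
+```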
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mount2_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/ilhkn/mount2_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-mount2_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-mount2_model_pipeline_en.md new file mode 100644 index 00000000000000..d0d63bf6568a6c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-mount2_model_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English mount2_model_pipeline pipeline MPNetEmbeddings from ilhkn +author: John Snow Labs +name: mount2_model_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mount2_model_pipeline` is a English model originally trained by ilhkn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mount2_model_pipeline_en_5.5.0_3.0_1725470546879.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mount2_model_pipeline_en_5.5.0_3.0_1725470546879.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mount2_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mount2_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mount2_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/ilhkn/mount2_model + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-mpnet_base_allnli_en.md b/docs/_posts/ahmedlone127/2024-09-04-mpnet_base_allnli_en.md new file mode 100644 index 00000000000000..62920bde67d100 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-mpnet_base_allnli_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English mpnet_base_allnli MPNetEmbeddings from tomaarsen +author: John Snow Labs +name: mpnet_base_allnli +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mpnet_base_allnli` is a English model originally trained by tomaarsen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mpnet_base_allnli_en_5.5.0_3.0_1725470549836.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mpnet_base_allnli_en_5.5.0_3.0_1725470549836.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("mpnet_base_allnli","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("mpnet_base_allnli","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mpnet_base_allnli| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|396.7 MB| + +## References + +https://huggingface.co/tomaarsen/mpnet-base-allnli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-mpnet_base_allnli_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-mpnet_base_allnli_pipeline_en.md new file mode 100644 index 00000000000000..114b0c9d8d1a5a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-mpnet_base_allnli_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English mpnet_base_allnli_pipeline pipeline MPNetEmbeddings from tomaarsen +author: John Snow Labs +name: mpnet_base_allnli_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mpnet_base_allnli_pipeline` is a English model originally trained by tomaarsen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mpnet_base_allnli_pipeline_en_5.5.0_3.0_1725470573737.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mpnet_base_allnli_pipeline_en_5.5.0_3.0_1725470573737.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mpnet_base_allnli_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mpnet_base_allnli_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mpnet_base_allnli_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|396.7 MB| + +## References + +https://huggingface.co/tomaarsen/mpnet-base-allnli + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-mt5_base_qaqg_finetuned_tydiqa_indonesian_en.md b/docs/_posts/ahmedlone127/2024-09-04-mt5_base_qaqg_finetuned_tydiqa_indonesian_en.md new file mode 100644 index 00000000000000..6de7dfe69746cc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-mt5_base_qaqg_finetuned_tydiqa_indonesian_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English mt5_base_qaqg_finetuned_tydiqa_indonesian T5Transformer from hawalurahman +author: John Snow Labs +name: mt5_base_qaqg_finetuned_tydiqa_indonesian +date: 2024-09-04 +tags: [en, open_source, onnx, t5, question_answering, summarization, translation, text_generation] +task: [Question Answering, Summarization, Translation, Text Generation] +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: T5Transformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained T5Transformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mt5_base_qaqg_finetuned_tydiqa_indonesian` is a English model originally trained by hawalurahman. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mt5_base_qaqg_finetuned_tydiqa_indonesian_en_5.5.0_3.0_1725459551481.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mt5_base_qaqg_finetuned_tydiqa_indonesian_en_5.5.0_3.0_1725459551481.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +t5 = T5Transformer.pretrained("mt5_base_qaqg_finetuned_tydiqa_indonesian","en") \ + .setInputCols(["document"]) \ + .setOutputCol("output") + +pipeline = Pipeline().setStages([documentAssembler, t5]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val t5 = T5Transformer.pretrained("mt5_base_qaqg_finetuned_tydiqa_indonesian", "en") + .setInputCols(Array("documents")) + .setOutputCol("output") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, t5)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
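+
+The generic T5 snippet above does not set a task prefix or a generation length. For a question-answering/question-generation checkpoint such as this one, those are usually worth configuring; the prefix string below is an assumption and should match whatever format the model was fine-tuned with.
+
+```python
+# Hedged sketch: configure the transformer for longer, task-prefixed generation.
+t5 = T5Transformer.pretrained("mt5_base_qaqg_finetuned_tydiqa_indonesian", "en") \
+    .setInputCols(["document"]) \
+    .setOutputCol("output") \
+    .setTask("question: ") \
+    .setMaxOutputLength(128)
+```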
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mt5_base_qaqg_finetuned_tydiqa_indonesian| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[output]| +|Language:|en| +|Size:|2.3 GB| + +## References + +https://huggingface.co/hawalurahman/mt5-base-qaqg-finetuned-TydiQA-id \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-mt5_base_qaqg_finetuned_tydiqa_indonesian_ir_en.md b/docs/_posts/ahmedlone127/2024-09-04-mt5_base_qaqg_finetuned_tydiqa_indonesian_ir_en.md new file mode 100644 index 00000000000000..c8387f2fa0924b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-mt5_base_qaqg_finetuned_tydiqa_indonesian_ir_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English mt5_base_qaqg_finetuned_tydiqa_indonesian_ir T5Transformer from hawalurahman +author: John Snow Labs +name: mt5_base_qaqg_finetuned_tydiqa_indonesian_ir +date: 2024-09-04 +tags: [en, open_source, onnx, t5, question_answering, summarization, translation, text_generation] +task: [Question Answering, Summarization, Translation, Text Generation] +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: T5Transformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained T5Transformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mt5_base_qaqg_finetuned_tydiqa_indonesian_ir` is a English model originally trained by hawalurahman. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mt5_base_qaqg_finetuned_tydiqa_indonesian_ir_en_5.5.0_3.0_1725460015247.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mt5_base_qaqg_finetuned_tydiqa_indonesian_ir_en_5.5.0_3.0_1725460015247.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +t5 = T5Transformer.pretrained("mt5_base_qaqg_finetuned_tydiqa_indonesian_ir","en") \ + .setInputCols(["document"]) \ + .setOutputCol("output") + +pipeline = Pipeline().setStages([documentAssembler, t5]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val t5 = T5Transformer.pretrained("mt5_base_qaqg_finetuned_tydiqa_indonesian_ir", "en") + .setInputCols(Array("documents")) + .setOutputCol("output") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, t5)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mt5_base_qaqg_finetuned_tydiqa_indonesian_ir| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[output]| +|Language:|en| +|Size:|2.3 GB| + +## References + +https://huggingface.co/hawalurahman/mt5-base-qaqg-finetuned-TydiQA-id-ir \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-mt5_base_qaqg_finetuned_tydiqa_indonesian_sentence_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-mt5_base_qaqg_finetuned_tydiqa_indonesian_sentence_pipeline_en.md new file mode 100644 index 00000000000000..9544fb85164c0a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-mt5_base_qaqg_finetuned_tydiqa_indonesian_sentence_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English mt5_base_qaqg_finetuned_tydiqa_indonesian_sentence_pipeline pipeline T5Transformer from hawalurahman +author: John Snow Labs +name: mt5_base_qaqg_finetuned_tydiqa_indonesian_sentence_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: [Question Answering, Summarization, Translation, Text Generation] +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained T5Transformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mt5_base_qaqg_finetuned_tydiqa_indonesian_sentence_pipeline` is a English model originally trained by hawalurahman. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mt5_base_qaqg_finetuned_tydiqa_indonesian_sentence_pipeline_en_5.5.0_3.0_1725459736730.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mt5_base_qaqg_finetuned_tydiqa_indonesian_sentence_pipeline_en_5.5.0_3.0_1725459736730.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mt5_base_qaqg_finetuned_tydiqa_indonesian_sentence_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mt5_base_qaqg_finetuned_tydiqa_indonesian_sentence_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mt5_base_qaqg_finetuned_tydiqa_indonesian_sentence_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|2.3 GB| + +## References + +https://huggingface.co/hawalurahman/mt5-base-qaqg-finetuned-TydiQA-id-sentence + +## Included Models + +- DocumentAssembler +- T5Transformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-ner_bert_large_cased_portuguese_contratos_tceal_en.md b/docs/_posts/ahmedlone127/2024-09-04-ner_bert_large_cased_portuguese_contratos_tceal_en.md new file mode 100644 index 00000000000000..a7b83069e1b1ea --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-ner_bert_large_cased_portuguese_contratos_tceal_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ner_bert_large_cased_portuguese_contratos_tceal BertForTokenClassification from begnini +author: John Snow Labs +name: ner_bert_large_cased_portuguese_contratos_tceal +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_bert_large_cased_portuguese_contratos_tceal` is a English model originally trained by begnini. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_bert_large_cased_portuguese_contratos_tceal_en_5.5.0_3.0_1725449969013.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_bert_large_cased_portuguese_contratos_tceal_en_5.5.0_3.0_1725449969013.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("ner_bert_large_cased_portuguese_contratos_tceal","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("ner_bert_large_cased_portuguese_contratos_tceal", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_bert_large_cased_portuguese_contratos_tceal| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/begnini/ner-bert-large-cased-pt-contratos_tceal \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-ner_cw_model_en.md b/docs/_posts/ahmedlone127/2024-09-04-ner_cw_model_en.md new file mode 100644 index 00000000000000..8f15fb662372fb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-ner_cw_model_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ner_cw_model DistilBertForTokenClassification from ArshiaKarimian +author: John Snow Labs +name: ner_cw_model +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_cw_model` is a English model originally trained by ArshiaKarimian. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_cw_model_en_5.5.0_3.0_1725448710041.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_cw_model_en_5.5.0_3.0_1725448710041.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("ner_cw_model","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("ner_cw_model", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_cw_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/ArshiaKarimian/NER_CW_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-ner_distilbert_textminr_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-ner_distilbert_textminr_pipeline_en.md new file mode 100644 index 00000000000000..c6d9db78485582 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-ner_distilbert_textminr_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English ner_distilbert_textminr_pipeline pipeline DistilBertForTokenClassification from textminr +author: John Snow Labs +name: ner_distilbert_textminr_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_distilbert_textminr_pipeline` is a English model originally trained by textminr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_distilbert_textminr_pipeline_en_5.5.0_3.0_1725448957631.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_distilbert_textminr_pipeline_en_5.5.0_3.0_1725448957631.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ner_distilbert_textminr_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ner_distilbert_textminr_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
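+The `df` referenced above is not defined in the card. A minimal sketch of how such an input DataFrame is typically built, together with the `annotate()` shortcut for single strings; the `"ner"` output key is an assumption about this pipeline's stage names:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("ner_distilbert_textminr_pipeline", lang="en")
+
+# The pipeline reads a DataFrame with a "text" column
+df = spark.createDataFrame([["John works at John Snow Labs."]]).toDF("text")
+annotations = pipeline.transform(df)
+
+# For quick checks, annotate() runs the same stages on a plain string and
+# returns a dict keyed by the stages' output column names ("ner" assumed here)
+result = pipeline.annotate("John works at John Snow Labs.")
+print(result.get("ner"))
+```
+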
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_distilbert_textminr_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/textminr/ner-distilbert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-ner_model_ep_all_en.md b/docs/_posts/ahmedlone127/2024-09-04-ner_model_ep_all_en.md new file mode 100644 index 00000000000000..c458d497bb244b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-ner_model_ep_all_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ner_model_ep_all DistilBertForTokenClassification from Polo123 +author: John Snow Labs +name: ner_model_ep_all +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_model_ep_all` is a English model originally trained by Polo123. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_model_ep_all_en_5.5.0_3.0_1725475888443.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_model_ep_all_en_5.5.0_3.0_1725475888443.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("ner_model_ep_all","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("ner_model_ep_all", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_model_ep_all| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Polo123/ner_model_ep_all \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-ner_model_ep_all_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-ner_model_ep_all_pipeline_en.md new file mode 100644 index 00000000000000..0e427ead7237ba --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-ner_model_ep_all_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English ner_model_ep_all_pipeline pipeline DistilBertForTokenClassification from Polo123 +author: John Snow Labs +name: ner_model_ep_all_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_model_ep_all_pipeline` is a English model originally trained by Polo123. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_model_ep_all_pipeline_en_5.5.0_3.0_1725475900239.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_model_ep_all_pipeline_en_5.5.0_3.0_1725475900239.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ner_model_ep_all_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ner_model_ep_all_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_model_ep_all_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Polo123/ner_model_ep_all + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-ner_model_rujengelal_en.md b/docs/_posts/ahmedlone127/2024-09-04-ner_model_rujengelal_en.md new file mode 100644 index 00000000000000..004168d983b5f5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-ner_model_rujengelal_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ner_model_rujengelal DistilBertForTokenClassification from rujengelal +author: John Snow Labs +name: ner_model_rujengelal +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_model_rujengelal` is a English model originally trained by rujengelal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_model_rujengelal_en_5.5.0_3.0_1725448625487.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_model_rujengelal_en_5.5.0_3.0_1725448625487.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("ner_model_rujengelal","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("ner_model_rujengelal", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_model_rujengelal| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/rujengelal/ner-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-nli_conventional_fine_tuning_intradiction_en.md b/docs/_posts/ahmedlone127/2024-09-04-nli_conventional_fine_tuning_intradiction_en.md new file mode 100644 index 00000000000000..097c02a452353d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-nli_conventional_fine_tuning_intradiction_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English nli_conventional_fine_tuning_intradiction AlbertForSequenceClassification from Intradiction +author: John Snow Labs +name: nli_conventional_fine_tuning_intradiction +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nli_conventional_fine_tuning_intradiction` is a English model originally trained by Intradiction. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nli_conventional_fine_tuning_intradiction_en_5.5.0_3.0_1725488203468.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nli_conventional_fine_tuning_intradiction_en_5.5.0_3.0_1725488203468.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = AlbertForSequenceClassification.pretrained("nli_conventional_fine_tuning_intradiction","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = AlbertForSequenceClassification.pretrained("nli_conventional_fine_tuning_intradiction", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
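+A short sketch (not part of the original card) of reading the prediction back from the transformed DataFrame; the column name matches the `class` output set above:
+
+```python
+# Each "class" annotation's result field is the predicted label for the input text
+pipelineDF.select("text", "class.result").show(truncate=False)
+```
+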
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nli_conventional_fine_tuning_intradiction| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/Intradiction/NLI-Conventional-Fine-Tuning \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-nli_conventional_fine_tuning_intradiction_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-nli_conventional_fine_tuning_intradiction_pipeline_en.md new file mode 100644 index 00000000000000..430067fd20e672 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-nli_conventional_fine_tuning_intradiction_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English nli_conventional_fine_tuning_intradiction_pipeline pipeline AlbertForSequenceClassification from Intradiction +author: John Snow Labs +name: nli_conventional_fine_tuning_intradiction_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nli_conventional_fine_tuning_intradiction_pipeline` is a English model originally trained by Intradiction. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nli_conventional_fine_tuning_intradiction_pipeline_en_5.5.0_3.0_1725488206056.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nli_conventional_fine_tuning_intradiction_pipeline_en_5.5.0_3.0_1725488206056.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("nli_conventional_fine_tuning_intradiction_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("nli_conventional_fine_tuning_intradiction_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nli_conventional_fine_tuning_intradiction_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/Intradiction/NLI-Conventional-Fine-Tuning + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-nlp_mini_project_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-nlp_mini_project_pipeline_en.md new file mode 100644 index 00000000000000..3286a93ab428f2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-nlp_mini_project_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English nlp_mini_project_pipeline pipeline DistilBertForTokenClassification from LightFury9 +author: John Snow Labs +name: nlp_mini_project_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nlp_mini_project_pipeline` is a English model originally trained by LightFury9. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nlp_mini_project_pipeline_en_5.5.0_3.0_1725476223217.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nlp_mini_project_pipeline_en_5.5.0_3.0_1725476223217.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("nlp_mini_project_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("nlp_mini_project_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nlp_mini_project_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/LightFury9/nlp-mini-project + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-novelicious_qas_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-novelicious_qas_pipeline_en.md new file mode 100644 index 00000000000000..e5c326392d5e14 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-novelicious_qas_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English novelicious_qas_pipeline pipeline DistilBertForQuestionAnswering from ndrakita +author: John Snow Labs +name: novelicious_qas_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`novelicious_qas_pipeline` is a English model originally trained by ndrakita. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/novelicious_qas_pipeline_en_5.5.0_3.0_1725465305138.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/novelicious_qas_pipeline_en_5.5.0_3.0_1725465305138.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("novelicious_qas_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("novelicious_qas_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
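+This pipeline starts from a MultiDocumentAssembler, so it expects two input columns rather than a single `text` column. The column names and the `answer` output key in the sketch below are illustrative assumptions, not taken from the card:
+
+```python
+# Extractive QA pipelines take a question plus the context to search in
+df = spark.createDataFrame(
+    [["What is my name?", "My name is Clara and I live in Berkeley."]]
+).toDF("question", "context")
+
+annotations = pipeline.transform(df)
+annotations.select("answer.result").show(truncate=False)
+```
+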
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|novelicious_qas_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/ndrakita/novelicious-qas + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-olm_roberta_base_latest_summarization_reward_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-olm_roberta_base_latest_summarization_reward_model_pipeline_en.md new file mode 100644 index 00000000000000..9d1ca5a821b1cb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-olm_roberta_base_latest_summarization_reward_model_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English olm_roberta_base_latest_summarization_reward_model_pipeline pipeline RoBertaForSequenceClassification from Tristan +author: John Snow Labs +name: olm_roberta_base_latest_summarization_reward_model_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`olm_roberta_base_latest_summarization_reward_model_pipeline` is a English model originally trained by Tristan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/olm_roberta_base_latest_summarization_reward_model_pipeline_en_5.5.0_3.0_1725485031087.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/olm_roberta_base_latest_summarization_reward_model_pipeline_en_5.5.0_3.0_1725485031087.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("olm_roberta_base_latest_summarization_reward_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("olm_roberta_base_latest_summarization_reward_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|olm_roberta_base_latest_summarization_reward_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|467.8 MB| + +## References + +https://huggingface.co/Tristan/olm-roberta-base-latest_summarization_reward_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-ope_bert_v1_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-ope_bert_v1_3_pipeline_en.md new file mode 100644 index 00000000000000..99766867e878ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-ope_bert_v1_3_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English ope_bert_v1_3_pipeline pipeline DistilBertEmbeddings from RyotaroOKabe +author: John Snow Labs +name: ope_bert_v1_3_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ope_bert_v1_3_pipeline` is a English model originally trained by RyotaroOKabe. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ope_bert_v1_3_pipeline_en_5.5.0_3.0_1725418752760.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ope_bert_v1_3_pipeline_en_5.5.0_3.0_1725418752760.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ope_bert_v1_3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ope_bert_v1_3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
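+A hedged sketch of pulling the embedding vectors out of the transformed DataFrame; the `embeddings` output column name is an assumption about this pipeline's final stage:
+
+```python
+result = pipeline.transform(df)
+
+# Each token annotation carries its vector in the `embeddings` field;
+# exploding yields one row per token vector.
+result.selectExpr("explode(embeddings.embeddings) as token_vector").show(3, truncate=80)
+```
+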
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ope_bert_v1_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.5 MB| + +## References + +https://huggingface.co/RyotaroOKabe/ope_bert_v1.3 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-openai_clip_vit_large_patch14_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-openai_clip_vit_large_patch14_pipeline_en.md new file mode 100644 index 00000000000000..d0a9dfeeab851b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-openai_clip_vit_large_patch14_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English openai_clip_vit_large_patch14_pipeline pipeline CLIPForZeroShotClassification from polypo +author: John Snow Labs +name: openai_clip_vit_large_patch14_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`openai_clip_vit_large_patch14_pipeline` is a English model originally trained by polypo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/openai_clip_vit_large_patch14_pipeline_en_5.5.0_3.0_1725491417920.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/openai_clip_vit_large_patch14_pipeline_en_5.5.0_3.0_1725491417920.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("openai_clip_vit_large_patch14_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("openai_clip_vit_large_patch14_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
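+Because this pipeline begins with an ImageAssembler, `df` here has to be an image DataFrame rather than text. A minimal sketch (the path is a placeholder):
+
+```python
+# Load images with Spark's built-in image data source
+imageDF = spark.read \
+    .format("image") \
+    .option("dropInvalid", True) \
+    .load("path/to/images")
+
+annotations = pipeline.transform(imageDF)
+```
+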
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|openai_clip_vit_large_patch14_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.1 GB| + +## References + +https://huggingface.co/polypo/openai-clip-vit-large-patch14 + +## Included Models + +- ImageAssembler +- CLIPForZeroShotClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-openai_detector_large_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-openai_detector_large_pipeline_en.md new file mode 100644 index 00000000000000..36265c51d8e28a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-openai_detector_large_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English openai_detector_large_pipeline pipeline RoBertaForSequenceClassification from nbroad +author: John Snow Labs +name: openai_detector_large_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`openai_detector_large_pipeline` is a English model originally trained by nbroad. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/openai_detector_large_pipeline_en_5.5.0_3.0_1725485618275.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/openai_detector_large_pipeline_en_5.5.0_3.0_1725485618275.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("openai_detector_large_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("openai_detector_large_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|openai_detector_large_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/nbroad/openai-detector-large + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-openclip_negclip_en.md b/docs/_posts/ahmedlone127/2024-09-04-openclip_negclip_en.md new file mode 100644 index 00000000000000..ed8e2d80780fa8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-openclip_negclip_en.md @@ -0,0 +1,120 @@ +--- +layout: model +title: English openclip_negclip CLIPForZeroShotClassification from Nano1337 +author: John Snow Labs +name: openclip_negclip +date: 2024-09-04 +tags: [en, open_source, onnx, zero_shot, clip, image] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CLIPForZeroShotClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`openclip_negclip` is a English model originally trained by Nano1337. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/openclip_negclip_en_5.5.0_3.0_1725491525081.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/openclip_negclip_en_5.5.0_3.0_1725491525081.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+imageDF = spark.read \
+    .format("image") \
+    .option("dropInvalid", value = True) \
+    .load("src/test/resources/image/")
+
+candidateLabels = [
+    "a photo of a bird",
+    "a photo of a cat",
+    "a photo of a dog",
+    "a photo of a hen",
+    "a photo of a hippo",
+    "a photo of a room",
+    "a photo of a tractor",
+    "a photo of an ostrich",
+    "a photo of an ox"]
+
+imageAssembler = ImageAssembler() \
+    .setInputCol("image") \
+    .setOutputCol("image_assembler")
+
+imageClassifier = CLIPForZeroShotClassification.pretrained("openclip_negclip","en") \
+    .setInputCols(["image_assembler"]) \
+    .setOutputCol("label") \
+    .setCandidateLabels(candidateLabels)
+
+pipeline = Pipeline().setStages([imageAssembler, imageClassifier])
+pipelineModel = pipeline.fit(imageDF)
+pipelineDF = pipelineModel.transform(imageDF)
+
+```
+```scala
+
+val imageDF = ResourceHelper.spark.read
+    .format("image")
+    .option("dropInvalid", value = true)
+    .load("src/test/resources/image/")
+
+val candidateLabels = Array(
+    "a photo of a bird",
+    "a photo of a cat",
+    "a photo of a dog",
+    "a photo of a hen",
+    "a photo of a hippo",
+    "a photo of a room",
+    "a photo of a tractor",
+    "a photo of an ostrich",
+    "a photo of an ox")
+
+val imageAssembler = new ImageAssembler()
+    .setInputCol("image")
+    .setOutputCol("image_assembler")
+
+val imageClassifier = CLIPForZeroShotClassification.pretrained("openclip_negclip","en")
+    .setInputCols(Array("image_assembler"))
+    .setOutputCol("label")
+    .setCandidateLabels(candidateLabels)
+
+val pipeline = new Pipeline().setStages(Array(imageAssembler, imageClassifier))
+val pipelineModel = pipeline.fit(imageDF)
+val pipelineDF = pipelineModel.transform(imageDF)
+
+```
+
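+One way to read the chosen candidate label per image after the transform above (a sketch, not part of the original card):
+
+```python
+# "label" holds one annotation per image; result is the best-matching candidate label
+pipelineDF.select("image.origin", "label.result").show(truncate=False)
+```
+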
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|openclip_negclip| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[label]| +|Language:|en| +|Size:|567.0 MB| + +## References + +https://huggingface.co/Nano1337/openclip-negclip \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-opus_maltese_english_german_finetuned_german_tonga_tonga_islands_english_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-opus_maltese_english_german_finetuned_german_tonga_tonga_islands_english_pipeline_en.md new file mode 100644 index 00000000000000..8bd4681f26812e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-opus_maltese_english_german_finetuned_german_tonga_tonga_islands_english_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_maltese_english_german_finetuned_german_tonga_tonga_islands_english_pipeline pipeline MarianTransformer from PontifexMaximus +author: John Snow Labs +name: opus_maltese_english_german_finetuned_german_tonga_tonga_islands_english_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_german_finetuned_german_tonga_tonga_islands_english_pipeline` is a English model originally trained by PontifexMaximus. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_german_finetuned_german_tonga_tonga_islands_english_pipeline_en_5.5.0_3.0_1725494182730.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_german_finetuned_german_tonga_tonga_islands_english_pipeline_en_5.5.0_3.0_1725494182730.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_english_german_finetuned_german_tonga_tonga_islands_english_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_english_german_finetuned_german_tonga_tonga_islands_english_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_german_finetuned_german_tonga_tonga_islands_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|485.5 MB| + +## References + +https://huggingface.co/PontifexMaximus/opus-mt-en-de-finetuned-de-to-en + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_rooshan_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_rooshan_pipeline_en.md new file mode 100644 index 00000000000000..aee4fbe449818a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_rooshan_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_rooshan_pipeline pipeline MarianTransformer from Rooshan +author: John Snow Labs +name: opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_rooshan_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_rooshan_pipeline` is a English model originally trained by Rooshan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_rooshan_pipeline_en_5.5.0_3.0_1725494163900.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_rooshan_pipeline_en_5.5.0_3.0_1725494163900.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_rooshan_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_rooshan_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_rooshan_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|509.2 MB| + +## References + +https://huggingface.co/Rooshan/opus-mt-en-ro-finetuned-en-to-ro + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_susmit99_en.md b/docs/_posts/ahmedlone127/2024-09-04-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_susmit99_en.md new file mode 100644 index 00000000000000..6d0545931d3841 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_susmit99_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_susmit99 MarianTransformer from Susmit99 +author: John Snow Labs +name: opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_susmit99 +date: 2024-09-04 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_susmit99` is a English model originally trained by Susmit99. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_susmit99_en_5.5.0_3.0_1725494214214.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_susmit99_en_5.5.0_3.0_1725494214214.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_susmit99","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_susmit99","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
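+With the output column set to `translation` as above, the translated sentences can be read back like this (sketch only):
+
+```python
+# One annotation per detected sentence; result holds the translated text
+pipelineDF.select("translation.result").show(truncate=False)
+```
+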
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_susmit99| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.6 MB| + +## References + +https://huggingface.co/Susmit99/opus-mt-en-ro-finetuned-en-to-ro \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-opus_maltese_finetuned_korean_german_en.md b/docs/_posts/ahmedlone127/2024-09-04-opus_maltese_finetuned_korean_german_en.md new file mode 100644 index 00000000000000..5ca780cfc37a29 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-opus_maltese_finetuned_korean_german_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English opus_maltese_finetuned_korean_german MarianTransformer from Uiji +author: John Snow Labs +name: opus_maltese_finetuned_korean_german +date: 2024-09-04 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_finetuned_korean_german` is a English model originally trained by Uiji. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_finetuned_korean_german_en_5.5.0_3.0_1725493753072.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_finetuned_korean_german_en_5.5.0_3.0_1725493753072.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_maltese_finetuned_korean_german","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_maltese_finetuned_korean_german","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_finetuned_korean_german| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|523.3 MB| + +## References + +https://huggingface.co/Uiji/opus-mt-finetuned-ko-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-opus_maltese_ganda_english_finetuned_ganda_tonga_tonga_islands_english_en.md b/docs/_posts/ahmedlone127/2024-09-04-opus_maltese_ganda_english_finetuned_ganda_tonga_tonga_islands_english_en.md new file mode 100644 index 00000000000000..da3f4e4c21ccc1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-opus_maltese_ganda_english_finetuned_ganda_tonga_tonga_islands_english_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English opus_maltese_ganda_english_finetuned_ganda_tonga_tonga_islands_english MarianTransformer from KitoEver +author: John Snow Labs +name: opus_maltese_ganda_english_finetuned_ganda_tonga_tonga_islands_english +date: 2024-09-04 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_ganda_english_finetuned_ganda_tonga_tonga_islands_english` is a English model originally trained by KitoEver. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_ganda_english_finetuned_ganda_tonga_tonga_islands_english_en_5.5.0_3.0_1725493753777.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_ganda_english_finetuned_ganda_tonga_tonga_islands_english_en_5.5.0_3.0_1725493753777.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_maltese_ganda_english_finetuned_ganda_tonga_tonga_islands_english","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_maltese_ganda_english_finetuned_ganda_tonga_tonga_islands_english","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_ganda_english_finetuned_ganda_tonga_tonga_islands_english| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|513.1 MB| + +## References + +https://huggingface.co/KitoEver/opus-mt-lg-en-finetuned-lg-to-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-opus_maltese_ganda_english_finetuned_ganda_tonga_tonga_islands_english_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-opus_maltese_ganda_english_finetuned_ganda_tonga_tonga_islands_english_pipeline_en.md new file mode 100644 index 00000000000000..3e7c47e6a13b4f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-opus_maltese_ganda_english_finetuned_ganda_tonga_tonga_islands_english_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_maltese_ganda_english_finetuned_ganda_tonga_tonga_islands_english_pipeline pipeline MarianTransformer from KitoEver +author: John Snow Labs +name: opus_maltese_ganda_english_finetuned_ganda_tonga_tonga_islands_english_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_ganda_english_finetuned_ganda_tonga_tonga_islands_english_pipeline` is a English model originally trained by KitoEver. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_ganda_english_finetuned_ganda_tonga_tonga_islands_english_pipeline_en_5.5.0_3.0_1725493784570.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_ganda_english_finetuned_ganda_tonga_tonga_islands_english_pipeline_en_5.5.0_3.0_1725493784570.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_ganda_english_finetuned_ganda_tonga_tonga_islands_english_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_ganda_english_finetuned_ganda_tonga_tonga_islands_english_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_ganda_english_finetuned_ganda_tonga_tonga_islands_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|513.6 MB| + +## References + +https://huggingface.co/KitoEver/opus-mt-lg-en-finetuned-lg-to-en + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-pharma_classification_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-pharma_classification_pipeline_en.md new file mode 100644 index 00000000000000..8f0deef943cab3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-pharma_classification_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English pharma_classification_pipeline pipeline DistilBertForSequenceClassification from skylord +author: John Snow Labs +name: pharma_classification_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`pharma_classification_pipeline` is a English model originally trained by skylord. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pharma_classification_pipeline_en_5.5.0_3.0_1725489626084.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pharma_classification_pipeline_en_5.5.0_3.0_1725489626084.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("pharma_classification_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("pharma_classification_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pharma_classification_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/skylord/pharma_classification + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-practice_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-practice_model_pipeline_en.md new file mode 100644 index 00000000000000..1e3073365cac23 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-practice_model_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English practice_model_pipeline pipeline CamemBertEmbeddings from OOOHHHIII +author: John Snow Labs +name: practice_model_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`practice_model_pipeline` is a English model originally trained by OOOHHHIII. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/practice_model_pipeline_en_5.5.0_3.0_1725442586729.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/practice_model_pipeline_en_5.5.0_3.0_1725442586729.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("practice_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("practice_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|practice_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/OOOHHHIII/practice-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-predict_political_group_camembert_large_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-predict_political_group_camembert_large_pipeline_en.md new file mode 100644 index 00000000000000..606e63cd4def68 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-predict_political_group_camembert_large_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English predict_political_group_camembert_large_pipeline pipeline CamemBertForSequenceClassification from ekazuki +author: John Snow Labs +name: predict_political_group_camembert_large_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`predict_political_group_camembert_large_pipeline` is a English model originally trained by ekazuki. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/predict_political_group_camembert_large_pipeline_en_5.5.0_3.0_1725480932987.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/predict_political_group_camembert_large_pipeline_en_5.5.0_3.0_1725480932987.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("predict_political_group_camembert_large_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("predict_political_group_camembert_large_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
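+
+As an alternative to `transform`, the hedged sketch below uses `annotate` for quick, single-sentence inference. It assumes Spark NLP has been started and that the classifier's output column is named `class`; the French sentence is invented, chosen only because the underlying CamemBERT model targets French text.
+
+```python
+# Minimal sketch; assumes sparknlp.start() has already been called.
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("predict_political_group_camembert_large_pipeline", lang="en")
+
+# annotate() returns a dict mapping output column names to lists of results.
+annotations = pipeline.annotate("Nous devons investir davantage dans les services publics.")
+print(annotations.get("class"))  # assumed output column name
+```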
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|predict_political_group_camembert_large_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|414.0 MB| + +## References + +https://huggingface.co/ekazuki/predict_political_group_camembert_large + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-predict_political_group_camembert_tweet_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-predict_political_group_camembert_tweet_pipeline_en.md new file mode 100644 index 00000000000000..4936a3fbc72b4c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-predict_political_group_camembert_tweet_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English predict_political_group_camembert_tweet_pipeline pipeline CamemBertForSequenceClassification from ekazuki +author: John Snow Labs +name: predict_political_group_camembert_tweet_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`predict_political_group_camembert_tweet_pipeline` is a English model originally trained by ekazuki. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/predict_political_group_camembert_tweet_pipeline_en_5.5.0_3.0_1725480388356.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/predict_political_group_camembert_tweet_pipeline_en_5.5.0_3.0_1725480388356.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("predict_political_group_camembert_tweet_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("predict_political_group_camembert_tweet_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|predict_political_group_camembert_tweet_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|400.9 MB| + +## References + +https://huggingface.co/ekazuki/predict_political_group_camembert_tweet + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-promptengpromptclassification_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-promptengpromptclassification_pipeline_en.md new file mode 100644 index 00000000000000..0c5320670e6d0d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-promptengpromptclassification_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English promptengpromptclassification_pipeline pipeline RoBertaForSequenceClassification from sahilml +author: John Snow Labs +name: promptengpromptclassification_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`promptengpromptclassification_pipeline` is a English model originally trained by sahilml. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/promptengpromptclassification_pipeline_en_5.5.0_3.0_1725453325501.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/promptengpromptclassification_pipeline_en_5.5.0_3.0_1725453325501.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("promptengpromptclassification_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("promptengpromptclassification_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|promptengpromptclassification_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|426.3 MB| + +## References + +https://huggingface.co/sahilml/promptEngPromptClassification + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-pubmed_clip_vit_base_patch32_en.md b/docs/_posts/ahmedlone127/2024-09-04-pubmed_clip_vit_base_patch32_en.md new file mode 100644 index 00000000000000..cb6c8f5575e7ca --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-pubmed_clip_vit_base_patch32_en.md @@ -0,0 +1,120 @@ +--- +layout: model +title: English pubmed_clip_vit_base_patch32 CLIPForZeroShotClassification from flaviagiammarino +author: John Snow Labs +name: pubmed_clip_vit_base_patch32 +date: 2024-09-04 +tags: [en, open_source, onnx, zero_shot, clip, image] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CLIPForZeroShotClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`pubmed_clip_vit_base_patch32` is a English model originally trained by flaviagiammarino. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pubmed_clip_vit_base_patch32_en_5.5.0_3.0_1725491301380.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pubmed_clip_vit_base_patch32_en_5.5.0_3.0_1725491301380.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+imageDF = spark.read \
+    .format("image") \
+    .option("dropInvalid", value = True) \
+    .load("src/test/resources/image/")
+
+candidateLabels = [
+    "a photo of a bird",
+    "a photo of a cat",
+    "a photo of a dog",
+    "a photo of a hen",
+    "a photo of a hippo",
+    "a photo of a room",
+    "a photo of a tractor",
+    "a photo of an ostrich",
+    "a photo of an ox"]
+
+imageAssembler = ImageAssembler() \
+    .setInputCol("image") \
+    .setOutputCol("image_assembler")
+
+imageClassifier = CLIPForZeroShotClassification.pretrained("pubmed_clip_vit_base_patch32","en") \
+    .setInputCols(["image_assembler"]) \
+    .setOutputCol("label") \
+    .setCandidateLabels(candidateLabels)
+
+pipeline = Pipeline().setStages([imageAssembler, imageClassifier])
+pipelineModel = pipeline.fit(imageDF)
+pipelineDF = pipelineModel.transform(imageDF)
+
+```
+```scala
+
+val imageDF = ResourceHelper.spark.read
+  .format("image")
+  .option("dropInvalid", value = true)
+  .load("src/test/resources/image/")
+
+val candidateLabels = Array(
+  "a photo of a bird",
+  "a photo of a cat",
+  "a photo of a dog",
+  "a photo of a hen",
+  "a photo of a hippo",
+  "a photo of a room",
+  "a photo of a tractor",
+  "a photo of an ostrich",
+  "a photo of an ox")
+
+val imageAssembler = new ImageAssembler()
+  .setInputCol("image")
+  .setOutputCol("image_assembler")
+
+val imageClassifier = CLIPForZeroShotClassification.pretrained("pubmed_clip_vit_base_patch32","en")
+  .setInputCols(Array("image_assembler"))
+  .setOutputCol("label")
+  .setCandidateLabels(candidateLabels)
+
+val pipeline = new Pipeline().setStages(Array(imageAssembler, imageClassifier))
+val pipelineModel = pipeline.fit(imageDF)
+val pipelineDF = pipelineModel.transform(imageDF)
+
+```
+</div>
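+
+After `transform`, each row carries the best-matching candidate caption for its image. The short sketch below shows one way to inspect the predictions; it assumes the `pipelineDF` DataFrame produced by the snippet above and the standard `image.origin` field exposed by Spark's image data source.
+
+```python
+# Minimal sketch; reuses pipelineDF from the example above.
+# "label.result" holds the candidate caption selected by the CLIP model for each image.
+pipelineDF.select("image.origin", "label.result").show(truncate=False)
+```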
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pubmed_clip_vit_base_patch32| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[label]| +|Language:|en| +|Size:|399.9 MB| + +## References + +https://huggingface.co/flaviagiammarino/pubmed-clip-vit-base-patch32 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-q05_kaggle_debertav2_14_en.md b/docs/_posts/ahmedlone127/2024-09-04-q05_kaggle_debertav2_14_en.md new file mode 100644 index 00000000000000..7c8a269d20f048 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-q05_kaggle_debertav2_14_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English q05_kaggle_debertav2_14 DeBertaForSequenceClassification from wallacenpj +author: John Snow Labs +name: q05_kaggle_debertav2_14 +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`q05_kaggle_debertav2_14` is a English model originally trained by wallacenpj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/q05_kaggle_debertav2_14_en_5.5.0_3.0_1725467499791.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/q05_kaggle_debertav2_14_en_5.5.0_3.0_1725467499791.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("q05_kaggle_debertav2_14","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("q05_kaggle_debertav2_14", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
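+
+For small, ad-hoc inputs, a `LightPipeline` avoids the overhead of a full DataFrame job. The sketch below is a hedged example that reuses the `pipelineModel` fitted in the snippet above; the `class` key matches the output column set there.
+
+```python
+# Minimal sketch; assumes pipelineModel from the example above.
+from sparknlp.base import LightPipeline
+
+light_model = LightPipeline(pipelineModel)
+result = light_model.annotate("I love spark-nlp")
+print(result["class"])  # predicted label(s) from DeBertaForSequenceClassification
+```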
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|q05_kaggle_debertav2_14| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|556.5 MB| + +## References + +https://huggingface.co/wallacenpj/q05_kaggle_debertav2-14 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-q05_kaggle_debertav2_14_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-q05_kaggle_debertav2_14_pipeline_en.md new file mode 100644 index 00000000000000..815f6115c523fc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-q05_kaggle_debertav2_14_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English q05_kaggle_debertav2_14_pipeline pipeline DeBertaForSequenceClassification from wallacenpj +author: John Snow Labs +name: q05_kaggle_debertav2_14_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`q05_kaggle_debertav2_14_pipeline` is a English model originally trained by wallacenpj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/q05_kaggle_debertav2_14_pipeline_en_5.5.0_3.0_1725467582937.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/q05_kaggle_debertav2_14_pipeline_en_5.5.0_3.0_1725467582937.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("q05_kaggle_debertav2_14_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("q05_kaggle_debertav2_14_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|q05_kaggle_debertav2_14_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|556.5 MB| + +## References + +https://huggingface.co/wallacenpj/q05_kaggle_debertav2-14 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-qa_roberta_model2_en.md b/docs/_posts/ahmedlone127/2024-09-04-qa_roberta_model2_en.md new file mode 100644 index 00000000000000..f07def87db0717 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-qa_roberta_model2_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English qa_roberta_model2 RoBertaForQuestionAnswering from MattNandavong +author: John Snow Labs +name: qa_roberta_model2 +date: 2024-09-04 +tags: [en, open_source, onnx, question_answering, roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_roberta_model2` is a English model originally trained by MattNandavong. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_roberta_model2_en_5.5.0_3.0_1725484005850.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_roberta_model2_en_5.5.0_3.0_1725484005850.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+     .setInputCols(["question", "context"]) \
+     .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = RoBertaForQuestionAnswering.pretrained("qa_roberta_model2","en") \
+     .setInputCols(["document_question","document_context"]) \
+     .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+     .setInputCols("question", "context")
+     .setOutputCols("document_question", "document_context")
+
+val spanClassifier = RoBertaForQuestionAnswering.pretrained("qa_roberta_model2", "en")
+     .setInputCols(Array("document_question","document_context"))
+     .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("document_question", "document_context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
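+
+Once the data has been transformed, the extracted answer span can be read back from the `answer` column set in the snippet above. The one-liner below is a minimal sketch that reuses the resulting `pipelineDF` DataFrame.
+
+```python
+# Minimal sketch; reuses pipelineDF from the example above.
+# "answer.result" holds the span extracted from document_context for document_question.
+pipelineDF.select("answer.result").show(truncate=False)
+```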
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_roberta_model2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/MattNandavong/QA-roberta-model2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-qa_roberta_model2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-qa_roberta_model2_pipeline_en.md new file mode 100644 index 00000000000000..f8b110efac4e2f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-qa_roberta_model2_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English qa_roberta_model2_pipeline pipeline RoBertaForQuestionAnswering from MattNandavong +author: John Snow Labs +name: qa_roberta_model2_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_roberta_model2_pipeline` is a English model originally trained by MattNandavong. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_roberta_model2_pipeline_en_5.5.0_3.0_1725484101637.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_roberta_model2_pipeline_en_5.5.0_3.0_1725484101637.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("qa_roberta_model2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("qa_roberta_model2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_roberta_model2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/MattNandavong/QA-roberta-model2 + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-question_answering_roberta_base_s_en.md b/docs/_posts/ahmedlone127/2024-09-04-question_answering_roberta_base_s_en.md new file mode 100644 index 00000000000000..b70f80fa51615c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-question_answering_roberta_base_s_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English question_answering_roberta_base_s RoBertaForQuestionAnswering from consciousAI +author: John Snow Labs +name: question_answering_roberta_base_s +date: 2024-09-04 +tags: [en, open_source, onnx, question_answering, roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`question_answering_roberta_base_s` is a English model originally trained by consciousAI. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/question_answering_roberta_base_s_en_5.5.0_3.0_1725451361528.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/question_answering_roberta_base_s_en_5.5.0_3.0_1725451361528.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+     .setInputCols(["question", "context"]) \
+     .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = RoBertaForQuestionAnswering.pretrained("question_answering_roberta_base_s","en") \
+     .setInputCols(["document_question","document_context"]) \
+     .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+     .setInputCols("question", "context")
+     .setOutputCols("document_question", "document_context")
+
+val spanClassifier = RoBertaForQuestionAnswering.pretrained("question_answering_roberta_base_s", "en")
+     .setInputCols(Array("document_question","document_context"))
+     .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("document_question", "document_context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|question_answering_roberta_base_s| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|461.9 MB| + +## References + +https://huggingface.co/consciousAI/question-answering-roberta-base-s \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-raj_model_en.md b/docs/_posts/ahmedlone127/2024-09-04-raj_model_en.md new file mode 100644 index 00000000000000..c9bf514e62d970 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-raj_model_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English raj_model DistilBertForTokenClassification from Gkumi +author: John Snow Labs +name: raj_model +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`raj_model` is a English model originally trained by Gkumi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/raj_model_en_5.5.0_3.0_1725493092976.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/raj_model_en_5.5.0_3.0_1725493092976.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("raj_model","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("raj_model", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
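+
+The token-level predictions come back as two parallel arrays, one with the tokens and one with their NER tags. The sketch below is a minimal way to display them side by side, reusing the `pipelineDF` DataFrame from the snippet above.
+
+```python
+# Minimal sketch; reuses pipelineDF from the example above.
+# "token.result" and "ner.result" are parallel arrays of tokens and predicted tags.
+pipelineDF.select("token.result", "ner.result").show(truncate=False)
+```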
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|raj_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|244.0 MB| + +## References + +https://huggingface.co/Gkumi/Raj-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-raj_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-raj_model_pipeline_en.md new file mode 100644 index 00000000000000..29d9ad1c9c7fdf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-raj_model_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English raj_model_pipeline pipeline DistilBertForTokenClassification from Gkumi +author: John Snow Labs +name: raj_model_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`raj_model_pipeline` is a English model originally trained by Gkumi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/raj_model_pipeline_en_5.5.0_3.0_1725493104452.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/raj_model_pipeline_en_5.5.0_3.0_1725493104452.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("raj_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("raj_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|raj_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|244.0 MB| + +## References + +https://huggingface.co/Gkumi/Raj-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-readabert_arabic_ar.md b/docs/_posts/ahmedlone127/2024-09-04-readabert_arabic_ar.md new file mode 100644 index 00000000000000..79fc05a5013b58 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-readabert_arabic_ar.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Arabic readabert_arabic BertForSequenceClassification from tareknaous +author: John Snow Labs +name: readabert_arabic +date: 2024-09-04 +tags: [ar, open_source, onnx, sequence_classification, bert] +task: Text Classification +language: ar +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`readabert_arabic` is a Arabic model originally trained by tareknaous. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/readabert_arabic_ar_5.5.0_3.0_1725432788077.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/readabert_arabic_ar_5.5.0_3.0_1725432788077.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = BertForSequenceClassification.pretrained("readabert_arabic","ar") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = BertForSequenceClassification.pretrained("readabert_arabic", "ar")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
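+
+Because this model targets Arabic, a hedged sketch with an Arabic input sentence is shown below; the sentence is invented for illustration, and the pipeline variables are the ones defined in the snippet above.
+
+```python
+# Minimal sketch; reuses the pipeline defined in the example above. The sentence is an invented example.
+data = spark.createDataFrame([["هذا نص قصير وسهل القراءة"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineModel.transform(data).select("class.result").show(truncate=False)
+```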
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|readabert_arabic| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|ar| +|Size:|507.3 MB| + +## References + +https://huggingface.co/tareknaous/readabert-ar \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-readabert_arabic_pipeline_ar.md b/docs/_posts/ahmedlone127/2024-09-04-readabert_arabic_pipeline_ar.md new file mode 100644 index 00000000000000..fb8195b0b08721 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-readabert_arabic_pipeline_ar.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Arabic readabert_arabic_pipeline pipeline BertForSequenceClassification from tareknaous +author: John Snow Labs +name: readabert_arabic_pipeline +date: 2024-09-04 +tags: [ar, open_source, pipeline, onnx] +task: Text Classification +language: ar +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`readabert_arabic_pipeline` is a Arabic model originally trained by tareknaous. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/readabert_arabic_pipeline_ar_5.5.0_3.0_1725432819115.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/readabert_arabic_pipeline_ar_5.5.0_3.0_1725432819115.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("readabert_arabic_pipeline", lang = "ar") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("readabert_arabic_pipeline", lang = "ar") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|readabert_arabic_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ar| +|Size:|507.3 MB| + +## References + +https://huggingface.co/tareknaous/readabert-ar + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-readability_spanish_paragraphs_pipeline_es.md b/docs/_posts/ahmedlone127/2024-09-04-readability_spanish_paragraphs_pipeline_es.md new file mode 100644 index 00000000000000..c0bc2e03fd350f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-readability_spanish_paragraphs_pipeline_es.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Castilian, Spanish readability_spanish_paragraphs_pipeline pipeline RoBertaForSequenceClassification from somosnlp-hackathon-2022 +author: John Snow Labs +name: readability_spanish_paragraphs_pipeline +date: 2024-09-04 +tags: [es, open_source, pipeline, onnx] +task: Text Classification +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`readability_spanish_paragraphs_pipeline` is a Castilian, Spanish model originally trained by somosnlp-hackathon-2022. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/readability_spanish_paragraphs_pipeline_es_5.5.0_3.0_1725485293189.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/readability_spanish_paragraphs_pipeline_es_5.5.0_3.0_1725485293189.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("readability_spanish_paragraphs_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("readability_spanish_paragraphs_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|readability_spanish_paragraphs_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|444.5 MB| + +## References + +https://huggingface.co/somosnlp-hackathon-2022/readability-es-paragraphs + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-recipes_trainer_wwm_sen_3_sep_true_prefix_true_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-recipes_trainer_wwm_sen_3_sep_true_prefix_true_pipeline_en.md new file mode 100644 index 00000000000000..ae898329807b79 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-recipes_trainer_wwm_sen_3_sep_true_prefix_true_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English recipes_trainer_wwm_sen_3_sep_true_prefix_true_pipeline pipeline CamemBertEmbeddings from comartinez +author: John Snow Labs +name: recipes_trainer_wwm_sen_3_sep_true_prefix_true_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`recipes_trainer_wwm_sen_3_sep_true_prefix_true_pipeline` is a English model originally trained by comartinez. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/recipes_trainer_wwm_sen_3_sep_true_prefix_true_pipeline_en_5.5.0_3.0_1725444910441.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/recipes_trainer_wwm_sen_3_sep_true_prefix_true_pipeline_en_5.5.0_3.0_1725444910441.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("recipes_trainer_wwm_sen_3_sep_true_prefix_true_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("recipes_trainer_wwm_sen_3_sep_true_prefix_true_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|recipes_trainer_wwm_sen_3_sep_true_prefix_true_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|412.4 MB| + +## References + +https://huggingface.co/comartinez/recipes-trainer-wwm_sen_3_sep_True_prefix_True + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-relation_detection_textual_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-relation_detection_textual_pipeline_en.md new file mode 100644 index 00000000000000..9ededb9b3ec30a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-relation_detection_textual_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English relation_detection_textual_pipeline pipeline CamemBertForSequenceClassification from lupobricco +author: John Snow Labs +name: relation_detection_textual_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`relation_detection_textual_pipeline` is a English model originally trained by lupobricco. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/relation_detection_textual_pipeline_en_5.5.0_3.0_1725467029344.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/relation_detection_textual_pipeline_en_5.5.0_3.0_1725467029344.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("relation_detection_textual_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("relation_detection_textual_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|relation_detection_textual_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|387.5 MB| + +## References + +https://huggingface.co/lupobricco/relation_detection_textual + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-results_gkumi_en.md b/docs/_posts/ahmedlone127/2024-09-04-results_gkumi_en.md new file mode 100644 index 00000000000000..c4679a42e1f343 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-results_gkumi_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English results_gkumi DistilBertForTokenClassification from Gkumi +author: John Snow Labs +name: results_gkumi +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`results_gkumi` is a English model originally trained by Gkumi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/results_gkumi_en_5.5.0_3.0_1725448734449.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/results_gkumi_en_5.5.0_3.0_1725448734449.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("results_gkumi","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("results_gkumi", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|results_gkumi| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/Gkumi/results \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-results_gkumi_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-results_gkumi_pipeline_en.md new file mode 100644 index 00000000000000..5318d3123facd5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-results_gkumi_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English results_gkumi_pipeline pipeline DistilBertForTokenClassification from Gkumi +author: John Snow Labs +name: results_gkumi_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`results_gkumi_pipeline` is a English model originally trained by Gkumi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/results_gkumi_pipeline_en_5.5.0_3.0_1725448746370.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/results_gkumi_pipeline_en_5.5.0_3.0_1725448746370.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("results_gkumi_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("results_gkumi_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|results_gkumi_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/Gkumi/results + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-results_raj_sharma01_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-results_raj_sharma01_pipeline_en.md new file mode 100644 index 00000000000000..4b4f92b27c82c1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-results_raj_sharma01_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English results_raj_sharma01_pipeline pipeline DistilBertForTokenClassification from Raj-sharma01 +author: John Snow Labs +name: results_raj_sharma01_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`results_raj_sharma01_pipeline` is a English model originally trained by Raj-sharma01. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/results_raj_sharma01_pipeline_en_5.5.0_3.0_1725492667899.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/results_raj_sharma01_pipeline_en_5.5.0_3.0_1725492667899.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("results_raj_sharma01_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("results_raj_sharma01_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|results_raj_sharma01_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.5 MB| + +## References + +https://huggingface.co/Raj-sharma01/results + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-reward_model_deberta_v3_large_en.md b/docs/_posts/ahmedlone127/2024-09-04-reward_model_deberta_v3_large_en.md new file mode 100644 index 00000000000000..a32017a115c39c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-reward_model_deberta_v3_large_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English reward_model_deberta_v3_large DeBertaForSequenceClassification from OpenAssistant +author: John Snow Labs +name: reward_model_deberta_v3_large +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`reward_model_deberta_v3_large` is a English model originally trained by OpenAssistant. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/reward_model_deberta_v3_large_en_5.5.0_3.0_1725463722115.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/reward_model_deberta_v3_large_en_5.5.0_3.0_1725463722115.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("reward_model_deberta_v3_large","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("reward_model_deberta_v3_large", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
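+
+Reward models of this kind are typically used to compare candidate responses. The sketch below is a rough, non-authoritative illustration that scores two invented replies with the fitted pipeline from the snippet above; treating the classifier's output and its confidence metadata as a relative preference signal is an assumption, not documented behaviour.
+
+```python
+# Minimal sketch; reuses pipelineModel from the example above. The candidate replies are invented.
+candidates = spark.createDataFrame(
+    [["Paris is the capital of France."], ["I am not sure what you mean."]]
+).toDF("text")
+
+scored = pipelineModel.transform(candidates)
+# "class.result" holds the predicted label; "class.metadata" carries per-class scores (assumption).
+scored.select("text", "class.result", "class.metadata").show(truncate=False)
+```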
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|reward_model_deberta_v3_large| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/OpenAssistant/reward-model-deberta-v3-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-reward_model_deberta_v3_large_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-reward_model_deberta_v3_large_pipeline_en.md new file mode 100644 index 00000000000000..6eb7bb8237f75a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-reward_model_deberta_v3_large_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English reward_model_deberta_v3_large_pipeline pipeline DeBertaForSequenceClassification from OpenAssistant +author: John Snow Labs +name: reward_model_deberta_v3_large_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`reward_model_deberta_v3_large_pipeline` is a English model originally trained by OpenAssistant. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/reward_model_deberta_v3_large_pipeline_en_5.5.0_3.0_1725463801401.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/reward_model_deberta_v3_large_pipeline_en_5.5.0_3.0_1725463801401.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("reward_model_deberta_v3_large_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("reward_model_deberta_v3_large_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|reward_model_deberta_v3_large_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/OpenAssistant/reward-model-deberta-v3-large + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-rise_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-rise_ner_pipeline_en.md new file mode 100644 index 00000000000000..2dc97c965ad587 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-rise_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English rise_ner_pipeline pipeline DistilBertForTokenClassification from mappelgren +author: John Snow Labs +name: rise_ner_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rise_ner_pipeline` is a English model originally trained by mappelgren. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rise_ner_pipeline_en_5.5.0_3.0_1725476006369.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rise_ner_pipeline_en_5.5.0_3.0_1725476006369.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("rise_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("rise_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
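+The snippet above assumes a Spark DataFrame `df` already exists. A minimal sketch of building one and reading the predicted tags follows; the input column name `text` and the output column `ner` are assumptions based on the token classifier included in this pipeline.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()  # start (or reuse) a Spark NLP session
+
+# assumed input column name: "text"
+df = spark.createDataFrame([["John works at Google in London."]]).toDF("text")
+
+pipeline = PretrainedPipeline("rise_ner_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+# "ner" is the assumed column holding the per-token entity labels
+annotations.select("ner.result").show(truncate=False)
+```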
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rise_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.4 MB| + +## References + +https://huggingface.co/mappelgren/RISE_NER + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-robako_base_asante_twi_uncased_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-robako_base_asante_twi_uncased_pipeline_en.md new file mode 100644 index 00000000000000..c1b7c42fc24ae1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-robako_base_asante_twi_uncased_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English robako_base_asante_twi_uncased_pipeline pipeline RoBertaEmbeddings from Ghana-NLP +author: John Snow Labs +name: robako_base_asante_twi_uncased_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`robako_base_asante_twi_uncased_pipeline` is a English model originally trained by Ghana-NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/robako_base_asante_twi_uncased_pipeline_en_5.5.0_3.0_1725412678556.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/robako_base_asante_twi_uncased_pipeline_en_5.5.0_3.0_1725412678556.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("robako_base_asante_twi_uncased_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("robako_base_asante_twi_uncased_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
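+The snippet above assumes a Spark DataFrame `df` already exists. A minimal sketch of building one and inspecting the vectors follows; the input column name `text` and the output column `embeddings` are assumptions based on the embeddings annotator included in this pipeline.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()  # start (or reuse) a Spark NLP session
+
+# assumed input column name: "text"
+df = spark.createDataFrame([["I love Spark NLP!"]]).toDF("text")
+
+pipeline = PretrainedPipeline("robako_base_asante_twi_uncased_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+# "embeddings" is the assumed output column; one annotation (token + vector) per token
+annotations.selectExpr("explode(embeddings) as emb") \
+    .selectExpr("emb.result as token", "emb.embeddings as vector") \
+    .show(truncate=False)
+```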
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|robako_base_asante_twi_uncased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|311.8 MB| + +## References + +https://huggingface.co/Ghana-NLP/robako-base-asante-twi-uncased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_base_bne_finetuned_suicide_spanish_pipeline_es.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_base_bne_finetuned_suicide_spanish_pipeline_es.md new file mode 100644 index 00000000000000..e5ca949c72da71 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_base_bne_finetuned_suicide_spanish_pipeline_es.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Castilian, Spanish roberta_base_bne_finetuned_suicide_spanish_pipeline pipeline RoBertaForSequenceClassification from somosnlp-hackathon-2023 +author: John Snow Labs +name: roberta_base_bne_finetuned_suicide_spanish_pipeline +date: 2024-09-04 +tags: [es, open_source, pipeline, onnx] +task: Text Classification +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_bne_finetuned_suicide_spanish_pipeline` is a Castilian, Spanish model originally trained by somosnlp-hackathon-2023. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_bne_finetuned_suicide_spanish_pipeline_es_5.5.0_3.0_1725452474636.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_bne_finetuned_suicide_spanish_pipeline_es_5.5.0_3.0_1725452474636.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_bne_finetuned_suicide_spanish_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_bne_finetuned_suicide_spanish_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
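+The snippet above assumes a Spark DataFrame `df` already exists. A minimal sketch of building one and reading the predictions follows; the input column name `text` and the output column `class` are assumptions based on the annotators included in this pipeline.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()  # start (or reuse) a Spark NLP session
+
+# assumed input column name: "text"
+df = spark.createDataFrame([["Me encanta Spark NLP."]]).toDF("text")
+
+pipeline = PretrainedPipeline("roberta_base_bne_finetuned_suicide_spanish_pipeline", lang = "es")
+annotations = pipeline.transform(df)
+
+# "class" is the assumed column holding the classifier's predictions
+annotations.select("class.result").show(truncate=False)
+```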
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_bne_finetuned_suicide_spanish_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|438.0 MB| + +## References + +https://huggingface.co/somosnlp-hackathon-2023/roberta-base-bne-finetuned-suicide-es + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_base_fake_news_tfg_en.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_base_fake_news_tfg_en.md new file mode 100644 index 00000000000000..eb6a2623d0f8a7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_base_fake_news_tfg_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_base_fake_news_tfg RoBertaForSequenceClassification from LittleFish-Coder +author: John Snow Labs +name: roberta_base_fake_news_tfg +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_fake_news_tfg` is a English model originally trained by LittleFish-Coder. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_fake_news_tfg_en_5.5.0_3.0_1725485721448.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_fake_news_tfg_en_5.5.0_3.0_1725485721448.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("roberta_base_fake_news_tfg","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("roberta_base_fake_news_tfg", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_fake_news_tfg| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|463.6 MB| + +## References + +https://huggingface.co/LittleFish-Coder/roberta-base-fake-news-tfg \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_base_fake_news_tfg_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_base_fake_news_tfg_pipeline_en.md new file mode 100644 index 00000000000000..17268c4986600b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_base_fake_news_tfg_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_base_fake_news_tfg_pipeline pipeline RoBertaForSequenceClassification from LittleFish-Coder +author: John Snow Labs +name: roberta_base_fake_news_tfg_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_fake_news_tfg_pipeline` is a English model originally trained by LittleFish-Coder. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_fake_news_tfg_pipeline_en_5.5.0_3.0_1725485745934.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_fake_news_tfg_pipeline_en_5.5.0_3.0_1725485745934.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_fake_news_tfg_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_fake_news_tfg_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
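+The snippet above assumes a Spark DataFrame `df` already exists. A minimal sketch of building one and reading the predictions follows; the input column name `text` and the output column `class` are assumptions based on the annotators included in this pipeline.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()  # start (or reuse) a Spark NLP session
+
+# assumed input column name: "text"
+df = spark.createDataFrame([["I love Spark NLP!"]]).toDF("text")
+
+pipeline = PretrainedPipeline("roberta_base_fake_news_tfg_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+# "class" is the assumed column holding the classifier's predictions
+annotations.select("class.result").show(truncate=False)
+```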
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_fake_news_tfg_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|463.6 MB| + +## References + +https://huggingface.co/LittleFish-Coder/roberta-base-fake-news-tfg + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_base_fine_tuned_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_base_fine_tuned_pipeline_en.md new file mode 100644 index 00000000000000..47970ac71a41b1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_base_fine_tuned_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English roberta_base_fine_tuned_pipeline pipeline RoBertaForQuestionAnswering from ClemMbote +author: John Snow Labs +name: roberta_base_fine_tuned_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_fine_tuned_pipeline` is a English model originally trained by ClemMbote. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_fine_tuned_pipeline_en_5.5.0_3.0_1725479210384.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_fine_tuned_pipeline_en_5.5.0_3.0_1725479210384.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_fine_tuned_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_fine_tuned_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
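+The snippet above assumes a Spark DataFrame `df` already exists. A minimal sketch of building one and reading the extracted answers follows; the input column names `question` and `context` and the output column `answer` are assumptions based on the annotators included in this pipeline.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()  # start (or reuse) a Spark NLP session
+
+# assumed input column names: "question" and "context"
+df = spark.createDataFrame(
+    [["What framework do I use?", "I use spark-nlp."]]
+).toDF("question", "context")
+
+pipeline = PretrainedPipeline("roberta_base_fine_tuned_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+# "answer" is the assumed column holding the extracted span
+annotations.select("answer.result").show(truncate=False)
+```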
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_fine_tuned_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|450.3 MB| + +## References + +https://huggingface.co/ClemMbote/roberta-base-fine-tuned + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_base_hate_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_base_hate_pipeline_en.md new file mode 100644 index 00000000000000..02dd17f9aceeb3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_base_hate_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_base_hate_pipeline pipeline RoBertaForSequenceClassification from cardiffnlp +author: John Snow Labs +name: roberta_base_hate_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_hate_pipeline` is a English model originally trained by cardiffnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_hate_pipeline_en_5.5.0_3.0_1725485476354.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_hate_pipeline_en_5.5.0_3.0_1725485476354.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_hate_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_hate_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
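+The snippet above assumes a Spark DataFrame `df` already exists. A minimal sketch of building one and reading the predictions follows; the input column name `text` and the output column `class` are assumptions based on the annotators included in this pipeline.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()  # start (or reuse) a Spark NLP session
+
+# assumed input column name: "text"
+df = spark.createDataFrame([["I love Spark NLP!"]]).toDF("text")
+
+pipeline = PretrainedPipeline("roberta_base_hate_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+# "class" is the assumed column holding the classifier's predictions
+annotations.select("class.result").show(truncate=False)
+```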
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_hate_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|441.2 MB| + +## References + +https://huggingface.co/cardiffnlp/roberta-base-hate + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_base_sqaud2_on_medical_meadow_medqa_v1_en.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_base_sqaud2_on_medical_meadow_medqa_v1_en.md new file mode 100644 index 00000000000000..40f90ed259259c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_base_sqaud2_on_medical_meadow_medqa_v1_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English roberta_base_sqaud2_on_medical_meadow_medqa_v1 RoBertaForQuestionAnswering from themariolinml +author: John Snow Labs +name: roberta_base_sqaud2_on_medical_meadow_medqa_v1 +date: 2024-09-04 +tags: [en, open_source, onnx, question_answering, roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_sqaud2_on_medical_meadow_medqa_v1` is a English model originally trained by themariolinml. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_sqaud2_on_medical_meadow_medqa_v1_en_5.5.0_3.0_1725479969136.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_sqaud2_on_medical_meadow_medqa_v1_en_5.5.0_3.0_1725479969136.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_base_sqaud2_on_medical_meadow_medqa_v1","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_base_sqaud2_on_medical_meadow_medqa_v1", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_sqaud2_on_medical_meadow_medqa_v1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|463.8 MB| + +## References + +https://huggingface.co/themariolinml/roberta-base-sqaud2-on-medical_meadow_medqa-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_base_squad2_finetuned_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_base_squad2_finetuned_pipeline_en.md new file mode 100644 index 00000000000000..6a49b92225228b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_base_squad2_finetuned_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English roberta_base_squad2_finetuned_pipeline pipeline RoBertaForQuestionAnswering from pluemnvd01 +author: John Snow Labs +name: roberta_base_squad2_finetuned_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_squad2_finetuned_pipeline` is a English model originally trained by pluemnvd01. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_squad2_finetuned_pipeline_en_5.5.0_3.0_1725483600409.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_squad2_finetuned_pipeline_en_5.5.0_3.0_1725483600409.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_squad2_finetuned_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_squad2_finetuned_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
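+The snippet above assumes a Spark DataFrame `df` already exists. A minimal sketch of building one and reading the extracted answers follows; the input column names `question` and `context` and the output column `answer` are assumptions based on the annotators included in this pipeline.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()  # start (or reuse) a Spark NLP session
+
+# assumed input column names: "question" and "context"
+df = spark.createDataFrame(
+    [["What framework do I use?", "I use spark-nlp."]]
+).toDF("question", "context")
+
+pipeline = PretrainedPipeline("roberta_base_squad2_finetuned_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+# "answer" is the assumed column holding the extracted span
+annotations.select("answer.result").show(truncate=False)
+```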
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_squad2_finetuned_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|463.8 MB| + +## References + +https://huggingface.co/pluemnvd01/roberta-base-squad2-finetuned + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_base_squad2_finetuned_roberta_en.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_base_squad2_finetuned_roberta_en.md new file mode 100644 index 00000000000000..02ba28c4df2964 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_base_squad2_finetuned_roberta_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English roberta_base_squad2_finetuned_roberta RoBertaForQuestionAnswering from srirammadduri-ts +author: John Snow Labs +name: roberta_base_squad2_finetuned_roberta +date: 2024-09-04 +tags: [en, open_source, onnx, question_answering, roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_squad2_finetuned_roberta` is a English model originally trained by srirammadduri-ts. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_squad2_finetuned_roberta_en_5.5.0_3.0_1725480106986.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_squad2_finetuned_roberta_en_5.5.0_3.0_1725480106986.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_base_squad2_finetuned_roberta","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_base_squad2_finetuned_roberta", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_squad2_finetuned_roberta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|463.6 MB| + +## References + +https://huggingface.co/srirammadduri-ts/roberta-base-squad2-finetuned-roberta \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_classifier_base_bne_finetuned_cyberbullying_spanish_pipeline_es.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_classifier_base_bne_finetuned_cyberbullying_spanish_pipeline_es.md new file mode 100644 index 00000000000000..a177a4be8fc01d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_classifier_base_bne_finetuned_cyberbullying_spanish_pipeline_es.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Castilian, Spanish roberta_classifier_base_bne_finetuned_cyberbullying_spanish_pipeline pipeline RoBertaForSequenceClassification from JonatanGk +author: John Snow Labs +name: roberta_classifier_base_bne_finetuned_cyberbullying_spanish_pipeline +date: 2024-09-04 +tags: [es, open_source, pipeline, onnx] +task: Text Classification +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_classifier_base_bne_finetuned_cyberbullying_spanish_pipeline` is a Castilian, Spanish model originally trained by JonatanGk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_classifier_base_bne_finetuned_cyberbullying_spanish_pipeline_es_5.5.0_3.0_1725452825547.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_classifier_base_bne_finetuned_cyberbullying_spanish_pipeline_es_5.5.0_3.0_1725452825547.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_classifier_base_bne_finetuned_cyberbullying_spanish_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_classifier_base_bne_finetuned_cyberbullying_spanish_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
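+The snippet above assumes a Spark DataFrame `df` already exists. A minimal sketch of building one and reading the predictions follows; the input column name `text` and the output column `class` are assumptions based on the annotators included in this pipeline.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()  # start (or reuse) a Spark NLP session
+
+# assumed input column name: "text"
+df = spark.createDataFrame([["Me encanta Spark NLP."]]).toDF("text")
+
+pipeline = PretrainedPipeline("roberta_classifier_base_bne_finetuned_cyberbullying_spanish_pipeline", lang = "es")
+annotations = pipeline.transform(df)
+
+# "class" is the assumed column holding the classifier's predictions
+annotations.select("class.result").show(truncate=False)
+```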
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_classifier_base_bne_finetuned_cyberbullying_spanish_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|464.7 MB| + +## References + +https://huggingface.co/JonatanGk/roberta-base-bne-finetuned-cyberbullying-spanish + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_finetuned_location_en.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_finetuned_location_en.md new file mode 100644 index 00000000000000..a0c24c649e639a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_finetuned_location_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English roberta_finetuned_location RoBertaForQuestionAnswering from svo2 +author: John Snow Labs +name: roberta_finetuned_location +date: 2024-09-04 +tags: [en, open_source, onnx, question_answering, roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_finetuned_location` is a English model originally trained by svo2. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_finetuned_location_en_5.5.0_3.0_1725479714735.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_finetuned_location_en_5.5.0_3.0_1725479714735.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_finetuned_location","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_finetuned_location", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_finetuned_location| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|464.0 MB| + +## References + +https://huggingface.co/svo2/roberta-finetuned-location \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_finetuned_location_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_finetuned_location_pipeline_en.md new file mode 100644 index 00000000000000..44daa8f96e93ac --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_finetuned_location_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English roberta_finetuned_location_pipeline pipeline RoBertaForQuestionAnswering from svo2 +author: John Snow Labs +name: roberta_finetuned_location_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_finetuned_location_pipeline` is a English model originally trained by svo2. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_finetuned_location_pipeline_en_5.5.0_3.0_1725479737576.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_finetuned_location_pipeline_en_5.5.0_3.0_1725479737576.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_finetuned_location_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_finetuned_location_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
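+The snippet above assumes a Spark DataFrame `df` already exists. A minimal sketch of building one and reading the extracted answers follows; the input column names `question` and `context` and the output column `answer` are assumptions based on the annotators included in this pipeline.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()  # start (or reuse) a Spark NLP session
+
+# assumed input column names: "question" and "context"
+df = spark.createDataFrame(
+    [["What framework do I use?", "I use spark-nlp."]]
+).toDF("question", "context")
+
+pipeline = PretrainedPipeline("roberta_finetuned_location_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+# "answer" is the assumed column holding the extracted span
+annotations.select("answer.result").show(truncate=False)
+```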
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_finetuned_location_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|464.0 MB| + +## References + +https://huggingface.co/svo2/roberta-finetuned-location + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_finetuned_machinesfaults_en.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_finetuned_machinesfaults_en.md new file mode 100644 index 00000000000000..8037b8c76907a6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_finetuned_machinesfaults_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English roberta_finetuned_machinesfaults RoBertaForQuestionAnswering from mbarte +author: John Snow Labs +name: roberta_finetuned_machinesfaults +date: 2024-09-04 +tags: [en, open_source, onnx, question_answering, roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_finetuned_machinesfaults` is a English model originally trained by mbarte. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_finetuned_machinesfaults_en_5.5.0_3.0_1725483720363.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_finetuned_machinesfaults_en_5.5.0_3.0_1725483720363.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_finetuned_machinesfaults","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_finetuned_machinesfaults", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_finetuned_machinesfaults| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|463.6 MB| + +## References + +https://huggingface.co/mbarte/roberta_finetuned_machinesfaults \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_finetuned_subjqa_movies_1110pm_en.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_finetuned_subjqa_movies_1110pm_en.md new file mode 100644 index 00000000000000..86d70ef100d756 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_finetuned_subjqa_movies_1110pm_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English roberta_finetuned_subjqa_movies_1110pm RoBertaForQuestionAnswering from 96harsh56 +author: John Snow Labs +name: roberta_finetuned_subjqa_movies_1110pm +date: 2024-09-04 +tags: [en, open_source, onnx, question_answering, roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_finetuned_subjqa_movies_1110pm` is a English model originally trained by 96harsh56. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_finetuned_subjqa_movies_1110pm_en_5.5.0_3.0_1725451695820.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_finetuned_subjqa_movies_1110pm_en_5.5.0_3.0_1725451695820.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_finetuned_subjqa_movies_1110pm","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_finetuned_subjqa_movies_1110pm", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_finetuned_subjqa_movies_1110pm| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|464.5 MB| + +## References + +https://huggingface.co/96harsh56/roberta-finetuned-subjqa-movies_1110pm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_finetuned_subjqa_movies_2_soumiknayak_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_finetuned_subjqa_movies_2_soumiknayak_pipeline_en.md new file mode 100644 index 00000000000000..2e90cc5d8fd534 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_finetuned_subjqa_movies_2_soumiknayak_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English roberta_finetuned_subjqa_movies_2_soumiknayak_pipeline pipeline RoBertaForQuestionAnswering from SoumikNayak +author: John Snow Labs +name: roberta_finetuned_subjqa_movies_2_soumiknayak_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_finetuned_subjqa_movies_2_soumiknayak_pipeline` is a English model originally trained by SoumikNayak. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_finetuned_subjqa_movies_2_soumiknayak_pipeline_en_5.5.0_3.0_1725483357931.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_finetuned_subjqa_movies_2_soumiknayak_pipeline_en_5.5.0_3.0_1725483357931.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_finetuned_subjqa_movies_2_soumiknayak_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_finetuned_subjqa_movies_2_soumiknayak_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
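+The snippet above assumes a Spark DataFrame `df` already exists. A minimal sketch of building one and reading the extracted answers follows; the input column names `question` and `context` and the output column `answer` are assumptions based on the annotators included in this pipeline.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()  # start (or reuse) a Spark NLP session
+
+# assumed input column names: "question" and "context"
+df = spark.createDataFrame(
+    [["What framework do I use?", "I use spark-nlp."]]
+).toDF("question", "context")
+
+pipeline = PretrainedPipeline("roberta_finetuned_subjqa_movies_2_soumiknayak_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+# "answer" is the assumed column holding the extracted span
+annotations.select("answer.result").show(truncate=False)
+```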
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_finetuned_subjqa_movies_2_soumiknayak_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|463.6 MB| + +## References + +https://huggingface.co/SoumikNayak/roberta-finetuned-subjqa-movies_2 + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_human_label_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_human_label_pipeline_en.md new file mode 100644 index 00000000000000..eef13dd833e742 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_human_label_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_human_label_pipeline pipeline RoBertaForSequenceClassification from Multiperspective +author: John Snow Labs +name: roberta_human_label_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_human_label_pipeline` is a English model originally trained by Multiperspective. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_human_label_pipeline_en_5.5.0_3.0_1725452324246.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_human_label_pipeline_en_5.5.0_3.0_1725452324246.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_human_label_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_human_label_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
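+The snippet above assumes a Spark DataFrame `df` already exists. A minimal sketch of building one and reading the predictions follows; the input column name `text` and the output column `class` are assumptions based on the annotators included in this pipeline.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()  # start (or reuse) a Spark NLP session
+
+# assumed input column name: "text"
+df = spark.createDataFrame([["I love Spark NLP!"]]).toDF("text")
+
+pipeline = PretrainedPipeline("roberta_human_label_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+# "class" is the assumed column holding the classifier's predictions
+annotations.select("class.result").show(truncate=False)
+```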
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_human_label_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/Multiperspective/roberta-human-label + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_indosquadv2_1691412431_8_2e_05_0_01_5_en.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_indosquadv2_1691412431_8_2e_05_0_01_5_en.md new file mode 100644 index 00000000000000..323526b004cc1f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_indosquadv2_1691412431_8_2e_05_0_01_5_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English roberta_indosquadv2_1691412431_8_2e_05_0_01_5 RoBertaForQuestionAnswering from rizquuula +author: John Snow Labs +name: roberta_indosquadv2_1691412431_8_2e_05_0_01_5 +date: 2024-09-04 +tags: [en, open_source, onnx, question_answering, roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_indosquadv2_1691412431_8_2e_05_0_01_5` is a English model originally trained by rizquuula. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_indosquadv2_1691412431_8_2e_05_0_01_5_en_5.5.0_3.0_1725484001240.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_indosquadv2_1691412431_8_2e_05_0_01_5_en_5.5.0_3.0_1725484001240.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_indosquadv2_1691412431_8_2e_05_0_01_5","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_indosquadv2_1691412431_8_2e_05_0_01_5", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_indosquadv2_1691412431_8_2e_05_0_01_5| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|454.4 MB| + +## References + +https://huggingface.co/rizquuula/RoBERTa-IndoSQuADv2_1691412431-8-2e-05-0.01-5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_indosquadv2_1691412431_8_2e_05_0_01_5_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_indosquadv2_1691412431_8_2e_05_0_01_5_pipeline_en.md new file mode 100644 index 00000000000000..beb858a682e5d8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_indosquadv2_1691412431_8_2e_05_0_01_5_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English roberta_indosquadv2_1691412431_8_2e_05_0_01_5_pipeline pipeline RoBertaForQuestionAnswering from rizquuula +author: John Snow Labs +name: roberta_indosquadv2_1691412431_8_2e_05_0_01_5_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_indosquadv2_1691412431_8_2e_05_0_01_5_pipeline` is a English model originally trained by rizquuula. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_indosquadv2_1691412431_8_2e_05_0_01_5_pipeline_en_5.5.0_3.0_1725484022923.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_indosquadv2_1691412431_8_2e_05_0_01_5_pipeline_en_5.5.0_3.0_1725484022923.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_indosquadv2_1691412431_8_2e_05_0_01_5_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_indosquadv2_1691412431_8_2e_05_0_01_5_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
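+The snippet above assumes a Spark DataFrame `df` already exists. A minimal sketch of building one and reading the extracted answers follows; the input column names `question` and `context` and the output column `answer` are assumptions based on the annotators included in this pipeline.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()  # start (or reuse) a Spark NLP session
+
+# assumed input column names: "question" and "context"
+df = spark.createDataFrame(
+    [["What framework do I use?", "I use spark-nlp."]]
+).toDF("question", "context")
+
+pipeline = PretrainedPipeline("roberta_indosquadv2_1691412431_8_2e_05_0_01_5_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+# "answer" is the assumed column holding the extracted span
+annotations.select("answer.result").show(truncate=False)
+```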
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_indosquadv2_1691412431_8_2e_05_0_01_5_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|454.4 MB| + +## References + +https://huggingface.co/rizquuula/RoBERTa-IndoSQuADv2_1691412431-8-2e-05-0.01-5 + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_large_bne_sqac_es.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_large_bne_sqac_es.md new file mode 100644 index 00000000000000..a1e1162b622d99 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_large_bne_sqac_es.md @@ -0,0 +1,86 @@ +--- +layout: model +title: Castilian, Spanish roberta_large_bne_sqac RoBertaForQuestionAnswering from BSC-LT +author: John Snow Labs +name: roberta_large_bne_sqac +date: 2024-09-04 +tags: [es, open_source, onnx, question_answering, roberta] +task: Question Answering +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_large_bne_sqac` is a Castilian, Spanish model originally trained by BSC-LT. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_large_bne_sqac_es_5.5.0_3.0_1725479936708.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_large_bne_sqac_es_5.5.0_3.0_1725479936708.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_large_bne_sqac","es") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_large_bne_sqac", "es")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_large_bne_sqac| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|es| +|Size:|1.3 GB| + +## References + +https://huggingface.co/BSC-LT/roberta-large-bne-sqac \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_model_abdulrahman4111_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_model_abdulrahman4111_pipeline_en.md new file mode 100644 index 00000000000000..31d9e3d8db02a8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_model_abdulrahman4111_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_model_abdulrahman4111_pipeline pipeline RoBertaForSequenceClassification from abdulrahman4111 +author: John Snow Labs +name: roberta_model_abdulrahman4111_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_model_abdulrahman4111_pipeline` is a English model originally trained by abdulrahman4111. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_model_abdulrahman4111_pipeline_en_5.5.0_3.0_1725452533617.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_model_abdulrahman4111_pipeline_en_5.5.0_3.0_1725452533617.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_model_abdulrahman4111_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_model_abdulrahman4111_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
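+The snippet above assumes a Spark DataFrame `df` already exists. A minimal sketch of building one and reading the predictions follows; the input column name `text` and the output column `class` are assumptions based on the annotators included in this pipeline.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()  # start (or reuse) a Spark NLP session
+
+# assumed input column name: "text"
+df = spark.createDataFrame([["I love Spark NLP!"]]).toDF("text")
+
+pipeline = PretrainedPipeline("roberta_model_abdulrahman4111_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+# "class" is the assumed column holding the classifier's predictions
+annotations.select("class.result").show(truncate=False)
+```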
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_model_abdulrahman4111_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|300.4 MB| + +## References + +https://huggingface.co/abdulrahman4111/roberta-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_mrqa_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_mrqa_pipeline_en.md new file mode 100644 index 00000000000000..2e28fa94875740 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_mrqa_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English roberta_mrqa_pipeline pipeline RoBertaForQuestionAnswering from enriquesaou +author: John Snow Labs +name: roberta_mrqa_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_mrqa_pipeline` is a English model originally trained by enriquesaou. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_mrqa_pipeline_en_5.5.0_3.0_1725483902600.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_mrqa_pipeline_en_5.5.0_3.0_1725483902600.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_mrqa_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_mrqa_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_mrqa_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|464.9 MB| + +## References + +https://huggingface.co/enriquesaou/roberta-mrqa + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_Roberta_en.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_Roberta_en.md new file mode 100644 index 00000000000000..cb412dd7793963 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_Roberta_en.md @@ -0,0 +1,106 @@ +--- +layout: model +title: English RobertaForQuestionAnswering (from Mr-Wick) +author: John Snow Labs +name: roberta_qa_Roberta +date: 2024-09-04 +tags: [en, open_source, question_answering, roberta, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `Roberta` is a English model originally trained by `Mr-Wick`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_qa_Roberta_en_5.5.0_3.0_1725451110266.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_qa_Roberta_en_5.5.0_3.0_1725451110266.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_qa_Roberta","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer") \ +.setCaseSensitive(True) + +pipeline = Pipeline().setStages([ +document_assembler, +spanClassifier +]) + +example = spark.createDataFrame([["What's my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(example).transform(example) +``` +```scala +val document = new MultiDocumentAssembler() +.setInputCols("question", "context") +.setOutputCols("document_question", "document_context") + +val spanClassifier = RoBertaForQuestionAnswering +.pretrained("roberta_qa_Roberta","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) +.setMaxSentenceLength(512) + +val pipeline = new Pipeline().setStages(Array(document, spanClassifier)) + +val example = Seq( +("Where was John Lenon born?", "John Lenon was born in London and lived in Paris. My name is Sarah and I live in London."), +("What's my name?", "My name is Clara and I live in Berkeley.")) +.toDF("question", "context") + +val result = pipeline.fit(example).transform(example) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.roberta.by_Mr-Wick").predict("""What's my name?|||"My name is Clara and I live in Berkeley.""") +``` +
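 Once the pipeline above has been fitted and applied, the predicted span lives in the `answer` output column; the sketch below is a hedged follow-up (not part of the original card) showing one way to read it back, plus an optional `LightPipeline` variant for quick driver-side checks. ```python # Read back the predicted answer span; "answer" is the output column set on the span classifier above. result.select("document_question.result", "answer.result").show(truncate=False) # Optional: for ad-hoc question/context pairs, a LightPipeline avoids building a DataFrame. # Assumes the two-argument fullAnnotate(question, context) available in recent Spark NLP releases. from sparknlp.base import LightPipeline light = LightPipeline(pipeline.fit(example)) print(light.fullAnnotate("What's my name?", "My name is Clara and I live in Berkeley.")) ``` 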
 + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_qa_Roberta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|462.9 MB| + +## References + +- https://huggingface.co/Mr-Wick/Roberta \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_Roberta_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_Roberta_pipeline_en.md new file mode 100644 index 00000000000000..3642f107a6338e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_Roberta_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English roberta_qa_Roberta_pipeline pipeline RoBertaForQuestionAnswering from nlpunibo +author: John Snow Labs +name: roberta_qa_Roberta_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `roberta_qa_Roberta_pipeline` is an English model originally trained by nlpunibo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_qa_Roberta_pipeline_en_5.5.0_3.0_1725451136126.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_qa_Roberta_pipeline_en_5.5.0_3.0_1725451136126.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_qa_Roberta_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_qa_Roberta_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_qa_Roberta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|462.9 MB| + +## References + +https://huggingface.co/nlpunibo/roberta + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_TestQaV1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_TestQaV1_pipeline_en.md new file mode 100644 index 00000000000000..6783ca66e5499a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_TestQaV1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English roberta_qa_TestQaV1_pipeline pipeline RoBertaForQuestionAnswering from Andranik +author: John Snow Labs +name: roberta_qa_TestQaV1_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_qa_TestQaV1_pipeline` is a English model originally trained by Andranik. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_qa_TestQaV1_pipeline_en_5.5.0_3.0_1725479321664.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_qa_TestQaV1_pipeline_en_5.5.0_3.0_1725479321664.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_qa_TestQaV1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_qa_TestQaV1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_qa_TestQaV1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|463.6 MB| + +## References + +https://huggingface.co/Andranik/TestQaV1 + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_ai_club_inductions_21_nlp_roBERTa_en.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_ai_club_inductions_21_nlp_roBERTa_en.md new file mode 100644 index 00000000000000..1834ccaf1c519b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_ai_club_inductions_21_nlp_roBERTa_en.md @@ -0,0 +1,106 @@ +--- +layout: model +title: English RobertaForQuestionAnswering (from AyushPJ) +author: John Snow Labs +name: roberta_qa_ai_club_inductions_21_nlp_roBERTa +date: 2024-09-04 +tags: [en, open_source, question_answering, roberta, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ai-club-inductions-21-nlp-roBERTa` is a English model originally trained by `AyushPJ`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_qa_ai_club_inductions_21_nlp_roBERTa_en_5.5.0_3.0_1725451220382.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_qa_ai_club_inductions_21_nlp_roBERTa_en_5.5.0_3.0_1725451220382.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_qa_ai_club_inductions_21_nlp_roBERTa","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer") \ +.setCaseSensitive(True) + +pipeline = Pipeline().setStages([ +document_assembler, +spanClassifier +]) + +example = spark.createDataFrame([["What's my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(example).transform(example) +``` +```scala +val document = new MultiDocumentAssembler() +.setInputCols("question", "context") +.setOutputCols("document_question", "document_context") + +val spanClassifier = RoBertaForQuestionAnswering +.pretrained("roberta_qa_ai_club_inductions_21_nlp_roBERTa","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) +.setMaxSentenceLength(512) + +val pipeline = new Pipeline().setStages(Array(document, spanClassifier)) + +val example = Seq( +("Where was John Lenon born?", "John Lenon was born in London and lived in Paris. My name is Sarah and I live in London."), +("What's my name?", "My name is Clara and I live in Berkeley.")) +.toDF("question", "context") + +val result = pipeline.fit(example).transform(example) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.roberta.by_AyushPJ").predict("""What's my name?|||"My name is Clara and I live in Berkeley.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_qa_ai_club_inductions_21_nlp_roBERTa| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|464.6 MB| + +## References + +References + +- https://huggingface.co/AyushPJ/ai-club-inductions-21-nlp-roBERTa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_base_spanish_squades_becasincentivos2_es.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_base_spanish_squades_becasincentivos2_es.md new file mode 100644 index 00000000000000..813342d902a19c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_base_spanish_squades_becasincentivos2_es.md @@ -0,0 +1,92 @@ +--- +layout: model +title: Spanish RobertaForQuestionAnswering Base Cased model (from Evelyn18) +author: John Snow Labs +name: roberta_qa_base_spanish_squades_becasincentivos2 +date: 2024-09-04 +tags: [es, open_source, roberta, question_answering, onnx] +task: Question Answering +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RobertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `roberta-base-spanish-squades-becasIncentivos2` is a Spanish model originally trained by `Evelyn18`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_qa_base_spanish_squades_becasincentivos2_es_5.5.0_3.0_1725479283065.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_qa_base_spanish_squades_becasincentivos2_es_5.5.0_3.0_1725479283065.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +Document_Assembler = MultiDocumentAssembler()\ + .setInputCols(["question", "context"])\ + .setOutputCols(["document_question", "document_context"]) + +Question_Answering = RoBertaForQuestionAnswering.pretrained("roberta_qa_base_spanish_squades_becasincentivos2","es")\ + .setInputCols(["document_question", "document_context"])\ + .setOutputCol("answer")\ + .setCaseSensitive(True) + +pipeline = Pipeline(stages=[Document_Assembler, Question_Answering]) + +data = spark.createDataFrame([["What's my name?","My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(data).transform(data) +``` +```scala +val Document_Assembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val Question_Answering = RoBertaForQuestionAnswering.pretrained("roberta_qa_base_spanish_squades_becasincentivos2","es") + .setInputCols(Array("document_question", "document_context")) + .setOutputCol("answer") + .setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(Document_Assembler, Question_Answering)) + +val data = Seq("What's my name?","My name is Clara and I live in Berkeley.").toDS.toDF("question", "context") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_qa_base_spanish_squades_becasincentivos2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|es| +|Size:|459.1 MB| + +## References + +References + +- https://huggingface.co/Evelyn18/roberta-base-spanish-squades-becasIncentivos2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_base_super_1_en.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_base_super_1_en.md new file mode 100644 index 00000000000000..2f8e8598d90028 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_base_super_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English RobertaForQuestionAnswering Base Cased model (from nbroad) +author: John Snow Labs +name: roberta_qa_base_super_1 +date: 2024-09-04 +tags: [en, open_source, roberta, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RobertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `rob-base-superqa1` is a English model originally trained by `nbroad`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_qa_base_super_1_en_5.5.0_3.0_1725451114544.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_qa_base_super_1_en_5.5.0_3.0_1725451114544.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +Document_Assembler = MultiDocumentAssembler()\ + .setInputCols(["question", "context"])\ + .setOutputCols(["document_question", "document_context"]) + +Question_Answering = RoBertaForQuestionAnswering.pretrained("roberta_qa_base_super_1","en")\ + .setInputCols(["document_question", "document_context"])\ + .setOutputCol("answer")\ + .setCaseSensitive(True) + +pipeline = Pipeline(stages=[Document_Assembler, Question_Answering]) + +data = spark.createDataFrame([["What's my name?","My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(data).transform(data) +``` +```scala +val Document_Assembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val Question_Answering = RoBertaForQuestionAnswering.pretrained("roberta_qa_base_super_1","en") + .setInputCols(Array("document_question", "document_context")) + .setOutputCol("answer") + .setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(Document_Assembler, Question_Answering)) + +val data = Seq("What's my name?","My name is Clara and I live in Berkeley.").toDS.toDF("question", "context") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_qa_base_super_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|463.4 MB| + +## References + +References + +- https://huggingface.co/nbroad/rob-base-superqa1 +- https://paperswithcode.com/sota?task=Question+Answering&dataset=adversarial_qa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_base_super_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_base_super_1_pipeline_en.md new file mode 100644 index 00000000000000..b92d4650704a9e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_base_super_1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English roberta_qa_base_super_1_pipeline pipeline RoBertaForQuestionAnswering from nbroad +author: John Snow Labs +name: roberta_qa_base_super_1_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_qa_base_super_1_pipeline` is a English model originally trained by nbroad. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_qa_base_super_1_pipeline_en_5.5.0_3.0_1725451142598.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_qa_base_super_1_pipeline_en_5.5.0_3.0_1725451142598.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_qa_base_super_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_qa_base_super_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_qa_base_super_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|463.4 MB| + +## References + +https://huggingface.co/nbroad/rob-base-superqa1 + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_finetuned_city_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_finetuned_city_pipeline_en.md new file mode 100644 index 00000000000000..3170720f6f59c4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_finetuned_city_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English roberta_qa_finetuned_city_pipeline pipeline RoBertaForQuestionAnswering from skandaonsolve +author: John Snow Labs +name: roberta_qa_finetuned_city_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_qa_finetuned_city_pipeline` is a English model originally trained by skandaonsolve. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_qa_finetuned_city_pipeline_en_5.5.0_3.0_1725450776127.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_qa_finetuned_city_pipeline_en_5.5.0_3.0_1725450776127.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_qa_finetuned_city_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_qa_finetuned_city_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_qa_finetuned_city_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|463.9 MB| + +## References + +https://huggingface.co/skandaonsolve/roberta-finetuned-city + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_fpdm_hier_roberta_FT_newsqa_en.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_fpdm_hier_roberta_FT_newsqa_en.md new file mode 100644 index 00000000000000..911a0b476a8f98 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_fpdm_hier_roberta_FT_newsqa_en.md @@ -0,0 +1,106 @@ +--- +layout: model +title: English RobertaForQuestionAnswering (from AnonymousSub) +author: John Snow Labs +name: roberta_qa_fpdm_hier_roberta_FT_newsqa +date: 2024-09-04 +tags: [en, open_source, question_answering, roberta, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `fpdm_hier_roberta_FT_newsqa` is a English model originally trained by `AnonymousSub`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_qa_fpdm_hier_roberta_FT_newsqa_en_5.5.0_3.0_1725451174339.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_qa_fpdm_hier_roberta_FT_newsqa_en_5.5.0_3.0_1725451174339.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_qa_fpdm_hier_roberta_FT_newsqa","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer") \ +.setCaseSensitive(True) + +pipeline = Pipeline().setStages([ +document_assembler, +spanClassifier +]) + +example = spark.createDataFrame([["What's my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(example).transform(example) +``` +```scala +val document = new MultiDocumentAssembler() +.setInputCols("question", "context") +.setOutputCols("document_question", "document_context") + +val spanClassifier = RoBertaForQuestionAnswering +.pretrained("roberta_qa_fpdm_hier_roberta_FT_newsqa","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) +.setMaxSentenceLength(512) + +val pipeline = new Pipeline().setStages(Array(document, spanClassifier)) + +val example = Seq( +("Where was John Lenon born?", "John Lenon was born in London and lived in Paris. My name is Sarah and I live in London."), +("What's my name?", "My name is Clara and I live in Berkeley.")) +.toDF("question", "context") + +val result = pipeline.fit(example).transform(example) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.news.roberta.qa_fpdm_hier_roberta_ft_newsqa.by_AnonymousSub").predict("""What's my name?|||"My name is Clara and I live in Berkeley.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_qa_fpdm_hier_roberta_FT_newsqa| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|457.8 MB| + +## References + +References + +- https://huggingface.co/AnonymousSub/fpdm_hier_roberta_FT_newsqa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_large_few_shot_k_1024_finetuned_squad_seed_4_en.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_large_few_shot_k_1024_finetuned_squad_seed_4_en.md new file mode 100644 index 00000000000000..83a57da69c8b69 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_large_few_shot_k_1024_finetuned_squad_seed_4_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English RobertaForQuestionAnswering Large Cased model (from anas-awadalla) +author: John Snow Labs +name: roberta_qa_large_few_shot_k_1024_finetuned_squad_seed_4 +date: 2024-09-04 +tags: [en, open_source, roberta, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RobertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `roberta-large-few-shot-k-1024-finetuned-squad-seed-4` is a English model originally trained by `anas-awadalla`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_qa_large_few_shot_k_1024_finetuned_squad_seed_4_en_5.5.0_3.0_1725478972233.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_qa_large_few_shot_k_1024_finetuned_squad_seed_4_en_5.5.0_3.0_1725478972233.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +Document_Assembler = MultiDocumentAssembler()\ + .setInputCols(["question", "context"])\ + .setOutputCols(["document_question", "document_context"]) + +Question_Answering = RoBertaForQuestionAnswering.pretrained("roberta_qa_large_few_shot_k_1024_finetuned_squad_seed_4","en")\ + .setInputCols(["document_question", "document_context"])\ + .setOutputCol("answer")\ + .setCaseSensitive(True) + +pipeline = Pipeline(stages=[Document_Assembler, Question_Answering]) + +data = spark.createDataFrame([["What's my name?","My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(data).transform(data) +``` +```scala +val Document_Assembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val Question_Answering = RoBertaForQuestionAnswering.pretrained("roberta_qa_large_few_shot_k_1024_finetuned_squad_seed_4","en") + .setInputCols(Array("document_question", "document_context")) + .setOutputCol("answer") + .setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(Document_Assembler, Question_Answering)) + +val data = Seq("What's my name?","My name is Clara and I live in Berkeley.").toDS.toDF("question", "context") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_qa_large_few_shot_k_1024_finetuned_squad_seed_4| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|1.3 GB| + +## References + +References + +- https://huggingface.co/anas-awadalla/roberta-large-few-shot-k-1024-finetuned-squad-seed-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_news_pretrain_roberta_FT_newsqa_en.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_news_pretrain_roberta_FT_newsqa_en.md new file mode 100644 index 00000000000000..3f6dee8f08c43e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_news_pretrain_roberta_FT_newsqa_en.md @@ -0,0 +1,106 @@ +--- +layout: model +title: English RobertaForQuestionAnswering (from AnonymousSub) +author: John Snow Labs +name: roberta_qa_news_pretrain_roberta_FT_newsqa +date: 2024-09-04 +tags: [en, open_source, question_answering, roberta, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `news_pretrain_roberta_FT_newsqa` is a English model originally trained by `AnonymousSub`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_qa_news_pretrain_roberta_FT_newsqa_en_5.5.0_3.0_1725479461479.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_qa_news_pretrain_roberta_FT_newsqa_en_5.5.0_3.0_1725479461479.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_qa_news_pretrain_roberta_FT_newsqa","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer") \ +.setCaseSensitive(True) + +pipeline = Pipeline().setStages([ +document_assembler, +spanClassifier +]) + +example = spark.createDataFrame([["What's my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(example).transform(example) +``` +```scala +val document = new MultiDocumentAssembler() +.setInputCols("question", "context") +.setOutputCols("document_question", "document_context") + +val spanClassifier = RoBertaForQuestionAnswering +.pretrained("roberta_qa_news_pretrain_roberta_FT_newsqa","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) +.setMaxSentenceLength(512) + +val pipeline = new Pipeline().setStages(Array(document, spanClassifier)) + +val example = Seq( +("Where was John Lenon born?", "John Lenon was born in London and lived in Paris. My name is Sarah and I live in London."), +("What's my name?", "My name is Clara and I live in Berkeley.")) +.toDF("question", "context") + +val result = pipeline.fit(example).transform(example) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.news.roberta.qa_ft.by_AnonymousSub").predict("""What's my name?|||"My name is Clara and I live in Berkeley.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_qa_news_pretrain_roberta_FT_newsqa| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|466.3 MB| + +## References + +References + +- https://huggingface.co/AnonymousSub/news_pretrain_roberta_FT_newsqa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_roberta_base_few_shot_k_32_finetuned_squad_seed_2_en.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_roberta_base_few_shot_k_32_finetuned_squad_seed_2_en.md new file mode 100644 index 00000000000000..8d2dc9c0063a6e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_roberta_base_few_shot_k_32_finetuned_squad_seed_2_en.md @@ -0,0 +1,106 @@ +--- +layout: model +title: English RobertaForQuestionAnswering (from anas-awadalla) +author: John Snow Labs +name: roberta_qa_roberta_base_few_shot_k_32_finetuned_squad_seed_2 +date: 2024-09-04 +tags: [en, open_source, question_answering, roberta, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `roberta-base-few-shot-k-32-finetuned-squad-seed-2` is a English model originally trained by `anas-awadalla`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_qa_roberta_base_few_shot_k_32_finetuned_squad_seed_2_en_5.5.0_3.0_1725479226024.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_qa_roberta_base_few_shot_k_32_finetuned_squad_seed_2_en_5.5.0_3.0_1725479226024.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_qa_roberta_base_few_shot_k_32_finetuned_squad_seed_2","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer") \ +.setCaseSensitive(True) + +pipeline = Pipeline().setStages([ +document_assembler, +spanClassifier +]) + +example = spark.createDataFrame([["What's my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(example).transform(example) +``` +```scala +val document = new MultiDocumentAssembler() +.setInputCols("question", "context") +.setOutputCols("document_question", "document_context") + +val spanClassifier = RoBertaForQuestionAnswering +.pretrained("roberta_qa_roberta_base_few_shot_k_32_finetuned_squad_seed_2","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) +.setMaxSentenceLength(512) + +val pipeline = new Pipeline().setStages(Array(document, spanClassifier)) + +val example = Seq( +("Where was John Lenon born?", "John Lenon was born in London and lived in Paris. My name is Sarah and I live in London."), +("What's my name?", "My name is Clara and I live in Berkeley.")) +.toDF("question", "context") + +val result = pipeline.fit(example).transform(example) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.squad.roberta.base_32d_seed_2").predict("""What's my name?|||"My name is Clara and I live in Berkeley.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_qa_roberta_base_few_shot_k_32_finetuned_squad_seed_2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|417.0 MB| + +## References + +References + +- https://huggingface.co/anas-awadalla/roberta-base-few-shot-k-32-finetuned-squad-seed-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_roberta_base_few_shot_k_32_finetuned_squad_seed_2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_roberta_base_few_shot_k_32_finetuned_squad_seed_2_pipeline_en.md new file mode 100644 index 00000000000000..d92ddf17c588f9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_roberta_base_few_shot_k_32_finetuned_squad_seed_2_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English roberta_qa_roberta_base_few_shot_k_32_finetuned_squad_seed_2_pipeline pipeline RoBertaForQuestionAnswering from anas-awadalla +author: John Snow Labs +name: roberta_qa_roberta_base_few_shot_k_32_finetuned_squad_seed_2_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_qa_roberta_base_few_shot_k_32_finetuned_squad_seed_2_pipeline` is a English model originally trained by anas-awadalla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_qa_roberta_base_few_shot_k_32_finetuned_squad_seed_2_pipeline_en_5.5.0_3.0_1725479268525.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_qa_roberta_base_few_shot_k_32_finetuned_squad_seed_2_pipeline_en_5.5.0_3.0_1725479268525.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_qa_roberta_base_few_shot_k_32_finetuned_squad_seed_2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_qa_roberta_base_few_shot_k_32_finetuned_squad_seed_2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_qa_roberta_base_few_shot_k_32_finetuned_squad_seed_2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|417.0 MB| + +## References + +https://huggingface.co/anas-awadalla/roberta-base-few-shot-k-32-finetuned-squad-seed-2 + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_squadv2_recipe_tokenwise_token_and_step_losses_3_epochs_en.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_squadv2_recipe_tokenwise_token_and_step_losses_3_epochs_en.md new file mode 100644 index 00000000000000..0f6fe40add91d9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_qa_squadv2_recipe_tokenwise_token_and_step_losses_3_epochs_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English RobertaForQuestionAnswering Cased model (from AnonymousSub) +author: John Snow Labs +name: roberta_qa_squadv2_recipe_tokenwise_token_and_step_losses_3_epochs +date: 2024-09-04 +tags: [en, open_source, roberta, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RobertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `squadv2-recipe-roberta-tokenwise-token-and-step-losses-3-epochs` is a English model originally trained by `AnonymousSub`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_qa_squadv2_recipe_tokenwise_token_and_step_losses_3_epochs_en_5.5.0_3.0_1725451154326.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_qa_squadv2_recipe_tokenwise_token_and_step_losses_3_epochs_en_5.5.0_3.0_1725451154326.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +Document_Assembler = MultiDocumentAssembler()\ + .setInputCols(["question", "context"])\ + .setOutputCols(["document_question", "document_context"]) + +Question_Answering = RoBertaForQuestionAnswering.pretrained("roberta_qa_squadv2_recipe_tokenwise_token_and_step_losses_3_epochs","en")\ + .setInputCols(["document_question", "document_context"])\ + .setOutputCol("answer")\ + .setCaseSensitive(True) + +pipeline = Pipeline(stages=[Document_Assembler, Question_Answering]) + +data = spark.createDataFrame([["What's my name?","My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(data).transform(data) +``` +```scala +val Document_Assembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val Question_Answering = RoBertaForQuestionAnswering.pretrained("roberta_qa_squadv2_recipe_tokenwise_token_and_step_losses_3_epochs","en") + .setInputCols(Array("document_question", "document_context")) + .setOutputCol("answer") + .setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(Document_Assembler, Question_Answering)) + +val data = Seq("What's my name?","My name is Clara and I live in Berkeley.").toDS.toDF("question", "context") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_qa_squadv2_recipe_tokenwise_token_and_step_losses_3_epochs| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|466.3 MB| + +## References + +References + +- https://huggingface.co/AnonymousSub/squadv2-recipe-roberta-tokenwise-token-and-step-losses-3-epochs \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_s2orc_books_wiki_bpe_32k_en.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_s2orc_books_wiki_bpe_32k_en.md new file mode 100644 index 00000000000000..df65da64557806 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_s2orc_books_wiki_bpe_32k_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_s2orc_books_wiki_bpe_32k CamemBertEmbeddings from nfliu +author: John Snow Labs +name: roberta_s2orc_books_wiki_bpe_32k +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_s2orc_books_wiki_bpe_32k` is a English model originally trained by nfliu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_s2orc_books_wiki_bpe_32k_en_5.5.0_3.0_1725409663088.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_s2orc_books_wiki_bpe_32k_en_5.5.0_3.0_1725409663088.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("roberta_s2orc_books_wiki_bpe_32k","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("roberta_s2orc_books_wiki_bpe_32k","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
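If the token-level embeddings produced above need to feed a downstream Spark ML stage, they can be flattened into plain vectors. The following is a small sketch using the column names from the example; the `finished_embeddings` name is an assumption for illustration.

```python
# Convert the annotation-style embeddings from the pipeline above into Spark ML vectors.
from sparknlp.base import EmbeddingsFinisher

finisher = EmbeddingsFinisher() \
    .setInputCols(["embeddings"]) \
    .setOutputCols(["finished_embeddings"]) \
    .setOutputAsVector(True)

finished = finisher.transform(pipelineDF)
finished.selectExpr("explode(finished_embeddings) as token_vector").show(5, truncate=80)
```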
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_s2orc_books_wiki_bpe_32k| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|263.3 MB| + +## References + +https://huggingface.co/nfliu/roberta_s2orc_books_wiki_bpe_32k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-roberta_s2orc_books_wiki_bpe_32k_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-roberta_s2orc_books_wiki_bpe_32k_pipeline_en.md new file mode 100644 index 00000000000000..7013ecb54f75ff --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-roberta_s2orc_books_wiki_bpe_32k_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_s2orc_books_wiki_bpe_32k_pipeline pipeline CamemBertEmbeddings from nfliu +author: John Snow Labs +name: roberta_s2orc_books_wiki_bpe_32k_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_s2orc_books_wiki_bpe_32k_pipeline` is a English model originally trained by nfliu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_s2orc_books_wiki_bpe_32k_pipeline_en_5.5.0_3.0_1725409737468.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_s2orc_books_wiki_bpe_32k_pipeline_en_5.5.0_3.0_1725409737468.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_s2orc_books_wiki_bpe_32k_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_s2orc_books_wiki_bpe_32k_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_s2orc_books_wiki_bpe_32k_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|263.3 MB| + +## References + +https://huggingface.co/nfliu/roberta_s2orc_books_wiki_bpe_32k + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-rotten_tomatoes_microsoft_deberta_v3_large_seed_1_en.md b/docs/_posts/ahmedlone127/2024-09-04-rotten_tomatoes_microsoft_deberta_v3_large_seed_1_en.md new file mode 100644 index 00000000000000..6bc4f4d7021411 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-rotten_tomatoes_microsoft_deberta_v3_large_seed_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English rotten_tomatoes_microsoft_deberta_v3_large_seed_1 DeBertaForSequenceClassification from utahnlp +author: John Snow Labs +name: rotten_tomatoes_microsoft_deberta_v3_large_seed_1 +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rotten_tomatoes_microsoft_deberta_v3_large_seed_1` is a English model originally trained by utahnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rotten_tomatoes_microsoft_deberta_v3_large_seed_1_en_5.5.0_3.0_1725467685228.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rotten_tomatoes_microsoft_deberta_v3_large_seed_1_en_5.5.0_3.0_1725467685228.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DeBertaForSequenceClassification.pretrained("rotten_tomatoes_microsoft_deberta_v3_large_seed_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DeBertaForSequenceClassification.pretrained("rotten_tomatoes_microsoft_deberta_v3_large_seed_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
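After the pipeline above has been applied, the predicted label sits in the `class` output column (note that the classifier's `setInputCols` should reference the assembler's `document` output rather than `documents` for the stages to line up). A short, hedged sketch of reading the predictions and the per-label scores Spark NLP keeps in the annotation metadata:

```python
# Show the predicted label per input row; "class" is the output column set on the classifier above.
pipelineDF.select("text", "class.result").show(truncate=False)

# The per-label confidence scores live in the annotation metadata of the same column.
pipelineDF.selectExpr("explode(class) as prediction") \
    .selectExpr("prediction.result as label", "prediction.metadata as scores") \
    .show(truncate=False)
```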
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rotten_tomatoes_microsoft_deberta_v3_large_seed_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/utahnlp/rotten_tomatoes_microsoft_deberta-v3-large_seed-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-rotten_tomatoes_microsoft_deberta_v3_large_seed_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-rotten_tomatoes_microsoft_deberta_v3_large_seed_1_pipeline_en.md new file mode 100644 index 00000000000000..d4b18489aef936 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-rotten_tomatoes_microsoft_deberta_v3_large_seed_1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English rotten_tomatoes_microsoft_deberta_v3_large_seed_1_pipeline pipeline DeBertaForSequenceClassification from utahnlp +author: John Snow Labs +name: rotten_tomatoes_microsoft_deberta_v3_large_seed_1_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rotten_tomatoes_microsoft_deberta_v3_large_seed_1_pipeline` is a English model originally trained by utahnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rotten_tomatoes_microsoft_deberta_v3_large_seed_1_pipeline_en_5.5.0_3.0_1725467817730.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rotten_tomatoes_microsoft_deberta_v3_large_seed_1_pipeline_en_5.5.0_3.0_1725467817730.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("rotten_tomatoes_microsoft_deberta_v3_large_seed_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("rotten_tomatoes_microsoft_deberta_v3_large_seed_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rotten_tomatoes_microsoft_deberta_v3_large_seed_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/utahnlp/rotten_tomatoes_microsoft_deberta-v3-large_seed-1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-same_story_en.md b/docs/_posts/ahmedlone127/2024-09-04-same_story_en.md new file mode 100644 index 00000000000000..8383988ae0f8ed --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-same_story_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English same_story MPNetEmbeddings from dell-research-harvard +author: John Snow Labs +name: same_story +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`same_story` is a English model originally trained by dell-research-harvard. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/same_story_en_5.5.0_3.0_1725469866240.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/same_story_en_5.5.0_3.0_1725469866240.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("same_story","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("same_story","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
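+As a follow-up sketch (an assumption, not from the original card), the document-level vectors produced above can be compared directly, for example with cosine similarity, which is how a "same story" check would typically be framed:
+
+```python
+# a minimal sketch: cosine similarity between two texts, reusing pipelineModel from above
+import numpy as np
+
+pair = spark.createDataFrame([["Stocks rallied on Monday"],
+                              ["Shares climbed at the start of the week"]]).toDF("text")
+rows = pipelineModel.transform(pair).select("embeddings").collect()
+
+# each row holds one MPNet annotation whose `embeddings` field is the vector
+a = np.array(rows[0]["embeddings"][0]["embeddings"])
+b = np.array(rows[1]["embeddings"][0]["embeddings"])
+print(float(a.dot(b) / (np.linalg.norm(a) * np.linalg.norm(b))))
+```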
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|same_story| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/dell-research-harvard/same-story \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-scandibert_norwegian_faroese_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-04-scandibert_norwegian_faroese_pipeline_xx.md new file mode 100644 index 00000000000000..485015297c38cb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-scandibert_norwegian_faroese_pipeline_xx.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Multilingual scandibert_norwegian_faroese_pipeline pipeline RoBertaEmbeddings from vesteinn +author: John Snow Labs +name: scandibert_norwegian_faroese_pipeline +date: 2024-09-04 +tags: [xx, open_source, pipeline, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`scandibert_norwegian_faroese_pipeline` is a Multilingual model originally trained by vesteinn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scandibert_norwegian_faroese_pipeline_xx_5.5.0_3.0_1725413112674.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scandibert_norwegian_faroese_pipeline_xx_5.5.0_3.0_1725413112674.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("scandibert_norwegian_faroese_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("scandibert_norwegian_faroese_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scandibert_norwegian_faroese_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|298.4 MB| + +## References + +https://huggingface.co/vesteinn/ScandiBERT-no-faroese + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-schemeclassifier3_eng_dial_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-schemeclassifier3_eng_dial_pipeline_en.md new file mode 100644 index 00000000000000..8faefe5d37c342 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-schemeclassifier3_eng_dial_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English schemeclassifier3_eng_dial_pipeline pipeline RoBertaForSequenceClassification from raruidol +author: John Snow Labs +name: schemeclassifier3_eng_dial_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`schemeclassifier3_eng_dial_pipeline` is a English model originally trained by raruidol. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/schemeclassifier3_eng_dial_pipeline_en_5.5.0_3.0_1725452467449.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/schemeclassifier3_eng_dial_pipeline_en_5.5.0_3.0_1725452467449.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("schemeclassifier3_eng_dial_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("schemeclassifier3_eng_dial_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|schemeclassifier3_eng_dial_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/raruidol/SchemeClassifier3-ENG-Dial + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-scideberta_czech_tdm_pretrained_finetuned_ner_finetuned_ner_en.md b/docs/_posts/ahmedlone127/2024-09-04-scideberta_czech_tdm_pretrained_finetuned_ner_finetuned_ner_en.md new file mode 100644 index 00000000000000..cccd49b8d051c6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-scideberta_czech_tdm_pretrained_finetuned_ner_finetuned_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English scideberta_czech_tdm_pretrained_finetuned_ner_finetuned_ner DeBertaForTokenClassification from sohamtiwari3120 +author: John Snow Labs +name: scideberta_czech_tdm_pretrained_finetuned_ner_finetuned_ner +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, deberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`scideberta_czech_tdm_pretrained_finetuned_ner_finetuned_ner` is a English model originally trained by sohamtiwari3120. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scideberta_czech_tdm_pretrained_finetuned_ner_finetuned_ner_en_5.5.0_3.0_1725473411921.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scideberta_czech_tdm_pretrained_finetuned_ner_finetuned_ner_en_5.5.0_3.0_1725473411921.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DeBertaForTokenClassification.pretrained("scideberta_czech_tdm_pretrained_finetuned_ner_finetuned_ner","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DeBertaForTokenClassification.pretrained("scideberta_czech_tdm_pretrained_finetuned_ner_finetuned_ner", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
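+A short follow-up sketch (an assumption, not part of the original card) showing how the predicted tags can be read back out of `pipelineDF`:
+
+```python
+# a minimal sketch: pair each token with its predicted tag
+pipelineDF.select("token.result", "ner.result").show(truncate=False)
+
+row = pipelineDF.select("token.result", "ner.result").first()
+for token, tag in zip(row[0], row[1]):
+    print(token, tag)
+```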
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scideberta_czech_tdm_pretrained_finetuned_ner_finetuned_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|560.8 MB| + +## References + +https://huggingface.co/sohamtiwari3120/scideberta-cs-tdm-pretrained-finetuned-ner-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-search_shield_en.md b/docs/_posts/ahmedlone127/2024-09-04-search_shield_en.md new file mode 100644 index 00000000000000..7aa47f1d7f3533 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-search_shield_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English search_shield DistilBertForSequenceClassification from shivamkumaramehta +author: John Snow Labs +name: search_shield +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`search_shield` is a English model originally trained by shivamkumaramehta. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/search_shield_en_5.5.0_3.0_1725490277085.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/search_shield_en_5.5.0_3.0_1725490277085.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("search_shield","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("search_shield", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
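+A short follow-up sketch (assumed, not from the original card): the predicted label sits in `class.result`, and per-label scores are usually carried in the annotation metadata:
+
+```python
+# a minimal sketch: read the predicted label and (assumed) per-label scores
+from pyspark.sql.functions import col
+
+pipelineDF.select("text", "class.result").show(truncate=False)
+pipelineDF.select(col("class.metadata").getItem(0).alias("scores")).show(truncate=False)
+```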
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|search_shield| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/shivamkumaramehta/Search-Shield \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sembr2023_distilbert_base_uncased_finetuned_sst_2_english_en.md b/docs/_posts/ahmedlone127/2024-09-04-sembr2023_distilbert_base_uncased_finetuned_sst_2_english_en.md new file mode 100644 index 00000000000000..d65288d0f2afd9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sembr2023_distilbert_base_uncased_finetuned_sst_2_english_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sembr2023_distilbert_base_uncased_finetuned_sst_2_english DistilBertForTokenClassification from admko +author: John Snow Labs +name: sembr2023_distilbert_base_uncased_finetuned_sst_2_english +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sembr2023_distilbert_base_uncased_finetuned_sst_2_english` is a English model originally trained by admko. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sembr2023_distilbert_base_uncased_finetuned_sst_2_english_en_5.5.0_3.0_1725492914260.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sembr2023_distilbert_base_uncased_finetuned_sst_2_english_en_5.5.0_3.0_1725492914260.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("sembr2023_distilbert_base_uncased_finetuned_sst_2_english","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("sembr2023_distilbert_base_uncased_finetuned_sst_2_english", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sembr2023_distilbert_base_uncased_finetuned_sst_2_english| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/admko/sembr2023-distilbert-base-uncased-finetuned-sst-2-english \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sembr2023_distilbert_base_uncased_finetuned_sst_2_english_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-sembr2023_distilbert_base_uncased_finetuned_sst_2_english_pipeline_en.md new file mode 100644 index 00000000000000..70de5f49d00220 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sembr2023_distilbert_base_uncased_finetuned_sst_2_english_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English sembr2023_distilbert_base_uncased_finetuned_sst_2_english_pipeline pipeline DistilBertForTokenClassification from admko +author: John Snow Labs +name: sembr2023_distilbert_base_uncased_finetuned_sst_2_english_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sembr2023_distilbert_base_uncased_finetuned_sst_2_english_pipeline` is a English model originally trained by admko. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sembr2023_distilbert_base_uncased_finetuned_sst_2_english_pipeline_en_5.5.0_3.0_1725492926162.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sembr2023_distilbert_base_uncased_finetuned_sst_2_english_pipeline_en_5.5.0_3.0_1725492926162.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sembr2023_distilbert_base_uncased_finetuned_sst_2_english_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sembr2023_distilbert_base_uncased_finetuned_sst_2_english_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sembr2023_distilbert_base_uncased_finetuned_sst_2_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/admko/sembr2023-distilbert-base-uncased-finetuned-sst-2-english + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_arabert_c19_pipeline_ar.md b/docs/_posts/ahmedlone127/2024-09-04-sent_arabert_c19_pipeline_ar.md new file mode 100644 index 00000000000000..76b30a621fb77a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_arabert_c19_pipeline_ar.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Arabic sent_arabert_c19_pipeline pipeline BertSentenceEmbeddings from moha +author: John Snow Labs +name: sent_arabert_c19_pipeline +date: 2024-09-04 +tags: [ar, open_source, pipeline, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_arabert_c19_pipeline` is a Arabic model originally trained by moha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_arabert_c19_pipeline_ar_5.5.0_3.0_1725434477329.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_arabert_c19_pipeline_ar_5.5.0_3.0_1725434477329.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_arabert_c19_pipeline", lang = "ar") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_arabert_c19_pipeline", lang = "ar") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_arabert_c19_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ar| +|Size:|505.5 MB| + +## References + +https://huggingface.co/moha/arabert_c19 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_batterybert_uncased_en.md b/docs/_posts/ahmedlone127/2024-09-04-sent_batterybert_uncased_en.md new file mode 100644 index 00000000000000..29eb4314289613 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_batterybert_uncased_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_batterybert_uncased BertSentenceEmbeddings from batterydata +author: John Snow Labs +name: sent_batterybert_uncased +date: 2024-09-04 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_batterybert_uncased` is a English model originally trained by batterydata. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_batterybert_uncased_en_5.5.0_3.0_1725454161596.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_batterybert_uncased_en_5.5.0_3.0_1725454161596.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_batterybert_uncased","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_batterybert_uncased","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
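+As a minimal follow-up sketch (an assumption, not part of the original card), the raw sentence vectors can be unpacked from the annotation column for downstream use:
+
+```python
+# a minimal sketch: one row per sentence with its embedding vector
+from pyspark.sql import functions as F
+
+vectors = pipelineDF.select(F.explode("embeddings").alias("ann")) \
+    .select(F.col("ann.result").alias("sentence"),
+            F.col("ann.embeddings").alias("vector"))
+vectors.show(truncate=False)
+```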
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_batterybert_uncased| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/batterydata/batterybert-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_bert_base_german_cased_dbmdz_de.md b/docs/_posts/ahmedlone127/2024-09-04-sent_bert_base_german_cased_dbmdz_de.md new file mode 100644 index 00000000000000..50f9bcc00fb7db --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_bert_base_german_cased_dbmdz_de.md @@ -0,0 +1,94 @@ +--- +layout: model +title: German sent_bert_base_german_cased_dbmdz BertSentenceEmbeddings from dbmdz +author: John Snow Labs +name: sent_bert_base_german_cased_dbmdz +date: 2024-09-04 +tags: [de, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_base_german_cased_dbmdz` is a German model originally trained by dbmdz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_base_german_cased_dbmdz_de_5.5.0_3.0_1725415733444.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_base_german_cased_dbmdz_de_5.5.0_3.0_1725415733444.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_bert_base_german_cased_dbmdz","de") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_bert_base_german_cased_dbmdz","de") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_base_german_cased_dbmdz| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|de| +|Size:|409.9 MB| + +## References + +https://huggingface.co/dbmdz/bert-base-german-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_bert_base_german_dbmdz_uncased_pipeline_de.md b/docs/_posts/ahmedlone127/2024-09-04-sent_bert_base_german_dbmdz_uncased_pipeline_de.md new file mode 100644 index 00000000000000..0f0642fd9b3695 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_bert_base_german_dbmdz_uncased_pipeline_de.md @@ -0,0 +1,71 @@ +--- +layout: model +title: German sent_bert_base_german_dbmdz_uncased_pipeline pipeline BertSentenceEmbeddings from google-bert +author: John Snow Labs +name: sent_bert_base_german_dbmdz_uncased_pipeline +date: 2024-09-04 +tags: [de, open_source, pipeline, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_base_german_dbmdz_uncased_pipeline` is a German model originally trained by google-bert. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_base_german_dbmdz_uncased_pipeline_de_5.5.0_3.0_1725454048643.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_base_german_dbmdz_uncased_pipeline_de_5.5.0_3.0_1725454048643.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_bert_base_german_dbmdz_uncased_pipeline", lang = "de") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_bert_base_german_dbmdz_uncased_pipeline", lang = "de") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_base_german_dbmdz_uncased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|de| +|Size:|410.4 MB| + +## References + +https://huggingface.co/google-bert/bert-base-german-dbmdz-uncased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_bert_base_italian_cased_dbmdz_it.md b/docs/_posts/ahmedlone127/2024-09-04-sent_bert_base_italian_cased_dbmdz_it.md new file mode 100644 index 00000000000000..9d8143ce33266b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_bert_base_italian_cased_dbmdz_it.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Italian sent_bert_base_italian_cased_dbmdz BertSentenceEmbeddings from dbmdz +author: John Snow Labs +name: sent_bert_base_italian_cased_dbmdz +date: 2024-09-04 +tags: [it, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: it +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_base_italian_cased_dbmdz` is a Italian model originally trained by dbmdz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_base_italian_cased_dbmdz_it_5.5.0_3.0_1725416211559.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_base_italian_cased_dbmdz_it_5.5.0_3.0_1725416211559.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_bert_base_italian_cased_dbmdz","it") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_bert_base_italian_cased_dbmdz","it") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_base_italian_cased_dbmdz| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|it| +|Size:|409.6 MB| + +## References + +https://huggingface.co/dbmdz/bert-base-italian-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_bert_base_italian_cased_dbmdz_pipeline_it.md b/docs/_posts/ahmedlone127/2024-09-04-sent_bert_base_italian_cased_dbmdz_pipeline_it.md new file mode 100644 index 00000000000000..7f2db2692c36df --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_bert_base_italian_cased_dbmdz_pipeline_it.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Italian sent_bert_base_italian_cased_dbmdz_pipeline pipeline BertSentenceEmbeddings from dbmdz +author: John Snow Labs +name: sent_bert_base_italian_cased_dbmdz_pipeline +date: 2024-09-04 +tags: [it, open_source, pipeline, onnx] +task: Embeddings +language: it +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_base_italian_cased_dbmdz_pipeline` is a Italian model originally trained by dbmdz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_base_italian_cased_dbmdz_pipeline_it_5.5.0_3.0_1725416234593.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_base_italian_cased_dbmdz_pipeline_it_5.5.0_3.0_1725416234593.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_bert_base_italian_cased_dbmdz_pipeline", lang = "it") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_bert_base_italian_cased_dbmdz_pipeline", lang = "it") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_base_italian_cased_dbmdz_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|it| +|Size:|410.2 MB| + +## References + +https://huggingface.co/dbmdz/bert-base-italian-cased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_bert_base_uncased_eurlex_en.md b/docs/_posts/ahmedlone127/2024-09-04-sent_bert_base_uncased_eurlex_en.md new file mode 100644 index 00000000000000..fb236f4ccdd30c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_bert_base_uncased_eurlex_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_bert_base_uncased_eurlex BertSentenceEmbeddings from nlpaueb +author: John Snow Labs +name: sent_bert_base_uncased_eurlex +date: 2024-09-04 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_base_uncased_eurlex` is a English model originally trained by nlpaueb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_base_uncased_eurlex_en_5.5.0_3.0_1725434277240.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_base_uncased_eurlex_en_5.5.0_3.0_1725434277240.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_bert_base_uncased_eurlex","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_bert_base_uncased_eurlex","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_base_uncased_eurlex| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/nlpaueb/bert-base-uncased-eurlex \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_bert_base_uncased_eurlex_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-sent_bert_base_uncased_eurlex_pipeline_en.md new file mode 100644 index 00000000000000..0c1d95e7e47c45 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_bert_base_uncased_eurlex_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_bert_base_uncased_eurlex_pipeline pipeline BertSentenceEmbeddings from nlpaueb +author: John Snow Labs +name: sent_bert_base_uncased_eurlex_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_base_uncased_eurlex_pipeline` is a English model originally trained by nlpaueb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_base_uncased_eurlex_pipeline_en_5.5.0_3.0_1725434297312.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_base_uncased_eurlex_pipeline_en_5.5.0_3.0_1725434297312.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_bert_base_uncased_eurlex_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_bert_base_uncased_eurlex_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_base_uncased_eurlex_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.7 MB| + +## References + +https://huggingface.co/nlpaueb/bert-base-uncased-eurlex + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_bert_distil_ita_legal_bert_en.md b/docs/_posts/ahmedlone127/2024-09-04-sent_bert_distil_ita_legal_bert_en.md new file mode 100644 index 00000000000000..7b80e11344b8f1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_bert_distil_ita_legal_bert_en.md @@ -0,0 +1,77 @@ +--- +layout: model +title: English Legal BERT Sentence Embedding Cased model +author: John Snow Labs +name: sent_bert_distil_ita_legal_bert +date: 2024-09-04 +tags: [bert, en, embeddings, sentence, open_source, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Legal BERT Sentence Embedding model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distil-ita-legal-bert` is a English model originally trained by `dlicari`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_distil_ita_legal_bert_en_5.5.0_3.0_1725415787606.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_distil_ita_legal_bert_en_5.5.0_3.0_1725415787606.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# assumes document_assembler, sentence_detector and the input `data` are defined beforehand
+sent_embeddings = BertSentenceEmbeddings.pretrained("sent_bert_distil_ita_legal_bert", "en") \
+    .setInputCols("sentence") \
+    .setOutputCol("bert_sentence")
+
+nlp_pipeline = Pipeline(stages=[document_assembler, sentence_detector, sent_embeddings])
+result = nlp_pipeline.fit(data).transform(data)
+```
+```scala
+// assumes document_assembler, sentence_detector and the input `data` are defined beforehand
+val sent_embeddings = BertSentenceEmbeddings.pretrained("sent_bert_distil_ita_legal_bert", "en")
+    .setInputCols("sentence")
+    .setOutputCol("bert_sentence")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, sentence_detector, sent_embeddings))
+val result = pipeline.fit(data).transform(data)
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_distil_ita_legal_bert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|200.4 MB| + +## References + +References + +- https://huggingface.co/dlicari/distil-ita-legal-bert +- https://www.SBERT.net +- https://seb.sbert.net?model_name=%7BMODEL_NAME%7D \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_bert_persian_farsi_zwnj_base_pipeline_fa.md b/docs/_posts/ahmedlone127/2024-09-04-sent_bert_persian_farsi_zwnj_base_pipeline_fa.md new file mode 100644 index 00000000000000..c5a77ebac7f44c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_bert_persian_farsi_zwnj_base_pipeline_fa.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Persian sent_bert_persian_farsi_zwnj_base_pipeline pipeline BertSentenceEmbeddings from HooshvareLab +author: John Snow Labs +name: sent_bert_persian_farsi_zwnj_base_pipeline +date: 2024-09-04 +tags: [fa, open_source, pipeline, onnx] +task: Embeddings +language: fa +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_persian_farsi_zwnj_base_pipeline` is a Persian model originally trained by HooshvareLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_persian_farsi_zwnj_base_pipeline_fa_5.5.0_3.0_1725415962363.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_persian_farsi_zwnj_base_pipeline_fa_5.5.0_3.0_1725415962363.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_bert_persian_farsi_zwnj_base_pipeline", lang = "fa") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_bert_persian_farsi_zwnj_base_pipeline", lang = "fa") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_persian_farsi_zwnj_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fa| +|Size:|442.1 MB| + +## References + +https://huggingface.co/HooshvareLab/bert-fa-zwnj-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_bert_tiny_historic_multilingual_cased_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-04-sent_bert_tiny_historic_multilingual_cased_pipeline_xx.md new file mode 100644 index 00000000000000..e0719d053349e4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_bert_tiny_historic_multilingual_cased_pipeline_xx.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Multilingual sent_bert_tiny_historic_multilingual_cased_pipeline pipeline BertSentenceEmbeddings from dbmdz +author: John Snow Labs +name: sent_bert_tiny_historic_multilingual_cased_pipeline +date: 2024-09-04 +tags: [xx, open_source, pipeline, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_tiny_historic_multilingual_cased_pipeline` is a Multilingual model originally trained by dbmdz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_tiny_historic_multilingual_cased_pipeline_xx_5.5.0_3.0_1725454247572.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_tiny_historic_multilingual_cased_pipeline_xx_5.5.0_3.0_1725454247572.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_bert_tiny_historic_multilingual_cased_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_bert_tiny_historic_multilingual_cased_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_tiny_historic_multilingual_cased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|17.9 MB| + +## References + +https://huggingface.co/dbmdz/bert-tiny-historic-multilingual-cased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_bert_tiny_historic_multilingual_cased_xx.md b/docs/_posts/ahmedlone127/2024-09-04-sent_bert_tiny_historic_multilingual_cased_xx.md new file mode 100644 index 00000000000000..bf9aa207699b1a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_bert_tiny_historic_multilingual_cased_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual sent_bert_tiny_historic_multilingual_cased BertSentenceEmbeddings from dbmdz +author: John Snow Labs +name: sent_bert_tiny_historic_multilingual_cased +date: 2024-09-04 +tags: [xx, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_tiny_historic_multilingual_cased` is a Multilingual model originally trained by dbmdz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_tiny_historic_multilingual_cased_xx_5.5.0_3.0_1725454246385.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_tiny_historic_multilingual_cased_xx_5.5.0_3.0_1725454246385.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_bert_tiny_historic_multilingual_cased","xx") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_bert_tiny_historic_multilingual_cased","xx") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_tiny_historic_multilingual_cased| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|17.4 MB| + +## References + +https://huggingface.co/dbmdz/bert-tiny-historic-multilingual-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_bertu_pipeline_mt.md b/docs/_posts/ahmedlone127/2024-09-04-sent_bertu_pipeline_mt.md new file mode 100644 index 00000000000000..2ec2d4d8d22ccf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_bertu_pipeline_mt.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Maltese sent_bertu_pipeline pipeline BertSentenceEmbeddings from MLRS +author: John Snow Labs +name: sent_bertu_pipeline +date: 2024-09-04 +tags: [mt, open_source, pipeline, onnx] +task: Embeddings +language: mt +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bertu_pipeline` is a Maltese model originally trained by MLRS. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bertu_pipeline_mt_5.5.0_3.0_1725434446477.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bertu_pipeline_mt_5.5.0_3.0_1725434446477.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_bertu_pipeline", lang = "mt") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_bertu_pipeline", lang = "mt") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bertu_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|mt| +|Size:|469.2 MB| + +## References + +https://huggingface.co/MLRS/BERTu + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_bertweet_persian_farsi_fa.md b/docs/_posts/ahmedlone127/2024-09-04-sent_bertweet_persian_farsi_fa.md new file mode 100644 index 00000000000000..e6b13aef7e73e1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_bertweet_persian_farsi_fa.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Persian sent_bertweet_persian_farsi BertSentenceEmbeddings from arm-on +author: John Snow Labs +name: sent_bertweet_persian_farsi +date: 2024-09-04 +tags: [fa, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: fa +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bertweet_persian_farsi` is a Persian model originally trained by arm-on. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bertweet_persian_farsi_fa_5.5.0_3.0_1725454602480.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bertweet_persian_farsi_fa_5.5.0_3.0_1725454602480.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_bertweet_persian_farsi","fa") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_bertweet_persian_farsi","fa") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bertweet_persian_farsi| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|fa| +|Size:|405.8 MB| + +## References + +https://huggingface.co/arm-on/BERTweet-FA \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_convbert_base_turkish_mc4_uncased_pipeline_tr.md b/docs/_posts/ahmedlone127/2024-09-04-sent_convbert_base_turkish_mc4_uncased_pipeline_tr.md new file mode 100644 index 00000000000000..c96e033b315c93 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_convbert_base_turkish_mc4_uncased_pipeline_tr.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Turkish sent_convbert_base_turkish_mc4_uncased_pipeline pipeline BertSentenceEmbeddings from dbmdz +author: John Snow Labs +name: sent_convbert_base_turkish_mc4_uncased_pipeline +date: 2024-09-04 +tags: [tr, open_source, pipeline, onnx] +task: Embeddings +language: tr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_convbert_base_turkish_mc4_uncased_pipeline` is a Turkish model originally trained by dbmdz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_convbert_base_turkish_mc4_uncased_pipeline_tr_5.5.0_3.0_1725416221791.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_convbert_base_turkish_mc4_uncased_pipeline_tr_5.5.0_3.0_1725416221791.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_convbert_base_turkish_mc4_uncased_pipeline", lang = "tr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_convbert_base_turkish_mc4_uncased_pipeline", lang = "tr") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_convbert_base_turkish_mc4_uncased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|tr| +|Size:|400.6 MB| + +## References + +https://huggingface.co/dbmdz/convbert-base-turkish-mc4-uncased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_convbert_base_turkish_mc4_uncased_tr.md b/docs/_posts/ahmedlone127/2024-09-04-sent_convbert_base_turkish_mc4_uncased_tr.md new file mode 100644 index 00000000000000..e83ef63c2bacbb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_convbert_base_turkish_mc4_uncased_tr.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Turkish sent_convbert_base_turkish_mc4_uncased BertSentenceEmbeddings from dbmdz +author: John Snow Labs +name: sent_convbert_base_turkish_mc4_uncased +date: 2024-09-04 +tags: [tr, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: tr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_convbert_base_turkish_mc4_uncased` is a Turkish model originally trained by dbmdz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_convbert_base_turkish_mc4_uncased_tr_5.5.0_3.0_1725416201169.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_convbert_base_turkish_mc4_uncased_tr_5.5.0_3.0_1725416201169.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_convbert_base_turkish_mc4_uncased","tr") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_convbert_base_turkish_mc4_uncased","tr") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
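+
+After `transform`, each detected sentence's vector sits in the `embeddings` field of the annotation structs in the output column. A sketch of extracting them, assuming the standard Spark NLP annotation schema:
+
+```python
+from pyspark.sql.functions import explode
+
+# One annotation per sentence: `result` holds the sentence text,
+# `embeddings` holds its vector
+vectors = (
+    pipelineDF
+    .select(explode("embeddings").alias("ann"))
+    .select("ann.result", "ann.embeddings")
+)
+vectors.show(truncate=60)
+```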
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_convbert_base_turkish_mc4_uncased| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|tr| +|Size:|400.0 MB| + +## References + +https://huggingface.co/dbmdz/convbert-base-turkish-mc4-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_darijabert_ar.md b/docs/_posts/ahmedlone127/2024-09-04-sent_darijabert_ar.md new file mode 100644 index 00000000000000..16b79701e6cf65 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_darijabert_ar.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Arabic sent_darijabert BertSentenceEmbeddings from SI2M-Lab +author: John Snow Labs +name: sent_darijabert +date: 2024-09-04 +tags: [ar, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: ar +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_darijabert` is a Arabic model originally trained by SI2M-Lab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_darijabert_ar_5.5.0_3.0_1725415789909.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_darijabert_ar_5.5.0_3.0_1725415789909.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_darijabert","ar") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_darijabert","ar") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_darijabert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|551.5 MB| + +## References + +https://huggingface.co/SI2M-Lab/DarijaBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_entitycs_39_pep_malay_mlm_xlmr_base_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-04-sent_entitycs_39_pep_malay_mlm_xlmr_base_pipeline_xx.md new file mode 100644 index 00000000000000..f51b306b858841 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_entitycs_39_pep_malay_mlm_xlmr_base_pipeline_xx.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Multilingual sent_entitycs_39_pep_malay_mlm_xlmr_base_pipeline pipeline XlmRoBertaSentenceEmbeddings from huawei-noah +author: John Snow Labs +name: sent_entitycs_39_pep_malay_mlm_xlmr_base_pipeline +date: 2024-09-04 +tags: [xx, open_source, pipeline, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_entitycs_39_pep_malay_mlm_xlmr_base_pipeline` is a Multilingual model originally trained by huawei-noah. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_entitycs_39_pep_malay_mlm_xlmr_base_pipeline_xx_5.5.0_3.0_1725420619219.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_entitycs_39_pep_malay_mlm_xlmr_base_pipeline_xx_5.5.0_3.0_1725420619219.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_entitycs_39_pep_malay_mlm_xlmr_base_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_entitycs_39_pep_malay_mlm_xlmr_base_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
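+
+For quick single-document checks the pipeline can also be driven without building a DataFrame. A sketch (the returned keys depend on the stages listed under Included Models below):
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("sent_entitycs_39_pep_malay_mlm_xlmr_base_pipeline", lang="xx")
+
+# annotate() runs the pipeline on a plain string and returns a dict of
+# outputs keyed by column name
+result = pipeline.annotate("I love spark-nlp")
+print(result.keys())
+```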
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_entitycs_39_pep_malay_mlm_xlmr_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|945.0 MB| + +## References + +https://huggingface.co/huawei-noah/EntityCS-39-PEP_MS_MLM-xlmr-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_entitycs_39_wep_xlmr_base_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-04-sent_entitycs_39_wep_xlmr_base_pipeline_xx.md new file mode 100644 index 00000000000000..54ff5de4c1a0e4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_entitycs_39_wep_xlmr_base_pipeline_xx.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Multilingual sent_entitycs_39_wep_xlmr_base_pipeline pipeline XlmRoBertaSentenceEmbeddings from huawei-noah +author: John Snow Labs +name: sent_entitycs_39_wep_xlmr_base_pipeline +date: 2024-09-04 +tags: [xx, open_source, pipeline, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_entitycs_39_wep_xlmr_base_pipeline` is a Multilingual model originally trained by huawei-noah. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_entitycs_39_wep_xlmr_base_pipeline_xx_5.5.0_3.0_1725419298384.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_entitycs_39_wep_xlmr_base_pipeline_xx_5.5.0_3.0_1725419298384.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_entitycs_39_wep_xlmr_base_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_entitycs_39_wep_xlmr_base_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_entitycs_39_wep_xlmr_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|945.4 MB| + +## References + +https://huggingface.co/huawei-noah/EntityCS-39-WEP-xlmr-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_finbert_pretrain_yiyanghkust_en.md b/docs/_posts/ahmedlone127/2024-09-04-sent_finbert_pretrain_yiyanghkust_en.md new file mode 100644 index 00000000000000..979bae39821b1b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_finbert_pretrain_yiyanghkust_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_finbert_pretrain_yiyanghkust BertSentenceEmbeddings from yiyanghkust +author: John Snow Labs +name: sent_finbert_pretrain_yiyanghkust +date: 2024-09-04 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_finbert_pretrain_yiyanghkust` is a English model originally trained by yiyanghkust. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_finbert_pretrain_yiyanghkust_en_5.5.0_3.0_1725454045651.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_finbert_pretrain_yiyanghkust_en_5.5.0_3.0_1725454045651.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_finbert_pretrain_yiyanghkust","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_finbert_pretrain_yiyanghkust","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
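+
+Because the embeddings stage is several hundred megabytes, it can be worth persisting the fitted pipeline once and reloading it later. A sketch using the standard Spark ML persistence API (the path is illustrative):
+
+```python
+from pyspark.ml import PipelineModel
+
+# Save the fitted pipeline so pretrained stages are not re-downloaded
+pipelineModel.write().overwrite().save("/tmp/sent_finbert_pretrain_pipeline")
+
+# Reload and reuse on any DataFrame with a "text" column
+reloaded = PipelineModel.load("/tmp/sent_finbert_pretrain_pipeline")
+reloaded.transform(data).select("embeddings").show(1, truncate=60)
+```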
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_finbert_pretrain_yiyanghkust| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/yiyanghkust/finbert-pretrain \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_hafez_bert_fa.md b/docs/_posts/ahmedlone127/2024-09-04-sent_hafez_bert_fa.md new file mode 100644 index 00000000000000..59d851fa4f2fc3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_hafez_bert_fa.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Persian sent_hafez_bert BertSentenceEmbeddings from ViravirastSHZ +author: John Snow Labs +name: sent_hafez_bert +date: 2024-09-04 +tags: [fa, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: fa +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_hafez_bert` is a Persian model originally trained by ViravirastSHZ. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_hafez_bert_fa_5.5.0_3.0_1725453934721.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_hafez_bert_fa_5.5.0_3.0_1725453934721.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_hafez_bert","fa") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_hafez_bert","fa") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_hafez_bert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|fa| +|Size:|408.2 MB| + +## References + +https://huggingface.co/ViravirastSHZ/Hafez_Bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_hafez_bert_pipeline_fa.md b/docs/_posts/ahmedlone127/2024-09-04-sent_hafez_bert_pipeline_fa.md new file mode 100644 index 00000000000000..c54dc5887ec435 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_hafez_bert_pipeline_fa.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Persian sent_hafez_bert_pipeline pipeline BertSentenceEmbeddings from ViravirastSHZ +author: John Snow Labs +name: sent_hafez_bert_pipeline +date: 2024-09-04 +tags: [fa, open_source, pipeline, onnx] +task: Embeddings +language: fa +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_hafez_bert_pipeline` is a Persian model originally trained by ViravirastSHZ. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_hafez_bert_pipeline_fa_5.5.0_3.0_1725453958589.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_hafez_bert_pipeline_fa_5.5.0_3.0_1725453958589.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_hafez_bert_pipeline", lang = "fa") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_hafez_bert_pipeline", lang = "fa") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_hafez_bert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fa| +|Size:|408.7 MB| + +## References + +https://huggingface.co/ViravirastSHZ/Hafez_Bert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_indicbertv2_mlm_only_en.md b/docs/_posts/ahmedlone127/2024-09-04-sent_indicbertv2_mlm_only_en.md new file mode 100644 index 00000000000000..9e530f0ceffc45 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_indicbertv2_mlm_only_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_indicbertv2_mlm_only BertSentenceEmbeddings from ai4bharat +author: John Snow Labs +name: sent_indicbertv2_mlm_only +date: 2024-09-04 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_indicbertv2_mlm_only` is a English model originally trained by ai4bharat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_indicbertv2_mlm_only_en_5.5.0_3.0_1725434229498.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_indicbertv2_mlm_only_en_5.5.0_3.0_1725434229498.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_indicbertv2_mlm_only","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_indicbertv2_mlm_only","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
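+
+The fitted pipeline transforms any DataFrame with a `text` column, so batch scoring is just a larger input. A sketch with illustrative rows:
+
+```python
+# Sample rows are illustrative; the fitted model from above is reused as-is
+texts = [
+    ("This is the first document.",),
+    ("This is another, slightly longer document to embed.",),
+]
+batch = spark.createDataFrame(texts, ["text"])
+
+batch_result = pipelineModel.transform(batch)
+batch_result.select("sentence.result", "embeddings.embeddings").show(truncate=50)
+```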
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_indicbertv2_mlm_only| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/ai4bharat/IndicBERTv2-MLM-only \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_indicbertv2_mlm_only_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-sent_indicbertv2_mlm_only_pipeline_en.md new file mode 100644 index 00000000000000..4fef09aad5e47f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_indicbertv2_mlm_only_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_indicbertv2_mlm_only_pipeline pipeline BertSentenceEmbeddings from ai4bharat +author: John Snow Labs +name: sent_indicbertv2_mlm_only_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_indicbertv2_mlm_only_pipeline` is a English model originally trained by ai4bharat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_indicbertv2_mlm_only_pipeline_en_5.5.0_3.0_1725434282856.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_indicbertv2_mlm_only_pipeline_en_5.5.0_3.0_1725434282856.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_indicbertv2_mlm_only_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_indicbertv2_mlm_only_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_indicbertv2_mlm_only_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/ai4bharat/IndicBERTv2-MLM-only + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_inlegalbert_en.md b/docs/_posts/ahmedlone127/2024-09-04-sent_inlegalbert_en.md new file mode 100644 index 00000000000000..a0b884e7d5beee --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_inlegalbert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_inlegalbert BertSentenceEmbeddings from law-ai +author: John Snow Labs +name: sent_inlegalbert +date: 2024-09-04 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_inlegalbert` is a English model originally trained by law-ai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_inlegalbert_en_5.5.0_3.0_1725453907815.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_inlegalbert_en_5.5.0_3.0_1725453907815.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_inlegalbert","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_inlegalbert","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
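+
+To feed the vectors into a downstream Spark ML classifier, the annotation structs can be flattened with `EmbeddingsFinisher`. A sketch, assuming the output column is named `embeddings` as above:
+
+```python
+from sparknlp.base import EmbeddingsFinisher
+
+# Convert embedding annotations into plain Spark ML vectors
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["finished_embeddings"]) \
+    .setOutputAsVector(True)
+
+finished = finisher.transform(pipelineDF)
+finished.select("finished_embeddings").show(1, truncate=60)
+```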
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_inlegalbert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/law-ai/InLegalBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_legal_bert_base_uncased_nlpaueb_en.md b/docs/_posts/ahmedlone127/2024-09-04-sent_legal_bert_base_uncased_nlpaueb_en.md new file mode 100644 index 00000000000000..0782a69941765b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_legal_bert_base_uncased_nlpaueb_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_legal_bert_base_uncased_nlpaueb BertSentenceEmbeddings from nlpaueb +author: John Snow Labs +name: sent_legal_bert_base_uncased_nlpaueb +date: 2024-09-04 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_legal_bert_base_uncased_nlpaueb` is a English model originally trained by nlpaueb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_legal_bert_base_uncased_nlpaueb_en_5.5.0_3.0_1725453977818.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_legal_bert_base_uncased_nlpaueb_en_5.5.0_3.0_1725453977818.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_legal_bert_base_uncased_nlpaueb","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_legal_bert_base_uncased_nlpaueb","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_legal_bert_base_uncased_nlpaueb| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.3 MB| + +## References + +https://huggingface.co/nlpaueb/legal-bert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_marbert_ar.md b/docs/_posts/ahmedlone127/2024-09-04-sent_marbert_ar.md new file mode 100644 index 00000000000000..67e133fb3cdd81 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_marbert_ar.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Arabic sent_marbert BertSentenceEmbeddings from UBC-NLP +author: John Snow Labs +name: sent_marbert +date: 2024-09-04 +tags: [ar, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: ar +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_marbert` is a Arabic model originally trained by UBC-NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_marbert_ar_5.5.0_3.0_1725416174263.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_marbert_ar_5.5.0_3.0_1725416174263.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_marbert","ar") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_marbert","ar") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
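+
+Since the model is listed as Arabic, Arabic input is the natural fit. A sketch reusing the fitted pipeline from above (the sample sentence, "I love natural language processing", is illustrative):
+
+```python
+# Arabic sample input, illustrative only
+arabic = spark.createDataFrame([["أنا أحب معالجة اللغة الطبيعية"]]).toDF("text")
+
+pipelineModel.transform(arabic).select("embeddings.embeddings").show(1, truncate=40)
+```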
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_marbert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|608.7 MB| + +## References + +https://huggingface.co/UBC-NLP/MARBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_marbert_pipeline_ar.md b/docs/_posts/ahmedlone127/2024-09-04-sent_marbert_pipeline_ar.md new file mode 100644 index 00000000000000..ca345985163c50 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_marbert_pipeline_ar.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Arabic sent_marbert_pipeline pipeline BertSentenceEmbeddings from UBC-NLP +author: John Snow Labs +name: sent_marbert_pipeline +date: 2024-09-04 +tags: [ar, open_source, pipeline, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_marbert_pipeline` is a Arabic model originally trained by UBC-NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_marbert_pipeline_ar_5.5.0_3.0_1725416206778.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_marbert_pipeline_ar_5.5.0_3.0_1725416206778.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_marbert_pipeline", lang = "ar") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_marbert_pipeline", lang = "ar") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_marbert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ar| +|Size:|609.2 MB| + +## References + +https://huggingface.co/UBC-NLP/MARBERT + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_norbert2_no.md b/docs/_posts/ahmedlone127/2024-09-04-sent_norbert2_no.md new file mode 100644 index 00000000000000..07ed08561d34d2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_norbert2_no.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Norwegian sent_norbert2 BertSentenceEmbeddings from ltg +author: John Snow Labs +name: sent_norbert2 +date: 2024-09-04 +tags: ["no", open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: "no" +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_norbert2` is a Norwegian model originally trained by ltg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_norbert2_no_5.5.0_3.0_1725453801178.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_norbert2_no_5.5.0_3.0_1725453801178.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_norbert2","no") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_norbert2","no") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_norbert2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|no| +|Size:|465.2 MB| + +## References + +https://huggingface.co/ltg/norbert2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_patentbert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-sent_patentbert_pipeline_en.md new file mode 100644 index 00000000000000..8283a44cd6262f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_patentbert_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_patentbert_pipeline pipeline BertSentenceEmbeddings from dheerajpai +author: John Snow Labs +name: sent_patentbert_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_patentbert_pipeline` is a English model originally trained by dheerajpai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_patentbert_pipeline_en_5.5.0_3.0_1725454172019.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_patentbert_pipeline_en_5.5.0_3.0_1725454172019.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_patentbert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_patentbert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_patentbert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|75.9 MB| + +## References + +https://huggingface.co/dheerajpai/patentbert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_tiny_biobert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-sent_tiny_biobert_pipeline_en.md new file mode 100644 index 00000000000000..353493e79b10e0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_tiny_biobert_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_tiny_biobert_pipeline pipeline BertSentenceEmbeddings from nlpie +author: John Snow Labs +name: sent_tiny_biobert_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_tiny_biobert_pipeline` is a English model originally trained by nlpie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_tiny_biobert_pipeline_en_5.5.0_3.0_1725454296154.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_tiny_biobert_pipeline_en_5.5.0_3.0_1725454296154.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_tiny_biobert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_tiny_biobert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
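+
+At roughly 52 MB this pipeline is small enough for quick local experiments, and `fullAnnotate` keeps offsets and metadata alongside the results. A sketch (the sample sentence is illustrative):
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("sent_tiny_biobert_pipeline", lang="en")
+
+# fullAnnotate returns one result per input text, keyed by output column
+results = pipeline.fullAnnotate("Aspirin reduced the patient's fever within two hours.")
+print(results[0].keys())
+```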
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_tiny_biobert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|52.5 MB| + +## References + +https://huggingface.co/nlpie/tiny-biobert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_vetbert_en.md b/docs/_posts/ahmedlone127/2024-09-04-sent_vetbert_en.md new file mode 100644 index 00000000000000..847ebc76b5ab16 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_vetbert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_vetbert BertSentenceEmbeddings from havocy28 +author: John Snow Labs +name: sent_vetbert +date: 2024-09-04 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_vetbert` is a English model originally trained by havocy28. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_vetbert_en_5.5.0_3.0_1725415945209.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_vetbert_en_5.5.0_3.0_1725415945209.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_vetbert","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_vetbert","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_vetbert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|402.8 MB| + +## References + +https://huggingface.co/havocy28/VetBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_vetbert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-sent_vetbert_pipeline_en.md new file mode 100644 index 00000000000000..ac825caaf02261 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_vetbert_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_vetbert_pipeline pipeline BertSentenceEmbeddings from havocy28 +author: John Snow Labs +name: sent_vetbert_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_vetbert_pipeline` is a English model originally trained by havocy28. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_vetbert_pipeline_en_5.5.0_3.0_1725415966989.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_vetbert_pipeline_en_5.5.0_3.0_1725415966989.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_vetbert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_vetbert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_vetbert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.4 MB| + +## References + +https://huggingface.co/havocy28/VetBERT + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sent_xlm_roberta_base_finetuned_luganda_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-sent_xlm_roberta_base_finetuned_luganda_pipeline_en.md new file mode 100644 index 00000000000000..03f9e2c08a0ef1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sent_xlm_roberta_base_finetuned_luganda_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_xlm_roberta_base_finetuned_luganda_pipeline pipeline XlmRoBertaSentenceEmbeddings from Davlan +author: John Snow Labs +name: sent_xlm_roberta_base_finetuned_luganda_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_xlm_roberta_base_finetuned_luganda_pipeline` is a English model originally trained by Davlan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_luganda_pipeline_en_5.5.0_3.0_1725421049015.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_luganda_pipeline_en_5.5.0_3.0_1725421049015.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_xlm_roberta_base_finetuned_luganda_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_xlm_roberta_base_finetuned_luganda_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_xlm_roberta_base_finetuned_luganda_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Davlan/xlm-roberta-base-finetuned-luganda + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sentencepiecebpe_cc100_french_en.md b/docs/_posts/ahmedlone127/2024-09-04-sentencepiecebpe_cc100_french_en.md new file mode 100644 index 00000000000000..560fc4f49f768b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sentencepiecebpe_cc100_french_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sentencepiecebpe_cc100_french CamemBertEmbeddings from BioMedTok +author: John Snow Labs +name: sentencepiecebpe_cc100_french +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sentencepiecebpe_cc100_french` is a English model originally trained by BioMedTok. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentencepiecebpe_cc100_french_en_5.5.0_3.0_1725408583191.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentencepiecebpe_cc100_french_en_5.5.0_3.0_1725408583191.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("sentencepiecebpe_cc100_french","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("sentencepiecebpe_cc100_french","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
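+
+Unlike the sentence-level cards above, `CamemBertEmbeddings` emits one vector per token, and the model name points at French CC100 data, so a French sentence makes a reasonable smoke test. A sketch (sample text illustrative; field names assume the standard annotation schema):
+
+```python
+from pyspark.sql.functions import explode
+
+# French sample input, illustrative only
+french = spark.createDataFrame([["Le patient présente une fièvre modérée."]]).toDF("text")
+
+tokens = (
+    pipelineModel.transform(french)
+    .select(explode("embeddings").alias("ann"))
+    .select("ann.result", "ann.embeddings")
+)
+tokens.show(8, truncate=50)
+```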
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentencepiecebpe_cc100_french| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|412.6 MB| + +## References + +https://huggingface.co/BioMedTok/SentencePieceBPE-CC100-FR \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sentencepiecebpe_cc100_french_morphemes_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-sentencepiecebpe_cc100_french_morphemes_pipeline_en.md new file mode 100644 index 00000000000000..87947007fc2645 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sentencepiecebpe_cc100_french_morphemes_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English sentencepiecebpe_cc100_french_morphemes_pipeline pipeline CamemBertEmbeddings from BioMedTok +author: John Snow Labs +name: sentencepiecebpe_cc100_french_morphemes_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sentencepiecebpe_cc100_french_morphemes_pipeline` is a English model originally trained by BioMedTok. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentencepiecebpe_cc100_french_morphemes_pipeline_en_5.5.0_3.0_1725441928932.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentencepiecebpe_cc100_french_morphemes_pipeline_en_5.5.0_3.0_1725441928932.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sentencepiecebpe_cc100_french_morphemes_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sentencepiecebpe_cc100_french_morphemes_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentencepiecebpe_cc100_french_morphemes_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|412.6 MB| + +## References + +https://huggingface.co/BioMedTok/SentencePieceBPE-CC100-FR-Morphemes + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sentencepiecebpe_cc100_french_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-sentencepiecebpe_cc100_french_pipeline_en.md new file mode 100644 index 00000000000000..caaa5248011b6b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sentencepiecebpe_cc100_french_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English sentencepiecebpe_cc100_french_pipeline pipeline CamemBertEmbeddings from BioMedTok +author: John Snow Labs +name: sentencepiecebpe_cc100_french_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sentencepiecebpe_cc100_french_pipeline` is a English model originally trained by BioMedTok. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentencepiecebpe_cc100_french_pipeline_en_5.5.0_3.0_1725408605019.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentencepiecebpe_cc100_french_pipeline_en_5.5.0_3.0_1725408605019.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sentencepiecebpe_cc100_french_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sentencepiecebpe_cc100_french_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentencepiecebpe_cc100_french_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|412.6 MB| + +## References + +https://huggingface.co/BioMedTok/SentencePieceBPE-CC100-FR + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-serbian_test_clip_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-serbian_test_clip_pipeline_en.md new file mode 100644 index 00000000000000..a930ab8eb89868 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-serbian_test_clip_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English serbian_test_clip_pipeline pipeline CLIPForZeroShotClassification from aurelio-ai +author: John Snow Labs +name: serbian_test_clip_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`serbian_test_clip_pipeline` is a English model originally trained by aurelio-ai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/serbian_test_clip_pipeline_en_5.5.0_3.0_1725456656152.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/serbian_test_clip_pipeline_en_5.5.0_3.0_1725456656152.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("serbian_test_clip_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("serbian_test_clip_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
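+
+Because this pipeline starts with an `ImageAssembler`, the `df` in the snippet above is an image DataFrame rather than a text one. A hedged sketch, assuming a local folder of images and Spark's built-in `image` data source:
+
+```python
+# Hedged sketch: assumes a live Spark NLP session and a local directory of image files.
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+image_df = spark.read.format("image").option("dropInvalid", True).load("path/to/images/")
+
+pipeline = PretrainedPipeline("serbian_test_clip_pipeline", lang="en")
+annotations = pipeline.transform(image_df)
+annotations.printSchema()  # the zero-shot classification output column can be inspected here
+```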
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|serbian_test_clip_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|547.1 KB| + +## References + +https://huggingface.co/aurelio-ai/sr-test-clip + +## Included Models + +- ImageAssembler +- CLIPForZeroShotClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-shus_en.md b/docs/_posts/ahmedlone127/2024-09-04-shus_en.md new file mode 100644 index 00000000000000..a4b398e03839ff --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-shus_en.md @@ -0,0 +1,120 @@ +--- +layout: model +title: English shus CLIPForZeroShotClassification from hughtayloe +author: John Snow Labs +name: shus +date: 2024-09-04 +tags: [en, open_source, onnx, zero_shot, clip, image] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CLIPForZeroShotClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`shus` is a English model originally trained by hughtayloe. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/shus_en_5.5.0_3.0_1725456389508.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/shus_en_5.5.0_3.0_1725456389508.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+imageDF = spark.read \
+    .format("image") \
+    .option("dropInvalid", value = True) \
+    .load("src/test/resources/image/")
+
+candidateLabels = [
+    "a photo of a bird",
+    "a photo of a cat",
+    "a photo of a dog",
+    "a photo of a hen",
+    "a photo of a hippo",
+    "a photo of a room",
+    "a photo of a tractor",
+    "a photo of an ostrich",
+    "a photo of an ox"]
+
+imageAssembler = ImageAssembler() \
+    .setInputCol("image") \
+    .setOutputCol("image_assembler")
+
+imageClassifier = CLIPForZeroShotClassification.pretrained("shus","en") \
+    .setInputCols(["image_assembler"]) \
+    .setOutputCol("label") \
+    .setCandidateLabels(candidateLabels)
+
+pipeline = Pipeline().setStages([imageAssembler, imageClassifier])
+pipelineModel = pipeline.fit(imageDF)
+pipelineDF = pipelineModel.transform(imageDF)
+
+```
+```scala
+
+val imageDF = ResourceHelper.spark.read
+  .format("image")
+  .option("dropInvalid", value = true)
+  .load("src/test/resources/image/")
+
+val candidateLabels = Array(
+  "a photo of a bird",
+  "a photo of a cat",
+  "a photo of a dog",
+  "a photo of a hen",
+  "a photo of a hippo",
+  "a photo of a room",
+  "a photo of a tractor",
+  "a photo of an ostrich",
+  "a photo of an ox")
+
+val imageAssembler = new ImageAssembler()
+  .setInputCol("image")
+  .setOutputCol("image_assembler")
+
+val imageClassifier = CLIPForZeroShotClassification.pretrained("shus","en")
+  .setInputCols(Array("image_assembler"))
+  .setOutputCol("label")
+  .setCandidateLabels(candidateLabels)
+
+val pipeline = new Pipeline().setStages(Array(imageAssembler, imageClassifier))
+val pipelineModel = pipeline.fit(imageDF)
+val pipelineDF = pipelineModel.transform(imageDF)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|shus| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[label]| +|Language:|en| +|Size:|1.1 GB| + +## References + +https://huggingface.co/hughtayloe/shus \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-shus_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-shus_pipeline_en.md new file mode 100644 index 00000000000000..13df1dde35b027 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-shus_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English shus_pipeline pipeline CLIPForZeroShotClassification from hughtayloe +author: John Snow Labs +name: shus_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`shus_pipeline` is a English model originally trained by hughtayloe. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/shus_pipeline_en_5.5.0_3.0_1725456668538.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/shus_pipeline_en_5.5.0_3.0_1725456668538.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("shus_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("shus_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|shus_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.1 GB| + +## References + +https://huggingface.co/hughtayloe/shus + +## Included Models + +- ImageAssembler +- CLIPForZeroShotClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sitexsometre_camembert_base_ccnet_stsb25_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-sitexsometre_camembert_base_ccnet_stsb25_pipeline_en.md new file mode 100644 index 00000000000000..fe742212448ff0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sitexsometre_camembert_base_ccnet_stsb25_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English sitexsometre_camembert_base_ccnet_stsb25_pipeline pipeline CamemBertForSequenceClassification from Kigo1974 +author: John Snow Labs +name: sitexsometre_camembert_base_ccnet_stsb25_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sitexsometre_camembert_base_ccnet_stsb25_pipeline` is a English model originally trained by Kigo1974. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sitexsometre_camembert_base_ccnet_stsb25_pipeline_en_5.5.0_3.0_1725466689231.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sitexsometre_camembert_base_ccnet_stsb25_pipeline_en_5.5.0_3.0_1725466689231.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sitexsometre_camembert_base_ccnet_stsb25_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sitexsometre_camembert_base_ccnet_stsb25_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sitexsometre_camembert_base_ccnet_stsb25_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|390.8 MB| + +## References + +https://huggingface.co/Kigo1974/sitexsometre-camembert-base-ccnet-stsb25 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-sitexsometre_camembert_large_stsb25_en.md b/docs/_posts/ahmedlone127/2024-09-04-sitexsometre_camembert_large_stsb25_en.md new file mode 100644 index 00000000000000..845439851ea33d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-sitexsometre_camembert_large_stsb25_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sitexsometre_camembert_large_stsb25 CamemBertForSequenceClassification from Kigo1974 +author: John Snow Labs +name: sitexsometre_camembert_large_stsb25 +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, camembert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sitexsometre_camembert_large_stsb25` is a English model originally trained by Kigo1974. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sitexsometre_camembert_large_stsb25_en_5.5.0_3.0_1725466977989.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sitexsometre_camembert_large_stsb25_en_5.5.0_3.0_1725466977989.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = CamemBertForSequenceClassification.pretrained("sitexsometre_camembert_large_stsb25","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = CamemBertForSequenceClassification.pretrained("sitexsometre_camembert_large_stsb25", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sitexsometre_camembert_large_stsb25| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|805.5 MB| + +## References + +https://huggingface.co/Kigo1974/sitexsometre-camembert-large-stsb25 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-southern_sotho_all_mpnet_finetuned_comb_3000_en.md b/docs/_posts/ahmedlone127/2024-09-04-southern_sotho_all_mpnet_finetuned_comb_3000_en.md new file mode 100644 index 00000000000000..0bfa0939627261 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-southern_sotho_all_mpnet_finetuned_comb_3000_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English southern_sotho_all_mpnet_finetuned_comb_3000 MPNetEmbeddings from danfeg +author: John Snow Labs +name: southern_sotho_all_mpnet_finetuned_comb_3000 +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`southern_sotho_all_mpnet_finetuned_comb_3000` is a English model originally trained by danfeg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/southern_sotho_all_mpnet_finetuned_comb_3000_en_5.5.0_3.0_1725470229435.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/southern_sotho_all_mpnet_finetuned_comb_3000_en_5.5.0_3.0_1725470229435.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("southern_sotho_all_mpnet_finetuned_comb_3000","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("southern_sotho_all_mpnet_finetuned_comb_3000","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
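+
+If the raw sentence vectors are needed downstream (for example, for similarity search), an `EmbeddingsFinisher` can be appended to the same pipeline. This is a hedged sketch that reuses the `documentAssembler`, `embeddings`, and `data` objects defined in the Python example above:
+
+```python
+# Hedged sketch: converts the "embeddings" annotations from the example above into plain Spark vectors.
+from pyspark.ml import Pipeline
+from sparknlp.base import EmbeddingsFinisher
+
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["finished_embeddings"]) \
+    .setOutputAsVector(True)
+
+pipeline = Pipeline().setStages([documentAssembler, embeddings, finisher])
+result = pipeline.fit(data).transform(data)
+result.selectExpr("explode(finished_embeddings) as vector").show(truncate=40)
+```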
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|southern_sotho_all_mpnet_finetuned_comb_3000| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/danfeg/ST-ALL-MPNET_Finetuned-COMB-3000 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-spark_name_arabic_tonga_tonga_islands_english_en.md b/docs/_posts/ahmedlone127/2024-09-04-spark_name_arabic_tonga_tonga_islands_english_en.md new file mode 100644 index 00000000000000..862448d3fc53a3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-spark_name_arabic_tonga_tonga_islands_english_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English spark_name_arabic_tonga_tonga_islands_english MarianTransformer from ihebaker10 +author: John Snow Labs +name: spark_name_arabic_tonga_tonga_islands_english +date: 2024-09-04 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`spark_name_arabic_tonga_tonga_islands_english` is a English model originally trained by ihebaker10. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/spark_name_arabic_tonga_tonga_islands_english_en_5.5.0_3.0_1725494374166.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/spark_name_arabic_tonga_tonga_islands_english_en_5.5.0_3.0_1725494374166.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("spark_name_arabic_tonga_tonga_islands_english","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("spark_name_arabic_tonga_tonga_islands_english","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|spark_name_arabic_tonga_tonga_islands_english| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|527.6 MB| + +## References + +https://huggingface.co/ihebaker10/spark-name-ar-to-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-spark_name_arabic_tonga_tonga_islands_english_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-spark_name_arabic_tonga_tonga_islands_english_pipeline_en.md new file mode 100644 index 00000000000000..f4c1a8a6625a1e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-spark_name_arabic_tonga_tonga_islands_english_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English spark_name_arabic_tonga_tonga_islands_english_pipeline pipeline MarianTransformer from ihebaker10 +author: John Snow Labs +name: spark_name_arabic_tonga_tonga_islands_english_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`spark_name_arabic_tonga_tonga_islands_english_pipeline` is a English model originally trained by ihebaker10. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/spark_name_arabic_tonga_tonga_islands_english_pipeline_en_5.5.0_3.0_1725494399385.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/spark_name_arabic_tonga_tonga_islands_english_pipeline_en_5.5.0_3.0_1725494399385.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("spark_name_arabic_tonga_tonga_islands_english_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("spark_name_arabic_tonga_tonga_islands_english_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
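+
+The quick example above assumes a DataFrame `df` with the source-language text. A hedged sketch of preparing that input is shown below; the `text` column name and the `sparknlp.start()` session are assumptions following common Spark NLP usage, and the exact name of the translation output column can be checked with `printSchema()`.
+
+```python
+# Hedged sketch: assumes a live Spark NLP session and that the pipeline reads a "text" column.
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+df = spark.createDataFrame([["مرحبا بالعالم"]]).toDF("text")
+
+pipeline = PretrainedPipeline("spark_name_arabic_tonga_tonga_islands_english_pipeline", lang="en")
+annotations = pipeline.transform(df)
+annotations.printSchema()  # locate the MarianTransformer output column holding the English translation
+```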
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|spark_name_arabic_tonga_tonga_islands_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|528.1 MB| + +## References + +https://huggingface.co/ihebaker10/spark-name-ar-to-en + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-stt_best_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-stt_best_pipeline_en.md new file mode 100644 index 00000000000000..e684bc80472e42 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-stt_best_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English stt_best_pipeline pipeline WhisperForCTC from benghoula +author: John Snow Labs +name: stt_best_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`stt_best_pipeline` is a English model originally trained by benghoula. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/stt_best_pipeline_en_5.5.0_3.0_1725427113531.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/stt_best_pipeline_en_5.5.0_3.0_1725427113531.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("stt_best_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("stt_best_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
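+
+For a speech-recognition pipeline, the `df` in the snippet above is an audio DataFrame rather than a text one. A hedged sketch is shown below; it assumes the audio has already been decoded to 16 kHz float samples and that the pipeline's `AudioAssembler` reads the conventional `audio_content` column, which may differ depending on the Spark NLP version.
+
+```python
+# Hedged sketch: one second of silence stands in for real decoded audio samples.
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+raw_floats = [0.0] * 16000  # replace with samples decoded from a real audio file
+audio_df = spark.createDataFrame([[raw_floats]]).toDF("audio_content")
+
+pipeline = PretrainedPipeline("stt_best_pipeline", lang="en")
+annotations = pipeline.transform(audio_df)
+annotations.printSchema()  # the transcription appears in the WhisperForCTC output column
+```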
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|stt_best_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/benghoula/stt_best + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-tara_roberta_base_persian_farsi_qa_pipeline_fa.md b/docs/_posts/ahmedlone127/2024-09-04-tara_roberta_base_persian_farsi_qa_pipeline_fa.md new file mode 100644 index 00000000000000..182764d3ddfbd5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-tara_roberta_base_persian_farsi_qa_pipeline_fa.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Persian tara_roberta_base_persian_farsi_qa_pipeline pipeline RoBertaForQuestionAnswering from hosseinhimself +author: John Snow Labs +name: tara_roberta_base_persian_farsi_qa_pipeline +date: 2024-09-04 +tags: [fa, open_source, pipeline, onnx] +task: Question Answering +language: fa +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tara_roberta_base_persian_farsi_qa_pipeline` is a Persian model originally trained by hosseinhimself. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tara_roberta_base_persian_farsi_qa_pipeline_fa_5.5.0_3.0_1725450771187.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tara_roberta_base_persian_farsi_qa_pipeline_fa_5.5.0_3.0_1725450771187.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("tara_roberta_base_persian_farsi_qa_pipeline", lang = "fa") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("tara_roberta_base_persian_farsi_qa_pipeline", lang = "fa") +val annotations = pipeline.transform(df) + +``` +
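+
+Since this pipeline starts with a `MultiDocumentAssembler`, the input DataFrame carries a question and a context rather than a single text column. A hedged sketch, assuming the usual `question` and `context` column names:
+
+```python
+# Hedged sketch: assumes the MultiDocumentAssembler reads "question" and "context" columns.
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+df = spark.createDataFrame(
+    [["پایتخت ایران کجاست؟", "تهران پایتخت ایران است."]]
+).toDF("question", "context")
+
+pipeline = PretrainedPipeline("tara_roberta_base_persian_farsi_qa_pipeline", lang="fa")
+annotations = pipeline.transform(df)
+annotations.printSchema()  # the predicted answer span appears in the question-answering output column
+```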
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tara_roberta_base_persian_farsi_qa_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fa| +|Size:|463.6 MB| + +## References + +https://huggingface.co/hosseinhimself/tara-roberta-base-fa-qa + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-tenseprediction_en.md b/docs/_posts/ahmedlone127/2024-09-04-tenseprediction_en.md new file mode 100644 index 00000000000000..70ff89407ed724 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-tenseprediction_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English tenseprediction DistilBertForTokenClassification from rahulkhandelw +author: John Snow Labs +name: tenseprediction +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tenseprediction` is a English model originally trained by rahulkhandelw. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tenseprediction_en_5.5.0_3.0_1725448361553.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tenseprediction_en_5.5.0_3.0_1725448361553.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("tenseprediction","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("tenseprediction", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tenseprediction| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/rahulkhandelw/TensePrediction \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-tenseprediction_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-tenseprediction_pipeline_en.md new file mode 100644 index 00000000000000..f5c48004c3333d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-tenseprediction_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English tenseprediction_pipeline pipeline DistilBertForTokenClassification from rahulkhandelw +author: John Snow Labs +name: tenseprediction_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tenseprediction_pipeline` is a English model originally trained by rahulkhandelw. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tenseprediction_pipeline_en_5.5.0_3.0_1725448373345.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tenseprediction_pipeline_en_5.5.0_3.0_1725448373345.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("tenseprediction_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("tenseprediction_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
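+
+Besides `transform()` on a DataFrame, `PretrainedPipeline` also exposes `annotate()` for a quick single-string check. A hedged sketch (the example sentence is illustrative only):
+
+```python
+# Hedged sketch: annotate() returns a dict keyed by the output column of each included annotator.
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+pipeline = PretrainedPipeline("tenseprediction_pipeline", lang="en")
+
+result = pipeline.annotate("She walked to school yesterday.")
+print(result.keys())  # inspect which output keys carry the token-level tags
+```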
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tenseprediction_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/rahulkhandelw/TensePrediction + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-test3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-test3_pipeline_en.md new file mode 100644 index 00000000000000..6c6c5d6fdd304e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-test3_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English test3_pipeline pipeline DistilBertForTokenClassification from yam1ke +author: John Snow Labs +name: test3_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`test3_pipeline` is a English model originally trained by yam1ke. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test3_pipeline_en_5.5.0_3.0_1725460363982.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test3_pipeline_en_5.5.0_3.0_1725460363982.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("test3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("test3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/yam1ke/test3 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-test_1_en.md b/docs/_posts/ahmedlone127/2024-09-04-test_1_en.md new file mode 100644 index 00000000000000..4dbd884973bc35 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-test_1_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English test_1 MPNetEmbeddings from diegoicomp +author: John Snow Labs +name: test_1 +date: 2024-09-04 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`test_1` is a English model originally trained by diegoicomp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_1_en_5.5.0_3.0_1725470147241.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_1_en_5.5.0_3.0_1725470147241.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("test_1","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("test_1","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/diegoicomp/test-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-test_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-test_model_pipeline_en.md new file mode 100644 index 00000000000000..5a03910d693563 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-test_model_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English test_model_pipeline pipeline DistilBertForTokenClassification from natalierobbins +author: John Snow Labs +name: test_model_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`test_model_pipeline` is a English model originally trained by natalierobbins. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_model_pipeline_en_5.5.0_3.0_1725492565320.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_model_pipeline_en_5.5.0_3.0_1725492565320.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("test_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("test_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/natalierobbins/test_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-test_trainer_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-test_trainer_pipeline_en.md new file mode 100644 index 00000000000000..7cd79286470573 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-test_trainer_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English test_trainer_pipeline pipeline CamemBertForSequenceClassification from AntoineD +author: John Snow Labs +name: test_trainer_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`test_trainer_pipeline` is a English model originally trained by AntoineD. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_trainer_pipeline_en_5.5.0_3.0_1725480747623.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_trainer_pipeline_en_5.5.0_3.0_1725480747623.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("test_trainer_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("test_trainer_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test_trainer_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|389.7 MB| + +## References + +https://huggingface.co/AntoineD/test_trainer + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-text_classification_nolora_en.md b/docs/_posts/ahmedlone127/2024-09-04-text_classification_nolora_en.md new file mode 100644 index 00000000000000..2b17b135e6a2ae --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-text_classification_nolora_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English text_classification_nolora DistilBertForSequenceClassification from Intradiction +author: John Snow Labs +name: text_classification_nolora +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`text_classification_nolora` is a English model originally trained by Intradiction. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/text_classification_nolora_en_5.5.0_3.0_1725489676435.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/text_classification_nolora_en_5.5.0_3.0_1725489676435.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("text_classification_nolora","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("text_classification_nolora", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|text_classification_nolora| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Intradiction/text_classification_NoLORA \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-text_classification_nolora_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-text_classification_nolora_pipeline_en.md new file mode 100644 index 00000000000000..e4138460d5a0c5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-text_classification_nolora_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English text_classification_nolora_pipeline pipeline DistilBertForSequenceClassification from Intradiction +author: John Snow Labs +name: text_classification_nolora_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`text_classification_nolora_pipeline` is a English model originally trained by Intradiction. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/text_classification_nolora_pipeline_en_5.5.0_3.0_1725489688868.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/text_classification_nolora_pipeline_en_5.5.0_3.0_1725489688868.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("text_classification_nolora_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("text_classification_nolora_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
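+
+The quick example above assumes a DataFrame `df` already exists. A hedged sketch of preparing a small batch of inputs is shown below; the `text` column name follows the usual Spark NLP convention, and the `class` output column is taken from the standalone model example rather than confirmed for this pipeline.
+
+```python
+# Hedged sketch: assumes the pipeline reads a "text" column and writes predictions to "class".
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+df = spark.createDataFrame(
+    [["I really enjoyed this movie."], ["The service was disappointing."]]
+).toDF("text")
+
+pipeline = PretrainedPipeline("text_classification_nolora_pipeline", lang="en")
+annotations = pipeline.transform(df)
+annotations.select("text", "class.result").show(truncate=False)  # verify the column name with printSchema() if it differs
+```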
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|text_classification_nolora_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Intradiction/text_classification_NoLORA + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-tiny_random_albertfortokenclassification_en.md b/docs/_posts/ahmedlone127/2024-09-04-tiny_random_albertfortokenclassification_en.md new file mode 100644 index 00000000000000..4bc81fba20d8cc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-tiny_random_albertfortokenclassification_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English tiny_random_albertfortokenclassification AlbertForTokenClassification from hf-tiny-model-private +author: John Snow Labs +name: tiny_random_albertfortokenclassification +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, albert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tiny_random_albertfortokenclassification` is a English model originally trained by hf-tiny-model-private. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_random_albertfortokenclassification_en_5.5.0_3.0_1725486765081.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_random_albertfortokenclassification_en_5.5.0_3.0_1725486765081.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = AlbertForTokenClassification.pretrained("tiny_random_albertfortokenclassification","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = AlbertForTokenClassification.pretrained("tiny_random_albertfortokenclassification", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_random_albertfortokenclassification| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|15.2 MB| + +## References + +https://huggingface.co/hf-tiny-model-private/tiny-random-AlbertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-tiny_random_albertfortokenclassification_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-tiny_random_albertfortokenclassification_pipeline_en.md new file mode 100644 index 00000000000000..8dd16e33286c9a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-tiny_random_albertfortokenclassification_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English tiny_random_albertfortokenclassification_pipeline pipeline AlbertForTokenClassification from hf-tiny-model-private +author: John Snow Labs +name: tiny_random_albertfortokenclassification_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tiny_random_albertfortokenclassification_pipeline` is a English model originally trained by hf-tiny-model-private. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_random_albertfortokenclassification_pipeline_en_5.5.0_3.0_1725486766206.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_random_albertfortokenclassification_pipeline_en_5.5.0_3.0_1725486766206.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("tiny_random_albertfortokenclassification_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("tiny_random_albertfortokenclassification_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_random_albertfortokenclassification_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|15.2 MB| + +## References + +https://huggingface.co/hf-tiny-model-private/tiny-random-AlbertForTokenClassification + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-tiny_random_bertfortokenclassification_ydshieh_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-tiny_random_bertfortokenclassification_ydshieh_pipeline_en.md new file mode 100644 index 00000000000000..3db96d99718a10 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-tiny_random_bertfortokenclassification_ydshieh_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English tiny_random_bertfortokenclassification_ydshieh_pipeline pipeline BertForTokenClassification from ydshieh +author: John Snow Labs +name: tiny_random_bertfortokenclassification_ydshieh_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tiny_random_bertfortokenclassification_ydshieh_pipeline` is a English model originally trained by ydshieh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_random_bertfortokenclassification_ydshieh_pipeline_en_5.5.0_3.0_1725477874446.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_random_bertfortokenclassification_ydshieh_pipeline_en_5.5.0_3.0_1725477874446.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is assumed to be a Spark DataFrame with the input text in a "text" column
+pipeline = PretrainedPipeline("tiny_random_bertfortokenclassification_ydshieh_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is assumed to be a Spark DataFrame with the input text in a "text" column
+val pipeline = new PretrainedPipeline("tiny_random_bertfortokenclassification_ydshieh_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_random_bertfortokenclassification_ydshieh_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|379.5 KB| + +## References + +https://huggingface.co/ydshieh/tiny-random-BertForTokenClassification + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-tinyclip_vit_39m_16_text_19m_yfcc15m_en.md b/docs/_posts/ahmedlone127/2024-09-04-tinyclip_vit_39m_16_text_19m_yfcc15m_en.md new file mode 100644 index 00000000000000..eea4ac30a447e4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-tinyclip_vit_39m_16_text_19m_yfcc15m_en.md @@ -0,0 +1,120 @@ +--- +layout: model +title: English tinyclip_vit_39m_16_text_19m_yfcc15m CLIPForZeroShotClassification from wkcn +author: John Snow Labs +name: tinyclip_vit_39m_16_text_19m_yfcc15m +date: 2024-09-04 +tags: [en, open_source, onnx, zero_shot, clip, image] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CLIPForZeroShotClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tinyclip_vit_39m_16_text_19m_yfcc15m` is a English model originally trained by wkcn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tinyclip_vit_39m_16_text_19m_yfcc15m_en_5.5.0_3.0_1725492054473.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tinyclip_vit_39m_16_text_19m_yfcc15m_en_5.5.0_3.0_1725492054473.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+imageDF = spark.read \
+    .format("image") \
+    .option("dropInvalid", value = True) \
+    .load("src/test/resources/image/")
+
+candidateLabels = [
+    "a photo of a bird",
+    "a photo of a cat",
+    "a photo of a dog",
+    "a photo of a hen",
+    "a photo of a hippo",
+    "a photo of a room",
+    "a photo of a tractor",
+    "a photo of an ostrich",
+    "a photo of an ox"]
+
+imageAssembler = ImageAssembler() \
+    .setInputCol("image") \
+    .setOutputCol("image_assembler")
+
+imageClassifier = CLIPForZeroShotClassification.pretrained("tinyclip_vit_39m_16_text_19m_yfcc15m","en") \
+    .setInputCols(["image_assembler"]) \
+    .setOutputCol("label") \
+    .setCandidateLabels(candidateLabels)
+
+pipeline = Pipeline().setStages([imageAssembler, imageClassifier])
+pipelineModel = pipeline.fit(imageDF)
+pipelineDF = pipelineModel.transform(imageDF)
+
+```
+```scala
+
+val imageDF = ResourceHelper.spark.read
+    .format("image")
+    .option("dropInvalid", value = true)
+    .load("src/test/resources/image/")
+
+val candidateLabels = Array(
+    "a photo of a bird",
+    "a photo of a cat",
+    "a photo of a dog",
+    "a photo of a hen",
+    "a photo of a hippo",
+    "a photo of a room",
+    "a photo of a tractor",
+    "a photo of an ostrich",
+    "a photo of an ox")
+
+val imageAssembler = new ImageAssembler()
+    .setInputCol("image")
+    .setOutputCol("image_assembler")
+
+val imageClassifier = CLIPForZeroShotClassification.pretrained("tinyclip_vit_39m_16_text_19m_yfcc15m", "en")
+    .setInputCols(Array("image_assembler"))
+    .setOutputCol("label")
+    .setCandidateLabels(candidateLabels)
+
+val pipeline = new Pipeline().setStages(Array(imageAssembler, imageClassifier))
+val pipelineModel = pipeline.fit(imageDF)
+val pipelineDF = pipelineModel.transform(imageDF)
+
+```
+
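+
+A short follow-up sketch (an assumption layered on the example above): once `pipelineDF` has been computed, the candidate label chosen for each image can be listed next to the image path, which the assembled image annotation carries in its `origin` field:
+
+```python
+# Show which candidate label was predicted for each input image
+pipelineDF.select("image_assembler.origin", "label.result").show(truncate=False)
+```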
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tinyclip_vit_39m_16_text_19m_yfcc15m| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[label]| +|Language:|en| +|Size:|200.2 MB| + +## References + +https://huggingface.co/wkcn/TinyCLIP-ViT-39M-16-Text-19M-YFCC15M \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-tinyroberta_squad2_finetuned_emrqa_msquad_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-tinyroberta_squad2_finetuned_emrqa_msquad_pipeline_en.md new file mode 100644 index 00000000000000..4cdf2bd7c534e7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-tinyroberta_squad2_finetuned_emrqa_msquad_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English tinyroberta_squad2_finetuned_emrqa_msquad_pipeline pipeline RoBertaForQuestionAnswering from Eladio +author: John Snow Labs +name: tinyroberta_squad2_finetuned_emrqa_msquad_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tinyroberta_squad2_finetuned_emrqa_msquad_pipeline` is a English model originally trained by Eladio. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tinyroberta_squad2_finetuned_emrqa_msquad_pipeline_en_5.5.0_3.0_1725483540670.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tinyroberta_squad2_finetuned_emrqa_msquad_pipeline_en_5.5.0_3.0_1725483540670.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is assumed to be a Spark DataFrame carrying the question and context columns this QA pipeline expects
+pipeline = PretrainedPipeline("tinyroberta_squad2_finetuned_emrqa_msquad_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is assumed to be a Spark DataFrame carrying the question and context columns this QA pipeline expects
+val pipeline = new PretrainedPipeline("tinyroberta_squad2_finetuned_emrqa_msquad_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tinyroberta_squad2_finetuned_emrqa_msquad_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|306.9 MB| + +## References + +https://huggingface.co/Eladio/tinyroberta-squad2-finetuned-emrqa-msquad + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-token_classification_hemg_en.md b/docs/_posts/ahmedlone127/2024-09-04-token_classification_hemg_en.md new file mode 100644 index 00000000000000..f5796636015a07 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-token_classification_hemg_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English token_classification_hemg DistilBertForTokenClassification from Hemg +author: John Snow Labs +name: token_classification_hemg +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`token_classification_hemg` is a English model originally trained by Hemg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/token_classification_hemg_en_5.5.0_3.0_1725492913061.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/token_classification_hemg_en_5.5.0_3.0_1725492913061.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("token_classification_hemg","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("token_classification_hemg", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|token_classification_hemg| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Hemg/token-classification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-token_classification_hemg_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-token_classification_hemg_pipeline_en.md new file mode 100644 index 00000000000000..785d05f880724d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-token_classification_hemg_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English token_classification_hemg_pipeline pipeline DistilBertForTokenClassification from Hemg +author: John Snow Labs +name: token_classification_hemg_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`token_classification_hemg_pipeline` is a English model originally trained by Hemg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/token_classification_hemg_pipeline_en_5.5.0_3.0_1725492925236.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/token_classification_hemg_pipeline_en_5.5.0_3.0_1725492925236.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is assumed to be a Spark DataFrame with the input text in a "text" column
+pipeline = PretrainedPipeline("token_classification_hemg_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is assumed to be a Spark DataFrame with the input text in a "text" column
+val pipeline = new PretrainedPipeline("token_classification_hemg_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|token_classification_hemg_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Hemg/token-classification + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-token_classification_park_hip_02_en.md b/docs/_posts/ahmedlone127/2024-09-04-token_classification_park_hip_02_en.md new file mode 100644 index 00000000000000..77e42e87c36534 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-token_classification_park_hip_02_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English token_classification_park_hip_02 DistilBertForTokenClassification from Park-Hip-02 +author: John Snow Labs +name: token_classification_park_hip_02 +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`token_classification_park_hip_02` is a English model originally trained by Park-Hip-02. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/token_classification_park_hip_02_en_5.5.0_3.0_1725448199076.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/token_classification_park_hip_02_en_5.5.0_3.0_1725448199076.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("token_classification_park_hip_02","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("token_classification_park_hip_02", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|token_classification_park_hip_02| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Park-Hip-02/token-classification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-token_classification_park_hip_02_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-token_classification_park_hip_02_pipeline_en.md new file mode 100644 index 00000000000000..b5435699220f9f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-token_classification_park_hip_02_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English token_classification_park_hip_02_pipeline pipeline DistilBertForTokenClassification from Park-Hip-02 +author: John Snow Labs +name: token_classification_park_hip_02_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`token_classification_park_hip_02_pipeline` is a English model originally trained by Park-Hip-02. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/token_classification_park_hip_02_pipeline_en_5.5.0_3.0_1725448219309.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/token_classification_park_hip_02_pipeline_en_5.5.0_3.0_1725448219309.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is assumed to be a Spark DataFrame with the input text in a "text" column
+pipeline = PretrainedPipeline("token_classification_park_hip_02_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is assumed to be a Spark DataFrame with the input text in a "text" column
+val pipeline = new PretrainedPipeline("token_classification_park_hip_02_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|token_classification_park_hip_02_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Park-Hip-02/token-classification + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-trained_danish_en.md b/docs/_posts/ahmedlone127/2024-09-04-trained_danish_en.md new file mode 100644 index 00000000000000..613b07412e9fda --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-trained_danish_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English trained_danish DistilBertForTokenClassification from annamariagnat +author: John Snow Labs +name: trained_danish +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`trained_danish` is a English model originally trained by annamariagnat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/trained_danish_en_5.5.0_3.0_1725492780249.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/trained_danish_en_5.5.0_3.0_1725492780249.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("trained_danish","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("trained_danish", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|trained_danish| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|505.4 MB| + +## References + +https://huggingface.co/annamariagnat/trained_danish \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-trained_danish_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-trained_danish_pipeline_en.md new file mode 100644 index 00000000000000..36cffdb01c3680 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-trained_danish_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English trained_danish_pipeline pipeline DistilBertForTokenClassification from annamariagnat +author: John Snow Labs +name: trained_danish_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`trained_danish_pipeline` is a English model originally trained by annamariagnat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/trained_danish_pipeline_en_5.5.0_3.0_1725492804399.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/trained_danish_pipeline_en_5.5.0_3.0_1725492804399.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is assumed to be a Spark DataFrame with the input text in a "text" column
+pipeline = PretrainedPipeline("trained_danish_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is assumed to be a Spark DataFrame with the input text in a "text" column
+val pipeline = new PretrainedPipeline("trained_danish_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|trained_danish_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|505.4 MB| + +## References + +https://huggingface.co/annamariagnat/trained_danish + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-trust_merged_dataset_mdeberta_v3_10epoch_en.md b/docs/_posts/ahmedlone127/2024-09-04-trust_merged_dataset_mdeberta_v3_10epoch_en.md new file mode 100644 index 00000000000000..55dd1613ba4459 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-trust_merged_dataset_mdeberta_v3_10epoch_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English trust_merged_dataset_mdeberta_v3_10epoch DeBertaForSequenceClassification from luisespinosa +author: John Snow Labs +name: trust_merged_dataset_mdeberta_v3_10epoch +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`trust_merged_dataset_mdeberta_v3_10epoch` is a English model originally trained by luisespinosa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/trust_merged_dataset_mdeberta_v3_10epoch_en_5.5.0_3.0_1725467962776.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/trust_merged_dataset_mdeberta_v3_10epoch_en_5.5.0_3.0_1725467962776.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("trust_merged_dataset_mdeberta_v3_10epoch","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("trust_merged_dataset_mdeberta_v3_10epoch", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
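+
+To inspect the predictions, a short sketch assuming the `pipelineDF` built in the Python snippet above:
+
+```python
+# Show each input text next to the class label predicted for it
+pipelineDF.select("text", "class.result").show(truncate=False)
+```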
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|trust_merged_dataset_mdeberta_v3_10epoch| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|822.8 MB| + +## References + +https://huggingface.co/luisespinosa/trust-merged_dataset_mdeberta-v3_10epoch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-trust_merged_dataset_mdeberta_v3_1epoch_en.md b/docs/_posts/ahmedlone127/2024-09-04-trust_merged_dataset_mdeberta_v3_1epoch_en.md new file mode 100644 index 00000000000000..5e367cce381242 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-trust_merged_dataset_mdeberta_v3_1epoch_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English trust_merged_dataset_mdeberta_v3_1epoch DeBertaForSequenceClassification from luisespinosa +author: John Snow Labs +name: trust_merged_dataset_mdeberta_v3_1epoch +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`trust_merged_dataset_mdeberta_v3_1epoch` is a English model originally trained by luisespinosa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/trust_merged_dataset_mdeberta_v3_1epoch_en_5.5.0_3.0_1725438484985.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/trust_merged_dataset_mdeberta_v3_1epoch_en_5.5.0_3.0_1725438484985.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("trust_merged_dataset_mdeberta_v3_1epoch","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("trust_merged_dataset_mdeberta_v3_1epoch", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|trust_merged_dataset_mdeberta_v3_1epoch| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|821.0 MB| + +## References + +https://huggingface.co/luisespinosa/trust-merged_dataset_mdeberta-v3_1epoch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-tupy_bert_large_binary_classifier_pipeline_pt.md b/docs/_posts/ahmedlone127/2024-09-04-tupy_bert_large_binary_classifier_pipeline_pt.md new file mode 100644 index 00000000000000..030fecebd2df2d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-tupy_bert_large_binary_classifier_pipeline_pt.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Portuguese tupy_bert_large_binary_classifier_pipeline pipeline BertForSequenceClassification from Silly-Machine +author: John Snow Labs +name: tupy_bert_large_binary_classifier_pipeline +date: 2024-09-04 +tags: [pt, open_source, pipeline, onnx] +task: Text Classification +language: pt +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tupy_bert_large_binary_classifier_pipeline` is a Portuguese model originally trained by Silly-Machine. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tupy_bert_large_binary_classifier_pipeline_pt_5.5.0_3.0_1725432662333.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tupy_bert_large_binary_classifier_pipeline_pt_5.5.0_3.0_1725432662333.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is assumed to be a Spark DataFrame with the input text in a "text" column
+pipeline = PretrainedPipeline("tupy_bert_large_binary_classifier_pipeline", lang = "pt")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is assumed to be a Spark DataFrame with the input text in a "text" column
+val pipeline = new PretrainedPipeline("tupy_bert_large_binary_classifier_pipeline", lang = "pt")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tupy_bert_large_binary_classifier_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|pt| +|Size:|1.3 GB| + +## References + +https://huggingface.co/Silly-Machine/TuPy-Bert-Large-Binary-Classifier + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-twitter_paraphrase_embeddings_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-twitter_paraphrase_embeddings_pipeline_en.md new file mode 100644 index 00000000000000..3b665089c39145 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-twitter_paraphrase_embeddings_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English twitter_paraphrase_embeddings_pipeline pipeline MPNetEmbeddings from mspy +author: John Snow Labs +name: twitter_paraphrase_embeddings_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`twitter_paraphrase_embeddings_pipeline` is a English model originally trained by mspy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/twitter_paraphrase_embeddings_pipeline_en_5.5.0_3.0_1725470765802.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/twitter_paraphrase_embeddings_pipeline_en_5.5.0_3.0_1725470765802.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is assumed to be a Spark DataFrame with the input text in a "text" column
+pipeline = PretrainedPipeline("twitter_paraphrase_embeddings_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is assumed to be a Spark DataFrame with the input text in a "text" column
+val pipeline = new PretrainedPipeline("twitter_paraphrase_embeddings_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|twitter_paraphrase_embeddings_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/mspy/twitter-paraphrase-embeddings + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-twitter_roberta_large_hate_latest_en.md b/docs/_posts/ahmedlone127/2024-09-04-twitter_roberta_large_hate_latest_en.md new file mode 100644 index 00000000000000..faf6dbd2bd7c43 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-twitter_roberta_large_hate_latest_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English twitter_roberta_large_hate_latest RoBertaForSequenceClassification from cardiffnlp +author: John Snow Labs +name: twitter_roberta_large_hate_latest +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`twitter_roberta_large_hate_latest` is a English model originally trained by cardiffnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/twitter_roberta_large_hate_latest_en_5.5.0_3.0_1725486264936.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/twitter_roberta_large_hate_latest_en_5.5.0_3.0_1725486264936.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("twitter_roberta_large_hate_latest","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("twitter_roberta_large_hate_latest", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|twitter_roberta_large_hate_latest| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/cardiffnlp/twitter-roberta-large-hate-latest \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-umberto_fine_tuned_docclass_punjabi_eastern_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-umberto_fine_tuned_docclass_punjabi_eastern_pipeline_en.md new file mode 100644 index 00000000000000..fa3a48db8f4ea6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-umberto_fine_tuned_docclass_punjabi_eastern_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English umberto_fine_tuned_docclass_punjabi_eastern_pipeline pipeline CamemBertForSequenceClassification from colinglab +author: John Snow Labs +name: umberto_fine_tuned_docclass_punjabi_eastern_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`umberto_fine_tuned_docclass_punjabi_eastern_pipeline` is a English model originally trained by colinglab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/umberto_fine_tuned_docclass_punjabi_eastern_pipeline_en_5.5.0_3.0_1725466433377.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/umberto_fine_tuned_docclass_punjabi_eastern_pipeline_en_5.5.0_3.0_1725466433377.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is assumed to be a Spark DataFrame with the input text in a "text" column
+pipeline = PretrainedPipeline("umberto_fine_tuned_docclass_punjabi_eastern_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is assumed to be a Spark DataFrame with the input text in a "text" column
+val pipeline = new PretrainedPipeline("umberto_fine_tuned_docclass_punjabi_eastern_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|umberto_fine_tuned_docclass_punjabi_eastern_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|408.6 MB| + +## References + +https://huggingface.co/colinglab/UMBERTO_fine-tuned_DocClass_PA + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-v39_en.md b/docs/_posts/ahmedlone127/2024-09-04-v39_en.md new file mode 100644 index 00000000000000..3d6d43b2468075 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-v39_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English v39 AlbertForTokenClassification from LogicCrafters +author: John Snow Labs +name: v39 +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, albert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`v39` is a English model originally trained by LogicCrafters. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/v39_en_5.5.0_3.0_1725487017830.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/v39_en_5.5.0_3.0_1725487017830.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = AlbertForTokenClassification.pretrained("v39","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = AlbertForTokenClassification.pretrained("v39", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|v39| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|771.0 MB| + +## References + +https://huggingface.co/LogicCrafters/v39 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-v51_en.md b/docs/_posts/ahmedlone127/2024-09-04-v51_en.md new file mode 100644 index 00000000000000..735ee36d9e5ad5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-v51_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English v51 AlbertForTokenClassification from LogicCrafters +author: John Snow Labs +name: v51 +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, albert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`v51` is a English model originally trained by LogicCrafters. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/v51_en_5.5.0_3.0_1725487166880.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/v51_en_5.5.0_3.0_1725487166880.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = AlbertForTokenClassification.pretrained("v51","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = AlbertForTokenClassification.pretrained("v51", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|v51| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|393.2 MB| + +## References + +https://huggingface.co/LogicCrafters/v51 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-vir_pat_qa_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-vir_pat_qa_pipeline_en.md new file mode 100644 index 00000000000000..7a21bccbc57ef0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-vir_pat_qa_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English vir_pat_qa_pipeline pipeline RoBertaForQuestionAnswering from Mikelium5 +author: John Snow Labs +name: vir_pat_qa_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`vir_pat_qa_pipeline` is a English model originally trained by Mikelium5. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/vir_pat_qa_pipeline_en_5.5.0_3.0_1725483460670.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/vir_pat_qa_pipeline_en_5.5.0_3.0_1725483460670.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is assumed to be a Spark DataFrame carrying the question and context columns this QA pipeline expects
+pipeline = PretrainedPipeline("vir_pat_qa_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is assumed to be a Spark DataFrame carrying the question and context columns this QA pipeline expects
+val pipeline = new PretrainedPipeline("vir_pat_qa_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|vir_pat_qa_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|459.9 MB| + +## References + +https://huggingface.co/Mikelium5/VIR-PAT-QA + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-vispell_small_v1_pipeline_vi.md b/docs/_posts/ahmedlone127/2024-09-04-vispell_small_v1_pipeline_vi.md new file mode 100644 index 00000000000000..e5a5b0d9cae463 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-vispell_small_v1_pipeline_vi.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Vietnamese vispell_small_v1_pipeline pipeline MarianTransformer from ademax +author: John Snow Labs +name: vispell_small_v1_pipeline +date: 2024-09-04 +tags: [vi, open_source, pipeline, onnx] +task: Translation +language: vi +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`vispell_small_v1_pipeline` is a Vietnamese model originally trained by ademax. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/vispell_small_v1_pipeline_vi_5.5.0_3.0_1725493763780.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/vispell_small_v1_pipeline_vi_5.5.0_3.0_1725493763780.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is assumed to be a Spark DataFrame with the input text in a "text" column
+pipeline = PretrainedPipeline("vispell_small_v1_pipeline", lang = "vi")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is assumed to be a Spark DataFrame with the input text in a "text" column
+val pipeline = new PretrainedPipeline("vispell_small_v1_pipeline", lang = "vi")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|vispell_small_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|vi| +|Size:|404.0 MB| + +## References + +https://huggingface.co/ademax/vispell-small-v1 + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-week5_eng_distilbert_base_multilingual_cased_finetuned_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-04-week5_eng_distilbert_base_multilingual_cased_finetuned_pipeline_xx.md new file mode 100644 index 00000000000000..44be5b614d93e7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-week5_eng_distilbert_base_multilingual_cased_finetuned_pipeline_xx.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Multilingual week5_eng_distilbert_base_multilingual_cased_finetuned_pipeline pipeline DistilBertForTokenClassification from ensw +author: John Snow Labs +name: week5_eng_distilbert_base_multilingual_cased_finetuned_pipeline +date: 2024-09-04 +tags: [xx, open_source, pipeline, onnx] +task: Named Entity Recognition +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`week5_eng_distilbert_base_multilingual_cased_finetuned_pipeline` is a Multilingual model originally trained by ensw. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/week5_eng_distilbert_base_multilingual_cased_finetuned_pipeline_xx_5.5.0_3.0_1725448485068.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/week5_eng_distilbert_base_multilingual_cased_finetuned_pipeline_xx_5.5.0_3.0_1725448485068.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is assumed to be a Spark DataFrame with the input text in a "text" column
+pipeline = PretrainedPipeline("week5_eng_distilbert_base_multilingual_cased_finetuned_pipeline", lang = "xx")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is assumed to be a Spark DataFrame with the input text in a "text" column
+val pipeline = new PretrainedPipeline("week5_eng_distilbert_base_multilingual_cased_finetuned_pipeline", lang = "xx")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|week5_eng_distilbert_base_multilingual_cased_finetuned_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|505.4 MB| + +## References + +https://huggingface.co/ensw/week5-eng-distilbert-base-multilingual-cased-finetuned + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-whisper_small_finetune_taiwanese_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-whisper_small_finetune_taiwanese_pipeline_en.md new file mode 100644 index 00000000000000..ec7845654eb414 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-whisper_small_finetune_taiwanese_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_small_finetune_taiwanese_pipeline pipeline WhisperForCTC from Jackyhsien +author: John Snow Labs +name: whisper_small_finetune_taiwanese_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_finetune_taiwanese_pipeline` is a English model originally trained by Jackyhsien. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_finetune_taiwanese_pipeline_en_5.5.0_3.0_1725430209044.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_finetune_taiwanese_pipeline_en_5.5.0_3.0_1725430209044.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_finetune_taiwanese_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_finetune_taiwanese_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_finetune_taiwanese_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/Jackyhsien/whisper-small-finetune-taiwanese + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-whisper_small_singlish_122k_en.md b/docs/_posts/ahmedlone127/2024-09-04-whisper_small_singlish_122k_en.md new file mode 100644 index 00000000000000..c198edfd1f928d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-whisper_small_singlish_122k_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_small_singlish_122k WhisperForCTC from jensenlwt +author: John Snow Labs +name: whisper_small_singlish_122k +date: 2024-09-04 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_singlish_122k` is a English model originally trained by jensenlwt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_singlish_122k_en_5.5.0_3.0_1725430236223.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_singlish_122k_en_5.5.0_3.0_1725430236223.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_singlish_122k","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+  .setInputCol("audio_content")
+  .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_singlish_122k", "en")
+  .setInputCols(Array("audio_assembler"))
+  .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
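+In the example above, `data` is assumed to already exist. One way to build it (a sketch — librosa, the file path, and the 16 kHz sample rate are illustrative choices, not part of this card):
+
+```python
+import librosa
+
+# Load a local recording and resample to 16 kHz mono, the rate Whisper models expect.
+waveform, _ = librosa.load("sample.wav", sr=16000)  # "sample.wav" is a placeholder path
+
+# AudioAssembler reads the raw waveform as an array of floats from the input column.
+data = spark.createDataFrame([[waveform.tolist()]]).toDF("audio_content")
+
+# With `data` defined, the pipeline defined above can be fitted and applied.
+result = pipeline.fit(data).transform(data)
+result.select("text.result").show(truncate=False)
+```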
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_singlish_122k| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/jensenlwt/whisper-small-singlish-122k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-whisper_tiny_english_tyocre_en.md b/docs/_posts/ahmedlone127/2024-09-04-whisper_tiny_english_tyocre_en.md new file mode 100644 index 00000000000000..0ea7ed961b494b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-whisper_tiny_english_tyocre_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_tiny_english_tyocre WhisperForCTC from TyoCre +author: John Snow Labs +name: whisper_tiny_english_tyocre +date: 2024-09-04 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_tiny_english_tyocre` is a English model originally trained by TyoCre. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_tiny_english_tyocre_en_5.5.0_3.0_1725425411217.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_tiny_english_tyocre_en_5.5.0_3.0_1725425411217.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_tiny_english_tyocre","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+  .setInputCol("audio_content")
+  .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_tiny_english_tyocre", "en")
+  .setInputCols(Array("audio_assembler"))
+  .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_tiny_english_tyocre| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|389.9 MB| + +## References + +https://huggingface.co/TyoCre/whisper-tiny-english \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-whisper_tiny_finetune_pooya_fallah_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-whisper_tiny_finetune_pooya_fallah_pipeline_en.md new file mode 100644 index 00000000000000..aeeda6a98b8d3f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-whisper_tiny_finetune_pooya_fallah_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_tiny_finetune_pooya_fallah_pipeline pipeline WhisperForCTC from Pooya-Fallah +author: John Snow Labs +name: whisper_tiny_finetune_pooya_fallah_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_tiny_finetune_pooya_fallah_pipeline` is a English model originally trained by Pooya-Fallah. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_tiny_finetune_pooya_fallah_pipeline_en_5.5.0_3.0_1725430778156.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_tiny_finetune_pooya_fallah_pipeline_en_5.5.0_3.0_1725430778156.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_tiny_finetune_pooya_fallah_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_tiny_finetune_pooya_fallah_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_tiny_finetune_pooya_fallah_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|393.9 MB| + +## References + +https://huggingface.co/Pooya-Fallah/whisper-tiny-finetune + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_balance_mixed_aug_replace_bert_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_balance_mixed_aug_replace_bert_en.md new file mode 100644 index 00000000000000..8a7abfc7c0456c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_balance_mixed_aug_replace_bert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_balance_mixed_aug_replace_bert XlmRoBertaForSequenceClassification from ThuyNT03 +author: John Snow Labs +name: xlm_roberta_base_balance_mixed_aug_replace_bert +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_balance_mixed_aug_replace_bert` is a English model originally trained by ThuyNT03. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_balance_mixed_aug_replace_bert_en_5.5.0_3.0_1725411273334.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_balance_mixed_aug_replace_bert_en_5.5.0_3.0_1725411273334.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_balance_mixed_aug_replace_bert","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_balance_mixed_aug_replace_bert", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
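+Once the pipeline has run, the sequence-level prediction sits in the `class` column. Two quick ways to inspect it (a sketch; `LightPipeline` is a convenience wrapper for running the same fitted stages on plain strings):
+
+```python
+# Show the input text next to the predicted label.
+pipelineDF.select("text", "class.result").show(truncate=False)
+
+# Or score ad-hoc strings without building a DataFrame.
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+print(light.annotate("I love spark-nlp")["class"])
+```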
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_balance_mixed_aug_replace_bert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|798.9 MB| + +## References + +https://huggingface.co/ThuyNT03/xlm-roberta-base-Balance_Mixed-aug_replace_BERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_amharic_am.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_amharic_am.md new file mode 100644 index 00000000000000..dc940ac306854f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_amharic_am.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Amharic xlm_roberta_base_finetuned_amharic XlmRoBertaEmbeddings from Davlan +author: John Snow Labs +name: xlm_roberta_base_finetuned_amharic +date: 2024-09-04 +tags: [am, open_source, onnx, embeddings, xlm_roberta] +task: Embeddings +language: am +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_amharic` is a Amharic model originally trained by Davlan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_amharic_am_5.5.0_3.0_1725416838168.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_amharic_am_5.5.0_3.0_1725416838168.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("xlm_roberta_base_finetuned_amharic","am") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("xlm_roberta_base_finetuned_amharic","am") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
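+The token vectors produced above live in the `embeddings` field of each annotation. A short sketch for pulling them into a flat DataFrame (column names follow the snippet; the usual vector size for an XLM-R base model is 768, though that figure is not stated in this card):
+
+```python
+from pyspark.sql import functions as F
+
+# One row per token: the token text and its dense vector.
+vectors = pipelineDF.select(F.explode("embeddings").alias("e")) \
+    .select(F.col("e.result").alias("token"), F.col("e.embeddings").alias("vector"))
+vectors.show(5, truncate=False)
+```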
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_amharic| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[xlm_roberta]| +|Language:|am| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Davlan/xlm-roberta-base-finetuned-amharic \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_all_monkdalma_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_all_monkdalma_pipeline_en.md new file mode 100644 index 00000000000000..26541f6fb83e85 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_all_monkdalma_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_all_monkdalma_pipeline pipeline XlmRoBertaForTokenClassification from MonkDalma +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_all_monkdalma_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_all_monkdalma_pipeline` is a English model originally trained by MonkDalma. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_monkdalma_pipeline_en_5.5.0_3.0_1725447262989.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_monkdalma_pipeline_en_5.5.0_3.0_1725447262989.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_all_monkdalma_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_all_monkdalma_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_all_monkdalma_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|861.0 MB| + +## References + +https://huggingface.co/MonkDalma/xlm-roberta-base-finetuned-panx-all + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_all_ultimecia_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_all_ultimecia_en.md new file mode 100644 index 00000000000000..51e1d8cf7afb80 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_all_ultimecia_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_all_ultimecia XlmRoBertaForTokenClassification from ultimecia +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_all_ultimecia +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_all_ultimecia` is a English model originally trained by ultimecia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_ultimecia_en_5.5.0_3.0_1725445761612.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_ultimecia_en_5.5.0_3.0_1725445761612.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_all_ultimecia","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_all_ultimecia", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
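+The raw output above is one tag per token. If entity chunks are more convenient, a `NerConverter` stage can be appended (a sketch — it assumes the model emits IOB-style labels, which is typical for PAN-X fine-tunes but not stated in this card):
+
+```python
+from sparknlp.annotator import NerConverter
+
+# Groups consecutive B-/I- tags into single entity spans.
+converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("entities")
+
+converter.transform(pipelineDF).select("entities.result").show(truncate=False)
+```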
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_all_ultimecia| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|846.0 MB| + +## References + +https://huggingface.co/ultimecia/xlm-roberta-base-finetuned-panx-all \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_all_ultimecia_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_all_ultimecia_pipeline_en.md new file mode 100644 index 00000000000000..daf38e4e29dee2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_all_ultimecia_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_all_ultimecia_pipeline pipeline XlmRoBertaForTokenClassification from ultimecia +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_all_ultimecia_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_all_ultimecia_pipeline` is a English model originally trained by ultimecia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_ultimecia_pipeline_en_5.5.0_3.0_1725445848021.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_ultimecia_pipeline_en_5.5.0_3.0_1725445848021.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_all_ultimecia_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_all_ultimecia_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_all_ultimecia_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|846.0 MB| + +## References + +https://huggingface.co/ultimecia/xlm-roberta-base-finetuned-panx-all + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_english_bluetree99_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_english_bluetree99_pipeline_en.md new file mode 100644 index 00000000000000..1f53fd5a1d6667 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_english_bluetree99_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_english_bluetree99_pipeline pipeline XlmRoBertaForTokenClassification from bluetree99 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_english_bluetree99_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_english_bluetree99_pipeline` is a English model originally trained by bluetree99. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_bluetree99_pipeline_en_5.5.0_3.0_1725437001047.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_bluetree99_pipeline_en_5.5.0_3.0_1725437001047.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_english_bluetree99_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_english_bluetree99_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_english_bluetree99_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|826.4 MB| + +## References + +https://huggingface.co/bluetree99/xlm-roberta-base-finetuned-panx-en + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_french_chaoli_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_french_chaoli_pipeline_en.md new file mode 100644 index 00000000000000..ed63b64fffa7ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_french_chaoli_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_french_chaoli_pipeline pipeline XlmRoBertaForTokenClassification from ChaoLi +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_french_chaoli_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_french_chaoli_pipeline` is a English model originally trained by ChaoLi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_chaoli_pipeline_en_5.5.0_3.0_1725446339829.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_chaoli_pipeline_en_5.5.0_3.0_1725446339829.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_french_chaoli_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_french_chaoli_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_french_chaoli_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|840.9 MB| + +## References + +https://huggingface.co/ChaoLi/xlm-roberta-base-finetuned-panx-fr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_french_handun_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_french_handun_en.md new file mode 100644 index 00000000000000..efb3f7a39a12ed --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_french_handun_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_french_handun XlmRoBertaForTokenClassification from Handun +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_french_handun +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_french_handun` is a English model originally trained by Handun. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_handun_en_5.5.0_3.0_1725445755341.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_handun_en_5.5.0_3.0_1725445755341.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_french_handun","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_french_handun", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_french_handun| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|840.9 MB| + +## References + +https://huggingface.co/Handun/xlm-roberta-base-finetuned-panx-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_french_taoyoung_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_french_taoyoung_en.md new file mode 100644 index 00000000000000..aaa3824394c620 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_french_taoyoung_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_french_taoyoung XlmRoBertaForTokenClassification from taoyoung +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_french_taoyoung +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_french_taoyoung` is a English model originally trained by taoyoung. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_taoyoung_en_5.5.0_3.0_1725436674501.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_taoyoung_en_5.5.0_3.0_1725436674501.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_french_taoyoung","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_french_taoyoung", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_french_taoyoung| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|832.1 MB| + +## References + +https://huggingface.co/taoyoung/xlm-roberta-base-finetuned-panx-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_french_taoyoung_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_french_taoyoung_pipeline_en.md new file mode 100644 index 00000000000000..5218769542f5f4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_french_taoyoung_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_french_taoyoung_pipeline pipeline XlmRoBertaForTokenClassification from taoyoung +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_french_taoyoung_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_french_taoyoung_pipeline` is a English model originally trained by taoyoung. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_taoyoung_pipeline_en_5.5.0_3.0_1725436766146.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_taoyoung_pipeline_en_5.5.0_3.0_1725436766146.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_french_taoyoung_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_french_taoyoung_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_french_taoyoung_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|832.2 MB| + +## References + +https://huggingface.co/taoyoung/xlm-roberta-base-finetuned-panx-fr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_bessho_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_bessho_en.md new file mode 100644 index 00000000000000..4f2d03788cb6a4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_bessho_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_bessho XlmRoBertaForTokenClassification from bessho +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_bessho +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_bessho` is a English model originally trained by bessho. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_bessho_en_5.5.0_3.0_1725436608356.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_bessho_en_5.5.0_3.0_1725436608356.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_bessho","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_bessho", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_bessho| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/bessho/xlm-roberta-base-finetuned-panx-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_blanche_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_blanche_en.md new file mode 100644 index 00000000000000..11f32e66f829a9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_blanche_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_blanche XlmRoBertaForTokenClassification from Blanche +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_blanche +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_blanche` is a English model originally trained by Blanche. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_blanche_en_5.5.0_3.0_1725447100103.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_blanche_en_5.5.0_3.0_1725447100103.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_blanche","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_blanche", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_blanche| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|815.0 MB| + +## References + +https://huggingface.co/Blanche/xlm-roberta-base-finetuned-panx-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_0ppxnhximxr_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_0ppxnhximxr_en.md new file mode 100644 index 00000000000000..7f52aa7f9daf1b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_0ppxnhximxr_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_0ppxnhximxr XlmRoBertaForTokenClassification from 0ppxnhximxr +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_0ppxnhximxr +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_0ppxnhximxr` is a English model originally trained by 0ppxnhximxr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_0ppxnhximxr_en_5.5.0_3.0_1725446484855.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_0ppxnhximxr_en_5.5.0_3.0_1725446484855.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_0ppxnhximxr","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_0ppxnhximxr", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_0ppxnhximxr| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|843.4 MB| + +## References + +https://huggingface.co/0ppxnhximxr/xlm-roberta-base-finetuned-panx-de-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_0ppxnhximxr_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_0ppxnhximxr_pipeline_en.md new file mode 100644 index 00000000000000..a4ccf3c792672a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_0ppxnhximxr_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_0ppxnhximxr_pipeline pipeline XlmRoBertaForTokenClassification from 0ppxnhximxr +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_0ppxnhximxr_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_0ppxnhximxr_pipeline` is a English model originally trained by 0ppxnhximxr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_0ppxnhximxr_pipeline_en_5.5.0_3.0_1725446571214.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_0ppxnhximxr_pipeline_en_5.5.0_3.0_1725446571214.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_french_0ppxnhximxr_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_french_0ppxnhximxr_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_0ppxnhximxr_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|843.4 MB| + +## References + +https://huggingface.co/0ppxnhximxr/xlm-roberta-base-finetuned-panx-de-fr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_drigb_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_drigb_en.md new file mode 100644 index 00000000000000..521d78d0b3ba1f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_drigb_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_drigb XlmRoBertaForTokenClassification from drigb +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_drigb +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_drigb` is a English model originally trained by drigb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_drigb_en_5.5.0_3.0_1725437302781.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_drigb_en_5.5.0_3.0_1725437302781.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_drigb","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_drigb", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
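+The fitted pipeline above can be persisted and reloaded like any Spark ML model, which is handy when the same stages are reused across jobs (a sketch — the local path is a placeholder):
+
+```python
+from pyspark.ml import PipelineModel
+
+# Save the fitted stages to disk (or to HDFS/S3 with an appropriate URI).
+pipelineModel.write().overwrite().save("/tmp/xlm_roberta_panx_de_fr_pipeline")
+
+# Reload later without rebuilding or refitting the stages.
+restored = PipelineModel.load("/tmp/xlm_roberta_panx_de_fr_pipeline")
+restored.transform(data).select("ner.result").show(truncate=False)
+```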
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_drigb| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|858.2 MB| + +## References + +https://huggingface.co/drigb/xlm-roberta-base-finetuned-panx-de-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_drigb_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_drigb_pipeline_en.md new file mode 100644 index 00000000000000..e1d1b78f5efe31 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_drigb_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_drigb_pipeline pipeline XlmRoBertaForTokenClassification from drigb +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_drigb_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_drigb_pipeline` is a English model originally trained by drigb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_drigb_pipeline_en_5.5.0_3.0_1725437369630.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_drigb_pipeline_en_5.5.0_3.0_1725437369630.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_french_drigb_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_french_drigb_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_drigb_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|858.2 MB| + +## References + +https://huggingface.co/drigb/xlm-roberta-base-finetuned-panx-de-fr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_neha2608_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_neha2608_en.md new file mode 100644 index 00000000000000..af21109a90b951 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_neha2608_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_neha2608 XlmRoBertaForTokenClassification from Neha2608 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_neha2608 +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_neha2608` is a English model originally trained by Neha2608. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_neha2608_en_5.5.0_3.0_1725447236731.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_neha2608_en_5.5.0_3.0_1725447236731.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_neha2608","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_neha2608", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_neha2608| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|858.2 MB| + +## References + +https://huggingface.co/Neha2608/xlm-roberta-base-finetuned-panx-de-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_neha2608_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_neha2608_pipeline_en.md new file mode 100644 index 00000000000000..8ef688f7e82514 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_neha2608_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_neha2608_pipeline pipeline XlmRoBertaForTokenClassification from Neha2608 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_neha2608_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_neha2608_pipeline` is a English model originally trained by Neha2608. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_neha2608_pipeline_en_5.5.0_3.0_1725447303639.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_neha2608_pipeline_en_5.5.0_3.0_1725447303639.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_french_neha2608_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_french_neha2608_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_neha2608_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|858.2 MB| + +## References + +https://huggingface.co/Neha2608/xlm-roberta-base-finetuned-panx-de-fr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_tamo2_3yama_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_tamo2_3yama_en.md new file mode 100644 index 00000000000000..197fa75fcb1f52 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_tamo2_3yama_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_tamo2_3yama XlmRoBertaForTokenClassification from tamo2-3yama +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_tamo2_3yama +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_tamo2_3yama` is a English model originally trained by tamo2-3yama. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_tamo2_3yama_en_5.5.0_3.0_1725446506020.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_tamo2_3yama_en_5.5.0_3.0_1725446506020.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_tamo2_3yama","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_tamo2_3yama", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_tamo2_3yama| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|858.2 MB| + +## References + +https://huggingface.co/tamo2-3yama/xlm-roberta-base-finetuned-panx-de-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_tamo2_3yama_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_tamo2_3yama_pipeline_en.md new file mode 100644 index 00000000000000..86fe36110a0375 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_tamo2_3yama_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_tamo2_3yama_pipeline pipeline XlmRoBertaForTokenClassification from tamo2-3yama +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_tamo2_3yama_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_tamo2_3yama_pipeline` is a English model originally trained by tamo2-3yama. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_tamo2_3yama_pipeline_en_5.5.0_3.0_1725446572090.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_tamo2_3yama_pipeline_en_5.5.0_3.0_1725446572090.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_french_tamo2_3yama_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_french_tamo2_3yama_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_tamo2_3yama_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|858.2 MB| + +## References + +https://huggingface.co/tamo2-3yama/xlm-roberta-base-finetuned-panx-de-fr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_team_nave_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_team_nave_en.md new file mode 100644 index 00000000000000..8b10938350d362 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_french_team_nave_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_team_nave XlmRoBertaForTokenClassification from team-nave +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_team_nave +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_team_nave` is a English model originally trained by team-nave. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_team_nave_en_5.5.0_3.0_1725436099350.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_team_nave_en_5.5.0_3.0_1725436099350.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_team_nave","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_team_nave", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_team_nave| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|857.0 MB| + +## References + +https://huggingface.co/team-nave/xlm-roberta-base-finetuned-panx-de-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_junf1122_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_junf1122_pipeline_en.md new file mode 100644 index 00000000000000..d00ea7ac47fe4a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_junf1122_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_junf1122_pipeline pipeline XlmRoBertaForTokenClassification from JunF1122 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_junf1122_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_junf1122_pipeline` is a English model originally trained by JunF1122. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_junf1122_pipeline_en_5.5.0_3.0_1725447939907.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_junf1122_pipeline_en_5.5.0_3.0_1725447939907.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_junf1122_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_junf1122_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_junf1122_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/JunF1122/xlm-roberta-base-finetuned-panx-de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_malduwais_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_malduwais_en.md new file mode 100644 index 00000000000000..3d0ed28cf0bee7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_malduwais_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_malduwais XlmRoBertaForTokenClassification from malduwais +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_malduwais +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_malduwais` is a English model originally trained by malduwais. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_malduwais_en_5.5.0_3.0_1725423758364.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_malduwais_en_5.5.0_3.0_1725423758364.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_malduwais","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_malduwais", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_malduwais| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|832.4 MB| + +## References + +https://huggingface.co/malduwais/xlm-roberta-base-finetuned-panx-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_sbpark_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_sbpark_en.md new file mode 100644 index 00000000000000..80ce45a5596f38 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_sbpark_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_sbpark XlmRoBertaForTokenClassification from sbpark +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_sbpark +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_sbpark` is a English model originally trained by sbpark. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_sbpark_en_5.5.0_3.0_1725447712371.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_sbpark_en_5.5.0_3.0_1725447712371.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_sbpark","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_sbpark", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_sbpark| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/sbpark/xlm-roberta-base-finetuned-panx-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_songys_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_songys_pipeline_en.md new file mode 100644 index 00000000000000..0d672e72fdaf28 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_songys_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_songys_pipeline pipeline XlmRoBertaForTokenClassification from songys +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_songys_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_songys_pipeline` is a English model originally trained by songys. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_songys_pipeline_en_5.5.0_3.0_1725436297370.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_songys_pipeline_en_5.5.0_3.0_1725436297370.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_songys_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_songys_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_songys_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|853.2 MB| + +## References + +https://huggingface.co/songys/xlm-roberta-base-finetuned-panx-de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_sponomary_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_sponomary_en.md new file mode 100644 index 00000000000000..49f57c1a6797ec --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_sponomary_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_sponomary XlmRoBertaForTokenClassification from sponomary +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_sponomary +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_sponomary` is a English model originally trained by sponomary. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_sponomary_en_5.5.0_3.0_1725446146623.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_sponomary_en_5.5.0_3.0_1725446146623.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_sponomary","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_sponomary", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_sponomary| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/sponomary/xlm-roberta-base-finetuned-panx-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_szogi_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_szogi_pipeline_en.md new file mode 100644 index 00000000000000..70c3834e6c13b4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_szogi_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_szogi_pipeline pipeline XlmRoBertaForTokenClassification from szogi +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_szogi_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_szogi_pipeline` is a English model originally trained by szogi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_szogi_pipeline_en_5.5.0_3.0_1725447626264.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_szogi_pipeline_en_5.5.0_3.0_1725447626264.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_szogi_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_szogi_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_szogi_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|840.8 MB| + +## References + +https://huggingface.co/szogi/xlm-roberta-base-finetuned-panx-de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_transll_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_transll_pipeline_en.md new file mode 100644 index 00000000000000..50d2a6635933c5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_german_transll_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_transll_pipeline pipeline XlmRoBertaForTokenClassification from TransLL +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_transll_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_transll_pipeline` is a English model originally trained by TransLL. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_transll_pipeline_en_5.5.0_3.0_1725437425330.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_transll_pipeline_en_5.5.0_3.0_1725437425330.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_transll_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_transll_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_transll_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/TransLL/xlm-roberta-base-finetuned-panx-de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_italian_cj_mills_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_italian_cj_mills_en.md new file mode 100644 index 00000000000000..ac4bbee6f4fcd3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_italian_cj_mills_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_italian_cj_mills XlmRoBertaForTokenClassification from cj-mills +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_italian_cj_mills +date: 2024-09-04 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_italian_cj_mills` is a English model originally trained by cj-mills. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_cj_mills_en_5.5.0_3.0_1725447364042.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_cj_mills_en_5.5.0_3.0_1725447364042.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_italian_cj_mills","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_italian_cj_mills", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_italian_cj_mills| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|827.8 MB| + +## References + +https://huggingface.co/cj-mills/xlm-roberta-base-finetuned-panx-it \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_italian_sponomary_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_italian_sponomary_pipeline_en.md new file mode 100644 index 00000000000000..d105c877626d54 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_finetuned_panx_italian_sponomary_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_italian_sponomary_pipeline pipeline XlmRoBertaForTokenClassification from sponomary +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_italian_sponomary_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_italian_sponomary_pipeline` is a English model originally trained by sponomary. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_sponomary_pipeline_en_5.5.0_3.0_1725437163248.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_sponomary_pipeline_en_5.5.0_3.0_1725437163248.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_italian_sponomary_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_italian_sponomary_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_italian_sponomary_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|828.6 MB| + +## References + +https://huggingface.co/sponomary/xlm-roberta-base-finetuned-panx-it + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_lr0_0001_seed42_kinyarwanda_amh_eng_train_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_lr0_0001_seed42_kinyarwanda_amh_eng_train_pipeline_en.md new file mode 100644 index 00000000000000..739dd8ecf7f93c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_lr0_0001_seed42_kinyarwanda_amh_eng_train_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_lr0_0001_seed42_kinyarwanda_amh_eng_train_pipeline pipeline XlmRoBertaForSequenceClassification from shanhy +author: John Snow Labs +name: xlm_roberta_base_lr0_0001_seed42_kinyarwanda_amh_eng_train_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_lr0_0001_seed42_kinyarwanda_amh_eng_train_pipeline` is a English model originally trained by shanhy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_lr0_0001_seed42_kinyarwanda_amh_eng_train_pipeline_en_5.5.0_3.0_1725410680259.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_lr0_0001_seed42_kinyarwanda_amh_eng_train_pipeline_en_5.5.0_3.0_1725410680259.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_lr0_0001_seed42_kinyarwanda_amh_eng_train_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_lr0_0001_seed42_kinyarwanda_amh_eng_train_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_lr0_0001_seed42_kinyarwanda_amh_eng_train_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|819.9 MB| + +## References + +https://huggingface.co/shanhy/xlm-roberta-base_lr0.0001_seed42_kin-amh-eng_train + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_ner_augmentation_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_ner_augmentation_pipeline_xx.md new file mode 100644 index 00000000000000..07adb13ba4ec9c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_ner_augmentation_pipeline_xx.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Multilingual xlm_roberta_base_ner_augmentation_pipeline pipeline XlmRoBertaForTokenClassification from rollerhafeezh-amikom +author: John Snow Labs +name: xlm_roberta_base_ner_augmentation_pipeline +date: 2024-09-04 +tags: [xx, open_source, pipeline, onnx] +task: Named Entity Recognition +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_ner_augmentation_pipeline` is a Multilingual model originally trained by rollerhafeezh-amikom. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_ner_augmentation_pipeline_xx_5.5.0_3.0_1725447535103.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_ner_augmentation_pipeline_xx_5.5.0_3.0_1725447535103.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_ner_augmentation_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_ner_augmentation_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_ner_augmentation_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|850.0 MB| + +## References + +https://huggingface.co/rollerhafeezh-amikom/xlm-roberta-base-ner-augmentation + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_ner_xx.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_ner_xx.md new file mode 100644 index 00000000000000..c3791ea081943f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_ner_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual xlm_roberta_base_ner XlmRoBertaForTokenClassification from orgcatorg +author: John Snow Labs +name: xlm_roberta_base_ner +date: 2024-09-04 +tags: [xx, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_ner` is a Multilingual model originally trained by orgcatorg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_ner_xx_5.5.0_3.0_1725423901760.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_ner_xx_5.5.0_3.0_1725423901760.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_ner","xx") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_ner", "xx")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|xx| +|Size:|868.9 MB| + +## References + +https://huggingface.co/orgcatorg/xlm-roberta-base-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_russian_sentiment_sentirueval2016_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_russian_sentiment_sentirueval2016_en.md new file mode 100644 index 00000000000000..c05da6000be005 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_russian_sentiment_sentirueval2016_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_russian_sentiment_sentirueval2016 XlmRoBertaForSequenceClassification from sismetanin +author: John Snow Labs +name: xlm_roberta_base_russian_sentiment_sentirueval2016 +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_russian_sentiment_sentirueval2016` is a English model originally trained by sismetanin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_russian_sentiment_sentirueval2016_en_5.5.0_3.0_1725411040338.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_russian_sentiment_sentirueval2016_en_5.5.0_3.0_1725411040338.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_russian_sentiment_sentirueval2016","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_russian_sentiment_sentirueval2016", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
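+As a minimal sketch of reading the prediction back (assuming the pipeline above has been run), the `class` output column carries the label chosen by the model:
+
+```python
+# Each row's "class" column holds one annotation per document; its `result`
+# field is the predicted label string stored with the model.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```
+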
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_russian_sentiment_sentirueval2016| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|797.1 MB| + +## References + +https://huggingface.co/sismetanin/xlm_roberta_base-ru-sentiment-sentirueval2016 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_trimmed_french_xnli_french_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_trimmed_french_xnli_french_en.md new file mode 100644 index 00000000000000..6ff7b370fe46a2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_trimmed_french_xnli_french_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_trimmed_french_xnli_french XlmRoBertaForSequenceClassification from vocabtrimmer +author: John Snow Labs +name: xlm_roberta_base_trimmed_french_xnli_french +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_trimmed_french_xnli_french` is a English model originally trained by vocabtrimmer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_trimmed_french_xnli_french_en_5.5.0_3.0_1725410007459.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_trimmed_french_xnli_french_en_5.5.0_3.0_1725410007459.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_trimmed_french_xnli_french","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_trimmed_french_xnli_french", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_trimmed_french_xnli_french| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|515.6 MB| + +## References + +https://huggingface.co/vocabtrimmer/xlm-roberta-base-trimmed-fr-xnli-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_trimmed_french_xnli_french_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_trimmed_french_xnli_french_pipeline_en.md new file mode 100644 index 00000000000000..4911aa8044e007 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_base_trimmed_french_xnli_french_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_trimmed_french_xnli_french_pipeline pipeline XlmRoBertaForSequenceClassification from vocabtrimmer +author: John Snow Labs +name: xlm_roberta_base_trimmed_french_xnli_french_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_trimmed_french_xnli_french_pipeline` is a English model originally trained by vocabtrimmer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_trimmed_french_xnli_french_pipeline_en_5.5.0_3.0_1725410055314.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_trimmed_french_xnli_french_pipeline_en_5.5.0_3.0_1725410055314.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_trimmed_french_xnli_french_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_trimmed_french_xnli_french_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
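+
+The `df` referenced above is not defined in the snippet; the sketch below shows one way it could be built. The input column name `text` and the output column name `class` are assumptions based on the stages listed under "Included Models" further down, not values documented for this specific pipeline.
+
+```python
+# sketch: build an input DataFrame for the pretrained pipeline and read the predictions
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")  # "text" column name is an assumption
+
+pipeline = PretrainedPipeline("xlm_roberta_base_trimmed_french_xnli_french_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+annotations.select("class.result").show(truncate=False)  # "class" output column is an assumption
+```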
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_trimmed_french_xnli_french_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|515.6 MB| + +## References + +https://huggingface.co/vocabtrimmer/xlm-roberta-base-trimmed-fr-xnli-fr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_finetuned_emojis_2_client_toxic_fedavg_iid_fed_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_finetuned_emojis_2_client_toxic_fedavg_iid_fed_en.md new file mode 100644 index 00000000000000..e29fa6e5d17809 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_finetuned_emojis_2_client_toxic_fedavg_iid_fed_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_finetuned_emojis_2_client_toxic_fedavg_iid_fed XlmRoBertaForSequenceClassification from Karim-Gamal +author: John Snow Labs +name: xlm_roberta_finetuned_emojis_2_client_toxic_fedavg_iid_fed +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_finetuned_emojis_2_client_toxic_fedavg_iid_fed` is a English model originally trained by Karim-Gamal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_finetuned_emojis_2_client_toxic_fedavg_iid_fed_en_5.5.0_3.0_1725410624680.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_finetuned_emojis_2_client_toxic_fedavg_iid_fed_en_5.5.0_3.0_1725410624680.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# build a document -> token -> class pipeline around the pretrained classifier
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_finetuned_emojis_2_client_toxic_fedavg_iid_fed","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_finetuned_emojis_2_client_toxic_fedavg_iid_fed", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_finetuned_emojis_2_client_toxic_fedavg_iid_fed| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Karim-Gamal/XLM-Roberta-finetuned-emojis-2-client-toxic-FedAvg-IID-Fed \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_indosquadv2_1694025616_16_2e_06_0_01_5_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_indosquadv2_1694025616_16_2e_06_0_01_5_en.md new file mode 100644 index 00000000000000..6a1038ffab8273 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_indosquadv2_1694025616_16_2e_06_0_01_5_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English xlm_roberta_indosquadv2_1694025616_16_2e_06_0_01_5 XlmRoBertaForQuestionAnswering from rizquuula +author: John Snow Labs +name: xlm_roberta_indosquadv2_1694025616_16_2e_06_0_01_5 +date: 2024-09-04 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_indosquadv2_1694025616_16_2e_06_0_01_5` is a English model originally trained by rizquuula. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_indosquadv2_1694025616_16_2e_06_0_01_5_en_5.5.0_3.0_1725482122597.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_indosquadv2_1694025616_16_2e_06_0_01_5_en_5.5.0_3.0_1725482122597.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# pair each question with its context, then extract the answer span
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_indosquadv2_1694025616_16_2e_06_0_01_5","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_indosquadv2_1694025616_16_2e_06_0_01_5", "en")
+    .setInputCols(Array("document_question", "document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("document_question", "document_context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
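+
+A quick way to inspect the extracted answers in `pipelineDF` above is a sketch like the following; `answer.result` relies on the standard Spark NLP annotation schema, where `result` holds the predicted text.
+
+```python
+# sketch: show question, context and the predicted answer span
+pipelineDF.select("document_question.result", "document_context.result", "answer.result") \
+    .show(truncate=False)
+```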
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_indosquadv2_1694025616_16_2e_06_0_01_5| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|818.0 MB| + +## References + +https://huggingface.co/rizquuula/XLM-RoBERTa-IndoSQuADv2_1694025616-16-2e-06-0.01-5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_indosquadv2_1694025616_16_2e_06_0_01_5_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_indosquadv2_1694025616_16_2e_06_0_01_5_pipeline_en.md new file mode 100644 index 00000000000000..ec65c4bb6523cc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-xlm_roberta_indosquadv2_1694025616_16_2e_06_0_01_5_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlm_roberta_indosquadv2_1694025616_16_2e_06_0_01_5_pipeline pipeline XlmRoBertaForQuestionAnswering from rizquuula +author: John Snow Labs +name: xlm_roberta_indosquadv2_1694025616_16_2e_06_0_01_5_pipeline +date: 2024-09-04 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_indosquadv2_1694025616_16_2e_06_0_01_5_pipeline` is a English model originally trained by rizquuula. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_indosquadv2_1694025616_16_2e_06_0_01_5_pipeline_en_5.5.0_3.0_1725482248170.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_indosquadv2_1694025616_16_2e_06_0_01_5_pipeline_en_5.5.0_3.0_1725482248170.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_indosquadv2_1694025616_16_2e_06_0_01_5_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_indosquadv2_1694025616_16_2e_06_0_01_5_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_indosquadv2_1694025616_16_2e_06_0_01_5_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|818.0 MB| + +## References + +https://huggingface.co/rizquuula/XLM-RoBERTa-IndoSQuADv2_1694025616-16-2e-06-0.01-5 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-04-yelp_polarity_microsoft_deberta_v3_base_seed_1_en.md b/docs/_posts/ahmedlone127/2024-09-04-yelp_polarity_microsoft_deberta_v3_base_seed_1_en.md new file mode 100644 index 00000000000000..4fc25ee4446df5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-04-yelp_polarity_microsoft_deberta_v3_base_seed_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English yelp_polarity_microsoft_deberta_v3_base_seed_1 DeBertaForSequenceClassification from utahnlp +author: John Snow Labs +name: yelp_polarity_microsoft_deberta_v3_base_seed_1 +date: 2024-09-04 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`yelp_polarity_microsoft_deberta_v3_base_seed_1` is a English model originally trained by utahnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/yelp_polarity_microsoft_deberta_v3_base_seed_1_en_5.5.0_3.0_1725468392028.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/yelp_polarity_microsoft_deberta_v3_base_seed_1_en_5.5.0_3.0_1725468392028.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# build a document -> token -> class pipeline around the pretrained classifier
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("yelp_polarity_microsoft_deberta_v3_base_seed_1","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("yelp_polarity_microsoft_deberta_v3_base_seed_1", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
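+
+To read the predicted polarity label from `pipelineDF` above, a small sketch such as the one below can be used; in the standard annotation schema `class.result` holds the label string and `class.metadata` the per-label scores.
+
+```python
+# sketch: predicted label plus the raw class scores kept in the annotation metadata
+pipelineDF.select("text", "class.result", "class.metadata").show(truncate=False)
+```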
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|yelp_polarity_microsoft_deberta_v3_base_seed_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|666.0 MB| + +## References + +https://huggingface.co/utahnlp/yelp_polarity_microsoft_deberta-v3-base_seed-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-100_sdb_taxxl_truncate_768_en.md b/docs/_posts/ahmedlone127/2024-09-05-100_sdb_taxxl_truncate_768_en.md new file mode 100644 index 00000000000000..4089719b1039ea --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-100_sdb_taxxl_truncate_768_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English 100_sdb_taxxl_truncate_768 DistilBertEmbeddings from sripadhstudy +author: John Snow Labs +name: 100_sdb_taxxl_truncate_768 +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`100_sdb_taxxl_truncate_768` is a English model originally trained by sripadhstudy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/100_sdb_taxxl_truncate_768_en_5.5.0_3.0_1725524715035.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/100_sdb_taxxl_truncate_768_en_5.5.0_3.0_1725524715035.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("100_sdb_taxxl_truncate_768","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("100_sdb_taxxl_truncate_768","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
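+
+Each token in the input gets one vector in the `embeddings` column produced above; a sketch for unpacking them with standard Spark SQL functions:
+
+```python
+# sketch: one row per token, with the token text and its embedding vector
+from pyspark.sql import functions as F
+
+pipelineDF.select(F.explode("embeddings").alias("emb")) \
+    .select(F.col("emb.result").alias("token"), F.col("emb.embeddings").alias("vector")) \
+    .show(truncate=False)
+```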
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|100_sdb_taxxl_truncate_768| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.5 MB| + +## References + +https://huggingface.co/sripadhstudy/100_SDB_TAxxL_truncate_768 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-100_sdb_taxxl_truncate_768_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-100_sdb_taxxl_truncate_768_pipeline_en.md new file mode 100644 index 00000000000000..da0eff0cedf8ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-100_sdb_taxxl_truncate_768_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English 100_sdb_taxxl_truncate_768_pipeline pipeline DistilBertEmbeddings from sripadhstudy +author: John Snow Labs +name: 100_sdb_taxxl_truncate_768_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`100_sdb_taxxl_truncate_768_pipeline` is a English model originally trained by sripadhstudy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/100_sdb_taxxl_truncate_768_pipeline_en_5.5.0_3.0_1725524727389.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/100_sdb_taxxl_truncate_768_pipeline_en_5.5.0_3.0_1725524727389.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("100_sdb_taxxl_truncate_768_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("100_sdb_taxxl_truncate_768_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|100_sdb_taxxl_truncate_768_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.5 MB| + +## References + +https://huggingface.co/sripadhstudy/100_SDB_TAxxL_truncate_768 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-1genreviewssentimentsamples_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-1genreviewssentimentsamples_pipeline_en.md new file mode 100644 index 00000000000000..e9864fcfa5db2c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-1genreviewssentimentsamples_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English 1genreviewssentimentsamples_pipeline pipeline DistilBertForSequenceClassification from Shraddha257 +author: John Snow Labs +name: 1genreviewssentimentsamples_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`1genreviewssentimentsamples_pipeline` is a English model originally trained by Shraddha257. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/1genreviewssentimentsamples_pipeline_en_5.5.0_3.0_1725580058792.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/1genreviewssentimentsamples_pipeline_en_5.5.0_3.0_1725580058792.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("1genreviewssentimentsamples_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("1genreviewssentimentsamples_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
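+
+For quick single-string inference, `PretrainedPipeline` also exposes `annotate()`, which runs the pipeline as a LightPipeline and returns plain Python values. The dictionary key `"class"` below is an assumption about the classifier's output column name, not something documented for this pipeline.
+
+```python
+# sketch: lightweight single-string inference with the pretrained pipeline
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("1genreviewssentimentsamples_pipeline", lang = "en")
+result = pipeline.annotate("The reviews for this product were overwhelmingly positive")
+print(result["class"])  # predicted label(s); the "class" key is an assumption
+```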
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|1genreviewssentimentsamples_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Shraddha257/1GENReviewsSentimentSamples + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-2020_q1_full_tweets_en.md b/docs/_posts/ahmedlone127/2024-09-05-2020_q1_full_tweets_en.md new file mode 100644 index 00000000000000..2874ee890cac77 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-2020_q1_full_tweets_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English 2020_q1_full_tweets RoBertaEmbeddings from DouglasPontes +author: John Snow Labs +name: 2020_q1_full_tweets +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`2020_q1_full_tweets` is a English model originally trained by DouglasPontes. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/2020_q1_full_tweets_en_5.5.0_3.0_1725572886632.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/2020_q1_full_tweets_en_5.5.0_3.0_1725572886632.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("2020_q1_full_tweets","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("2020_q1_full_tweets","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|2020_q1_full_tweets| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|466.1 MB| + +## References + +https://huggingface.co/DouglasPontes/2020-Q1-full_tweets \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-2020_q1_full_tweets_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-2020_q1_full_tweets_pipeline_en.md new file mode 100644 index 00000000000000..0c4c9c9567c759 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-2020_q1_full_tweets_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English 2020_q1_full_tweets_pipeline pipeline RoBertaEmbeddings from DouglasPontes +author: John Snow Labs +name: 2020_q1_full_tweets_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`2020_q1_full_tweets_pipeline` is a English model originally trained by DouglasPontes. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/2020_q1_full_tweets_pipeline_en_5.5.0_3.0_1725572910116.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/2020_q1_full_tweets_pipeline_en_5.5.0_3.0_1725572910116.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("2020_q1_full_tweets_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("2020_q1_full_tweets_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|2020_q1_full_tweets_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|466.1 MB| + +## References + +https://huggingface.co/DouglasPontes/2020-Q1-full_tweets + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-2020_q1_full_tweets_tok_en.md b/docs/_posts/ahmedlone127/2024-09-05-2020_q1_full_tweets_tok_en.md new file mode 100644 index 00000000000000..cb6f626cae9367 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-2020_q1_full_tweets_tok_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English 2020_q1_full_tweets_tok RoBertaEmbeddings from DouglasPontes +author: John Snow Labs +name: 2020_q1_full_tweets_tok +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`2020_q1_full_tweets_tok` is a English model originally trained by DouglasPontes. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/2020_q1_full_tweets_tok_en_5.5.0_3.0_1725577803895.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/2020_q1_full_tweets_tok_en_5.5.0_3.0_1725577803895.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("2020_q1_full_tweets_tok","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("2020_q1_full_tweets_tok","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|2020_q1_full_tweets_tok| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|464.4 MB| + +## References + +https://huggingface.co/DouglasPontes/2020-Q1-full_tweets_tok \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-2020_q1_full_tweets_tok_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-2020_q1_full_tweets_tok_pipeline_en.md new file mode 100644 index 00000000000000..bd1d9d5c9407c5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-2020_q1_full_tweets_tok_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English 2020_q1_full_tweets_tok_pipeline pipeline RoBertaEmbeddings from DouglasPontes +author: John Snow Labs +name: 2020_q1_full_tweets_tok_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`2020_q1_full_tweets_tok_pipeline` is a English model originally trained by DouglasPontes. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/2020_q1_full_tweets_tok_pipeline_en_5.5.0_3.0_1725577828913.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/2020_q1_full_tweets_tok_pipeline_en_5.5.0_3.0_1725577828913.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("2020_q1_full_tweets_tok_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("2020_q1_full_tweets_tok_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|2020_q1_full_tweets_tok_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|464.4 MB| + +## References + +https://huggingface.co/DouglasPontes/2020-Q1-full_tweets_tok + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-20230328_001_baseline_xlmr_clickbait_spoiling_en.md b/docs/_posts/ahmedlone127/2024-09-05-20230328_001_baseline_xlmr_clickbait_spoiling_en.md new file mode 100644 index 00000000000000..3bf8e7ce2c7d19 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-20230328_001_baseline_xlmr_clickbait_spoiling_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English 20230328_001_baseline_xlmr_clickbait_spoiling XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: 20230328_001_baseline_xlmr_clickbait_spoiling +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`20230328_001_baseline_xlmr_clickbait_spoiling` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/20230328_001_baseline_xlmr_clickbait_spoiling_en_5.5.0_3.0_1725567961805.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/20230328_001_baseline_xlmr_clickbait_spoiling_en_5.5.0_3.0_1725567961805.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# pair each question with its context, then extract the answer span
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("20230328_001_baseline_xlmr_clickbait_spoiling","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("20230328_001_baseline_xlmr_clickbait_spoiling", "en")
+    .setInputCols(Array("document_question", "document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("document_question", "document_context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|20230328_001_baseline_xlmr_clickbait_spoiling| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|876.0 MB| + +## References + +https://huggingface.co/intanm/20230328-001-baseline-xlmr-clickbait-spoiling \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-20230328_001_baseline_xlmr_clickbait_spoiling_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-20230328_001_baseline_xlmr_clickbait_spoiling_pipeline_en.md new file mode 100644 index 00000000000000..57f6d9ef8482e3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-20230328_001_baseline_xlmr_clickbait_spoiling_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English 20230328_001_baseline_xlmr_clickbait_spoiling_pipeline pipeline XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: 20230328_001_baseline_xlmr_clickbait_spoiling_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`20230328_001_baseline_xlmr_clickbait_spoiling_pipeline` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/20230328_001_baseline_xlmr_clickbait_spoiling_pipeline_en_5.5.0_3.0_1725568030386.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/20230328_001_baseline_xlmr_clickbait_spoiling_pipeline_en_5.5.0_3.0_1725568030386.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("20230328_001_baseline_xlmr_clickbait_spoiling_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("20230328_001_baseline_xlmr_clickbait_spoiling_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|20230328_001_baseline_xlmr_clickbait_spoiling_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|876.0 MB| + +## References + +https://huggingface.co/intanm/20230328-001-baseline-xlmr-clickbait-spoiling + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-4_epoch_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-4_epoch_pipeline_en.md new file mode 100644 index 00000000000000..cde87affadc80b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-4_epoch_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English 4_epoch_pipeline pipeline DistilBertForTokenClassification from Gkumi +author: John Snow Labs +name: 4_epoch_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`4_epoch_pipeline` is a English model originally trained by Gkumi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/4_epoch_pipeline_en_5.5.0_3.0_1725506585586.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/4_epoch_pipeline_en_5.5.0_3.0_1725506585586.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("4_epoch_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("4_epoch_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|4_epoch_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|244.0 MB| + +## References + +https://huggingface.co/Gkumi/4-epoch + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-accu_1_en.md b/docs/_posts/ahmedlone127/2024-09-05-accu_1_en.md new file mode 100644 index 00000000000000..931cff07982583 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-accu_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English accu_1 RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: accu_1 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`accu_1` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/accu_1_en_5.5.0_3.0_1725541602841.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/accu_1_en_5.5.0_3.0_1725541602841.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# build a document -> token -> class pipeline around the pretrained classifier
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("accu_1","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("accu_1", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|accu_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/Accu_1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-accu_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-accu_1_pipeline_en.md new file mode 100644 index 00000000000000..4e1eec5b6d721e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-accu_1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English accu_1_pipeline pipeline RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: accu_1_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`accu_1_pipeline` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/accu_1_pipeline_en_5.5.0_3.0_1725541626207.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/accu_1_pipeline_en_5.5.0_3.0_1725541626207.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("accu_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("accu_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|accu_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/Accu_1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-accu_4_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-accu_4_pipeline_en.md new file mode 100644 index 00000000000000..2bf113b915bdb6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-accu_4_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English accu_4_pipeline pipeline RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: accu_4_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`accu_4_pipeline` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/accu_4_pipeline_en_5.5.0_3.0_1725541560750.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/accu_4_pipeline_en_5.5.0_3.0_1725541560750.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("accu_4_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("accu_4_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|accu_4_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/Accu_4 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-address_extraction_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-address_extraction_ner_pipeline_en.md new file mode 100644 index 00000000000000..233d78e68f5a36 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-address_extraction_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English address_extraction_ner_pipeline pipeline DistilBertForTokenClassification from kulkarni-harsh +author: John Snow Labs +name: address_extraction_ner_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`address_extraction_ner_pipeline` is a English model originally trained by kulkarni-harsh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/address_extraction_ner_pipeline_en_5.5.0_3.0_1725495552960.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/address_extraction_ner_pipeline_en_5.5.0_3.0_1725495552960.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("address_extraction_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("address_extraction_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|address_extraction_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/kulkarni-harsh/address-extraction-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-adp_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-adp_model_pipeline_en.md new file mode 100644 index 00000000000000..c1a97fad9f5ba6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-adp_model_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English adp_model_pipeline pipeline DistilBertForSequenceClassification from bitdribble +author: John Snow Labs +name: adp_model_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`adp_model_pipeline` is a English model originally trained by bitdribble. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/adp_model_pipeline_en_5.5.0_3.0_1725506996853.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/adp_model_pipeline_en_5.5.0_3.0_1725506996853.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("adp_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("adp_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|adp_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.6 MB| + +## References + +https://huggingface.co/bitdribble/adp_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-afriqa_afroxlmr_squad_v2_en.md b/docs/_posts/ahmedlone127/2024-09-05-afriqa_afroxlmr_squad_v2_en.md new file mode 100644 index 00000000000000..ca691fd6ede3a6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-afriqa_afroxlmr_squad_v2_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English afriqa_afroxlmr_squad_v2 XlmRoBertaForQuestionAnswering from masakhane +author: John Snow Labs +name: afriqa_afroxlmr_squad_v2 +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`afriqa_afroxlmr_squad_v2` is a English model originally trained by masakhane. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/afriqa_afroxlmr_squad_v2_en_5.5.0_3.0_1725558827555.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/afriqa_afroxlmr_squad_v2_en_5.5.0_3.0_1725558827555.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# pair each question with its context, then extract the answer span
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("afriqa_afroxlmr_squad_v2","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("afriqa_afroxlmr_squad_v2", "en")
+    .setInputCols(Array("document_question", "document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("document_question", "document_context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|afriqa_afroxlmr_squad_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/masakhane/afriqa_afroxlmr_squad_v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-afriqa_afroxlmr_squad_v2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-afriqa_afroxlmr_squad_v2_pipeline_en.md new file mode 100644 index 00000000000000..0b3a29c386eefc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-afriqa_afroxlmr_squad_v2_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English afriqa_afroxlmr_squad_v2_pipeline pipeline XlmRoBertaForQuestionAnswering from masakhane +author: John Snow Labs +name: afriqa_afroxlmr_squad_v2_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`afriqa_afroxlmr_squad_v2_pipeline` is a English model originally trained by masakhane. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/afriqa_afroxlmr_squad_v2_pipeline_en_5.5.0_3.0_1725558881591.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/afriqa_afroxlmr_squad_v2_pipeline_en_5.5.0_3.0_1725558881591.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("afriqa_afroxlmr_squad_v2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("afriqa_afroxlmr_squad_v2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
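+
+The `df` referenced above is not defined in the snippet; a minimal sketch of one way to build it, assuming the pretrained pipeline expects the same `question` and `context` input columns as the underlying model:
+
+```python
+# Hypothetical input DataFrame; the column names are an assumption based on the
+# MultiDocumentAssembler stage listed under "Included Models".
+df = spark.createDataFrame(
+    [("What is Spark NLP?", "Spark NLP is an NLP library built on Apache Spark.")],
+    ["question", "context"]
+)
+```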
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|afriqa_afroxlmr_squad_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/masakhane/afriqa_afroxlmr_squad_v2 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-afro_xlmr_base_finetuned_kintweetsc_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-afro_xlmr_base_finetuned_kintweetsc_pipeline_en.md new file mode 100644 index 00000000000000..67d3b95fab52ec --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-afro_xlmr_base_finetuned_kintweetsc_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English afro_xlmr_base_finetuned_kintweetsc_pipeline pipeline XlmRoBertaEmbeddings from RogerB +author: John Snow Labs +name: afro_xlmr_base_finetuned_kintweetsc_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`afro_xlmr_base_finetuned_kintweetsc_pipeline` is a English model originally trained by RogerB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/afro_xlmr_base_finetuned_kintweetsc_pipeline_en_5.5.0_3.0_1725509254382.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/afro_xlmr_base_finetuned_kintweetsc_pipeline_en_5.5.0_3.0_1725509254382.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("afro_xlmr_base_finetuned_kintweetsc_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("afro_xlmr_base_finetuned_kintweetsc_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
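+
+The `df` used above is not defined in the snippet; a minimal sketch, assuming a single `text` column because the pipeline starts with a DocumentAssembler:
+
+```python
+# Hypothetical input DataFrame for the snippet above.
+df = spark.createDataFrame([("Spark NLP supports this embeddings pipeline.",)], ["text"])
+```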
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|afro_xlmr_base_finetuned_kintweetsc_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/RogerB/afro-xlmr-base-finetuned-kintweetsC + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-afro_xlmr_base_finetuned_kintweetsd_en.md b/docs/_posts/ahmedlone127/2024-09-05-afro_xlmr_base_finetuned_kintweetsd_en.md new file mode 100644 index 00000000000000..a9c7d9dad6a31f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-afro_xlmr_base_finetuned_kintweetsd_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English afro_xlmr_base_finetuned_kintweetsd XlmRoBertaEmbeddings from RogerB +author: John Snow Labs +name: afro_xlmr_base_finetuned_kintweetsd +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`afro_xlmr_base_finetuned_kintweetsd` is a English model originally trained by RogerB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/afro_xlmr_base_finetuned_kintweetsd_en_5.5.0_3.0_1725509598904.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/afro_xlmr_base_finetuned_kintweetsd_en_5.5.0_3.0_1725509598904.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("afro_xlmr_base_finetuned_kintweetsd","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("afro_xlmr_base_finetuned_kintweetsd","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
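+
+A minimal sketch, assuming the `pipelineDF` built above, for inspecting the token-level embeddings produced by this model:
+
+```python
+# Each annotation in the `embeddings` column pairs a token (`result`) with its vector (`embeddings`).
+pipelineDF.selectExpr("explode(embeddings) as emb") \
+    .selectExpr("emb.result as token", "emb.embeddings as vector") \
+    .show(truncate=80)
+```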
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|afro_xlmr_base_finetuned_kintweetsd| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[xlm_roberta]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/RogerB/afro-xlmr-base-finetuned-kintweetsD \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ai_text_detector_mhk1122_en.md b/docs/_posts/ahmedlone127/2024-09-05-ai_text_detector_mhk1122_en.md new file mode 100644 index 00000000000000..7fb46a3ebf43d1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ai_text_detector_mhk1122_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ai_text_detector_mhk1122 DistilBertForSequenceClassification from MHK1122 +author: John Snow Labs +name: ai_text_detector_mhk1122 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ai_text_detector_mhk1122` is a English model originally trained by MHK1122. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ai_text_detector_mhk1122_en_5.5.0_3.0_1725579854315.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ai_text_detector_mhk1122_en_5.5.0_3.0_1725579854315.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("ai_text_detector_mhk1122","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("ai_text_detector_mhk1122", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
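+
+A minimal sketch, assuming the `pipelineDF` built above, for reading the predicted label from the `class` column:
+
+```python
+# `class.result` holds the predicted label for each input row.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```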
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ai_text_detector_mhk1122| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/MHK1122/AI_text_detector \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ai_text_detector_mhk1122_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-ai_text_detector_mhk1122_pipeline_en.md new file mode 100644 index 00000000000000..e28ffd3d0f0876 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ai_text_detector_mhk1122_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English ai_text_detector_mhk1122_pipeline pipeline DistilBertForSequenceClassification from MHK1122 +author: John Snow Labs +name: ai_text_detector_mhk1122_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ai_text_detector_mhk1122_pipeline` is a English model originally trained by MHK1122. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ai_text_detector_mhk1122_pipeline_en_5.5.0_3.0_1725579870632.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ai_text_detector_mhk1122_pipeline_en_5.5.0_3.0_1725579870632.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ai_text_detector_mhk1122_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ai_text_detector_mhk1122_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ai_text_detector_mhk1122_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/MHK1122/AI_text_detector + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-aift_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-aift_model_pipeline_en.md new file mode 100644 index 00000000000000..e322773f3766ee --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-aift_model_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English aift_model_pipeline pipeline DistilBertForSequenceClassification from Cielciel +author: John Snow Labs +name: aift_model_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`aift_model_pipeline` is a English model originally trained by Cielciel. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/aift_model_pipeline_en_5.5.0_3.0_1725507659351.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/aift_model_pipeline_en_5.5.0_3.0_1725507659351.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("aift_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("aift_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
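+
+For quick experiments on a single string, the same pretrained pipeline can be used without building a DataFrame; a sketch, noting that the exact output keys depend on the stages inside the pipeline:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("aift_model_pipeline", lang="en")
+# annotate() runs the pipeline on one string and returns a dict of annotation results;
+# the "class" key is an assumption based on the DistilBertForSequenceClassification stage.
+result = pipeline.annotate("I love spark-nlp")
+print(result.get("class"))
+```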
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|aift_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Cielciel/aift-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-akihiro2_finetuned_kde4_english_tonga_tonga_islands_jp_accelerate_en.md b/docs/_posts/ahmedlone127/2024-09-05-akihiro2_finetuned_kde4_english_tonga_tonga_islands_jp_accelerate_en.md new file mode 100644 index 00000000000000..c4a04590e1a84c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-akihiro2_finetuned_kde4_english_tonga_tonga_islands_jp_accelerate_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English akihiro2_finetuned_kde4_english_tonga_tonga_islands_jp_accelerate MarianTransformer from Akihiro2 +author: John Snow Labs +name: akihiro2_finetuned_kde4_english_tonga_tonga_islands_jp_accelerate +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`akihiro2_finetuned_kde4_english_tonga_tonga_islands_jp_accelerate` is a English model originally trained by Akihiro2. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/akihiro2_finetuned_kde4_english_tonga_tonga_islands_jp_accelerate_en_5.5.0_3.0_1725545139319.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/akihiro2_finetuned_kde4_english_tonga_tonga_islands_jp_accelerate_en_5.5.0_3.0_1725545139319.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("akihiro2_finetuned_kde4_english_tonga_tonga_islands_jp_accelerate","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("akihiro2_finetuned_kde4_english_tonga_tonga_islands_jp_accelerate","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
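+
+A minimal sketch, assuming the `pipelineDF` built above, for collecting the translated text from the `translation` column:
+
+```python
+# One translation annotation is produced per detected sentence; `result` holds the translated text.
+pipelineDF.selectExpr("explode(translation) as t") \
+    .selectExpr("t.result as translated_text") \
+    .show(truncate=False)
+```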
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|akihiro2_finetuned_kde4_english_tonga_tonga_islands_jp_accelerate| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|514.8 MB| + +## References + +https://huggingface.co/Akihiro2/akihiro2-finetuned-kde4-en-to-jp-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-akihiro2_finetuned_kde4_english_tonga_tonga_islands_jp_accelerate_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-akihiro2_finetuned_kde4_english_tonga_tonga_islands_jp_accelerate_pipeline_en.md new file mode 100644 index 00000000000000..4f1bf3f52a7763 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-akihiro2_finetuned_kde4_english_tonga_tonga_islands_jp_accelerate_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English akihiro2_finetuned_kde4_english_tonga_tonga_islands_jp_accelerate_pipeline pipeline MarianTransformer from Akihiro2 +author: John Snow Labs +name: akihiro2_finetuned_kde4_english_tonga_tonga_islands_jp_accelerate_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`akihiro2_finetuned_kde4_english_tonga_tonga_islands_jp_accelerate_pipeline` is a English model originally trained by Akihiro2. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/akihiro2_finetuned_kde4_english_tonga_tonga_islands_jp_accelerate_pipeline_en_5.5.0_3.0_1725545168425.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/akihiro2_finetuned_kde4_english_tonga_tonga_islands_jp_accelerate_pipeline_en_5.5.0_3.0_1725545168425.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("akihiro2_finetuned_kde4_english_tonga_tonga_islands_jp_accelerate_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("akihiro2_finetuned_kde4_english_tonga_tonga_islands_jp_accelerate_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|akihiro2_finetuned_kde4_english_tonga_tonga_islands_jp_accelerate_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|515.4 MB| + +## References + +https://huggingface.co/Akihiro2/akihiro2-finetuned-kde4-en-to-jp-accelerate + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-albert_base_spanish_2023_11_13_19_24_en.md b/docs/_posts/ahmedlone127/2024-09-05-albert_base_spanish_2023_11_13_19_24_en.md new file mode 100644 index 00000000000000..ab1e02d93682d6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-albert_base_spanish_2023_11_13_19_24_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English albert_base_spanish_2023_11_13_19_24 AlbertEmbeddings from Santp98 +author: John Snow Labs +name: albert_base_spanish_2023_11_13_19_24 +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, albert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_base_spanish_2023_11_13_19_24` is a English model originally trained by Santp98. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_spanish_2023_11_13_19_24_en_5.5.0_3.0_1725528472936.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_spanish_2023_11_13_19_24_en_5.5.0_3.0_1725528472936.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = AlbertEmbeddings.pretrained("albert_base_spanish_2023_11_13_19_24","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = AlbertEmbeddings.pretrained("albert_base_spanish_2023_11_13_19_24","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_spanish_2023_11_13_19_24| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[albert]| +|Language:|en| +|Size:|42.5 MB| + +## References + +https://huggingface.co/Santp98/albert-base-spanish-2023-11-13-19-24 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-albert_base_spanish_2023_11_13_19_24_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-albert_base_spanish_2023_11_13_19_24_pipeline_en.md new file mode 100644 index 00000000000000..de7943ee3aea0c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-albert_base_spanish_2023_11_13_19_24_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English albert_base_spanish_2023_11_13_19_24_pipeline pipeline AlbertEmbeddings from Santp98 +author: John Snow Labs +name: albert_base_spanish_2023_11_13_19_24_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_base_spanish_2023_11_13_19_24_pipeline` is a English model originally trained by Santp98. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_spanish_2023_11_13_19_24_pipeline_en_5.5.0_3.0_1725528475464.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_spanish_2023_11_13_19_24_pipeline_en_5.5.0_3.0_1725528475464.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("albert_base_spanish_2023_11_13_19_24_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("albert_base_spanish_2023_11_13_19_24_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_spanish_2023_11_13_19_24_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|42.5 MB| + +## References + +https://huggingface.co/Santp98/albert-base-spanish-2023-11-13-19-24 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-albert_base_v1_semeval2017_en.md b/docs/_posts/ahmedlone127/2024-09-05-albert_base_v1_semeval2017_en.md new file mode 100644 index 00000000000000..f4a896cb4e23bd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-albert_base_v1_semeval2017_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English albert_base_v1_semeval2017 AlbertForSequenceClassification from HRui-Face +author: John Snow Labs +name: albert_base_v1_semeval2017 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_base_v1_semeval2017` is a English model originally trained by HRui-Face. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v1_semeval2017_en_5.5.0_3.0_1725525364903.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v1_semeval2017_en_5.5.0_3.0_1725525364903.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v1_semeval2017","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v1_semeval2017", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
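+
+A minimal sketch, assuming the `pipelineDF` built above, for reading the predicted label together with the per-label scores kept in the annotation metadata:
+
+```python
+# `result` is the predicted label; `metadata` carries the class scores as strings.
+pipelineDF.selectExpr("explode(class) as cls") \
+    .selectExpr("cls.result as label", "cls.metadata as scores") \
+    .show(truncate=False)
+```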
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v1_semeval2017| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/HRui-Face/albert-base-v1-semeval2017 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-albert_base_v1_semeval2017_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-albert_base_v1_semeval2017_pipeline_en.md new file mode 100644 index 00000000000000..28713d611786f0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-albert_base_v1_semeval2017_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English albert_base_v1_semeval2017_pipeline pipeline AlbertForSequenceClassification from HRui-Face +author: John Snow Labs +name: albert_base_v1_semeval2017_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_base_v1_semeval2017_pipeline` is a English model originally trained by HRui-Face. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v1_semeval2017_pipeline_en_5.5.0_3.0_1725525367423.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v1_semeval2017_pipeline_en_5.5.0_3.0_1725525367423.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("albert_base_v1_semeval2017_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("albert_base_v1_semeval2017_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v1_semeval2017_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/HRui-Face/albert-base-v1-semeval2017 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-albert_base_v2_fold_3_en.md b/docs/_posts/ahmedlone127/2024-09-05-albert_base_v2_fold_3_en.md new file mode 100644 index 00000000000000..98774c4234cc1e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-albert_base_v2_fold_3_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English albert_base_v2_fold_3 AlbertForSequenceClassification from research-dump +author: John Snow Labs +name: albert_base_v2_fold_3 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_base_v2_fold_3` is a English model originally trained by research-dump. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_fold_3_en_5.5.0_3.0_1725543089377.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_fold_3_en_5.5.0_3.0_1725543089377.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v2_fold_3","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v2_fold_3", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_fold_3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/research-dump/albert-base-v2_fold_3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-albert_base_v2_fold_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-albert_base_v2_fold_3_pipeline_en.md new file mode 100644 index 00000000000000..59ada5b6ccb5a4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-albert_base_v2_fold_3_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English albert_base_v2_fold_3_pipeline pipeline AlbertForSequenceClassification from research-dump +author: John Snow Labs +name: albert_base_v2_fold_3_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_base_v2_fold_3_pipeline` is a English model originally trained by research-dump. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_fold_3_pipeline_en_5.5.0_3.0_1725543091837.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_fold_3_pipeline_en_5.5.0_3.0_1725543091837.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("albert_base_v2_fold_3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("albert_base_v2_fold_3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_fold_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/research-dump/albert-base-v2_fold_3 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-albert_chinese_large_qa_pipeline_zh.md b/docs/_posts/ahmedlone127/2024-09-05-albert_chinese_large_qa_pipeline_zh.md new file mode 100644 index 00000000000000..2186101170e4d7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-albert_chinese_large_qa_pipeline_zh.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Chinese albert_chinese_large_qa_pipeline pipeline BertForQuestionAnswering from wptoux +author: John Snow Labs +name: albert_chinese_large_qa_pipeline +date: 2024-09-05 +tags: [zh, open_source, pipeline, onnx] +task: Question Answering +language: zh +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_chinese_large_qa_pipeline` is a Chinese model originally trained by wptoux. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_chinese_large_qa_pipeline_zh_5.5.0_3.0_1725554424293.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_chinese_large_qa_pipeline_zh_5.5.0_3.0_1725554424293.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("albert_chinese_large_qa_pipeline", lang = "zh") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("albert_chinese_large_qa_pipeline", lang = "zh") +val annotations = pipeline.transform(df) + +``` +
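+
+The `df` used above is not defined in the snippet; a minimal sketch with a Chinese question and context, assuming `question` and `context` input columns based on the MultiDocumentAssembler stage:
+
+```python
+# Hypothetical input DataFrame for the snippet above.
+df = spark.createDataFrame(
+    [("什么是Spark NLP?", "Spark NLP是一个构建在Apache Spark之上的自然语言处理库。")],
+    ["question", "context"]
+)
+```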
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_chinese_large_qa_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|zh| +|Size:|58.3 MB| + +## References + +https://huggingface.co/wptoux/albert-chinese-large-qa + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-albert_chinese_large_qa_zh.md b/docs/_posts/ahmedlone127/2024-09-05-albert_chinese_large_qa_zh.md new file mode 100644 index 00000000000000..ef8411f7192b4b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-albert_chinese_large_qa_zh.md @@ -0,0 +1,86 @@ +--- +layout: model +title: Chinese albert_chinese_large_qa BertForQuestionAnswering from wptoux +author: John Snow Labs +name: albert_chinese_large_qa +date: 2024-09-05 +tags: [zh, open_source, onnx, question_answering, bert] +task: Question Answering +language: zh +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_chinese_large_qa` is a Chinese model originally trained by wptoux. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_chinese_large_qa_zh_5.5.0_3.0_1725554421173.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_chinese_large_qa_zh_5.5.0_3.0_1725554421173.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = BertForQuestionAnswering.pretrained("albert_chinese_large_qa","zh") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = BertForQuestionAnswering.pretrained("albert_chinese_large_qa", "zh")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_chinese_large_qa| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|zh| +|Size:|58.3 MB| + +## References + +https://huggingface.co/wptoux/albert-chinese-large-qa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-albert_finetuned_tenbook_epoch_en.md b/docs/_posts/ahmedlone127/2024-09-05-albert_finetuned_tenbook_epoch_en.md new file mode 100644 index 00000000000000..ac0e247f642501 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-albert_finetuned_tenbook_epoch_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English albert_finetuned_tenbook_epoch BertForQuestionAnswering from DaydreamerF +author: John Snow Labs +name: albert_finetuned_tenbook_epoch +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_finetuned_tenbook_epoch` is a English model originally trained by DaydreamerF. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_finetuned_tenbook_epoch_en_5.5.0_3.0_1725560439757.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_finetuned_tenbook_epoch_en_5.5.0_3.0_1725560439757.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = BertForQuestionAnswering.pretrained("albert_finetuned_tenbook_epoch","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = BertForQuestionAnswering.pretrained("albert_finetuned_tenbook_epoch", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_finetuned_tenbook_epoch| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|37.4 MB| + +## References + +https://huggingface.co/DaydreamerF/albert-finetuned-TENBOOK-epoch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-albert_finetuned_tenbook_epoch_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-albert_finetuned_tenbook_epoch_pipeline_en.md new file mode 100644 index 00000000000000..3c15d96271f785 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-albert_finetuned_tenbook_epoch_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English albert_finetuned_tenbook_epoch_pipeline pipeline BertForQuestionAnswering from DaydreamerF +author: John Snow Labs +name: albert_finetuned_tenbook_epoch_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_finetuned_tenbook_epoch_pipeline` is a English model originally trained by DaydreamerF. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_finetuned_tenbook_epoch_pipeline_en_5.5.0_3.0_1725560441976.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_finetuned_tenbook_epoch_pipeline_en_5.5.0_3.0_1725560441976.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("albert_finetuned_tenbook_epoch_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("albert_finetuned_tenbook_epoch_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_finetuned_tenbook_epoch_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|37.4 MB| + +## References + +https://huggingface.co/DaydreamerF/albert-finetuned-TENBOOK-epoch + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-albert_persian_farsi_zwnj_base_v2_fa.md b/docs/_posts/ahmedlone127/2024-09-05-albert_persian_farsi_zwnj_base_v2_fa.md new file mode 100644 index 00000000000000..ba6a637fc762cb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-albert_persian_farsi_zwnj_base_v2_fa.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Persian albert_persian_farsi_zwnj_base_v2 AlbertEmbeddings from HooshvareLab +author: John Snow Labs +name: albert_persian_farsi_zwnj_base_v2 +date: 2024-09-05 +tags: [fa, open_source, onnx, embeddings, albert] +task: Embeddings +language: fa +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_persian_farsi_zwnj_base_v2` is a Persian model originally trained by HooshvareLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_persian_farsi_zwnj_base_v2_fa_5.5.0_3.0_1725528375477.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_persian_farsi_zwnj_base_v2_fa_5.5.0_3.0_1725528375477.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = AlbertEmbeddings.pretrained("albert_persian_farsi_zwnj_base_v2","fa") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = AlbertEmbeddings.pretrained("albert_persian_farsi_zwnj_base_v2","fa") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_persian_farsi_zwnj_base_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[albert]| +|Language:|fa| +|Size:|41.9 MB| + +## References + +https://huggingface.co/HooshvareLab/albert-fa-zwnj-base-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-albert_persian_farsi_zwnj_base_v2_pipeline_fa.md b/docs/_posts/ahmedlone127/2024-09-05-albert_persian_farsi_zwnj_base_v2_pipeline_fa.md new file mode 100644 index 00000000000000..2bf29ed2a89503 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-albert_persian_farsi_zwnj_base_v2_pipeline_fa.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Persian albert_persian_farsi_zwnj_base_v2_pipeline pipeline AlbertEmbeddings from HooshvareLab +author: John Snow Labs +name: albert_persian_farsi_zwnj_base_v2_pipeline +date: 2024-09-05 +tags: [fa, open_source, pipeline, onnx] +task: Embeddings +language: fa +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_persian_farsi_zwnj_base_v2_pipeline` is a Persian model originally trained by HooshvareLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_persian_farsi_zwnj_base_v2_pipeline_fa_5.5.0_3.0_1725528377966.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_persian_farsi_zwnj_base_v2_pipeline_fa_5.5.0_3.0_1725528377966.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("albert_persian_farsi_zwnj_base_v2_pipeline", lang = "fa") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("albert_persian_farsi_zwnj_base_v2_pipeline", lang = "fa") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_persian_farsi_zwnj_base_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fa| +|Size:|41.9 MB| + +## References + +https://huggingface.co/HooshvareLab/albert-fa-zwnj-base-v2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-albert_tiny_chinese_ws_pipeline_zh.md b/docs/_posts/ahmedlone127/2024-09-05-albert_tiny_chinese_ws_pipeline_zh.md new file mode 100644 index 00000000000000..b1a61875162b19 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-albert_tiny_chinese_ws_pipeline_zh.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Chinese albert_tiny_chinese_ws_pipeline pipeline BertForTokenClassification from ckiplab +author: John Snow Labs +name: albert_tiny_chinese_ws_pipeline +date: 2024-09-05 +tags: [zh, open_source, pipeline, onnx] +task: Named Entity Recognition +language: zh +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_tiny_chinese_ws_pipeline` is a Chinese model originally trained by ckiplab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_tiny_chinese_ws_pipeline_zh_5.5.0_3.0_1725538822785.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_tiny_chinese_ws_pipeline_zh_5.5.0_3.0_1725538822785.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("albert_tiny_chinese_ws_pipeline", lang = "zh") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("albert_tiny_chinese_ws_pipeline", lang = "zh") +val annotations = pipeline.transform(df) + +``` +
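+
+For a quick word-segmentation check on a single sentence, a sketch using `annotate()`; the dictionary keys returned depend on the output columns of the included stages and are not documented here:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("albert_tiny_chinese_ws_pipeline", lang="zh")
+# annotate() returns a dict of annotation results; printing it shows the
+# token-level segmentation tags produced by BertForTokenClassification.
+result = pipeline.annotate("我喜欢使用Spark NLP")
+print(result)
+```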
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_tiny_chinese_ws_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|zh| +|Size:|15.1 MB| + +## References + +https://huggingface.co/ckiplab/albert-tiny-chinese-ws + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-albert_xlarge_arabic_ar.md b/docs/_posts/ahmedlone127/2024-09-05-albert_xlarge_arabic_ar.md new file mode 100644 index 00000000000000..349940fb1bcfef --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-albert_xlarge_arabic_ar.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Arabic albert_xlarge_arabic AlbertEmbeddings from asafaya +author: John Snow Labs +name: albert_xlarge_arabic +date: 2024-09-05 +tags: [ar, open_source, onnx, embeddings, albert] +task: Embeddings +language: ar +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_xlarge_arabic` is a Arabic model originally trained by asafaya. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_xlarge_arabic_ar_5.5.0_3.0_1725568815914.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_xlarge_arabic_ar_5.5.0_3.0_1725568815914.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = AlbertEmbeddings.pretrained("albert_xlarge_arabic","ar") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = AlbertEmbeddings.pretrained("albert_xlarge_arabic","ar") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
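+
+As an optional follow-up to the Python example above, the token-level vectors can be inspected from the `embeddings` output column (a sketch, not part of the original card):
+
+```python
+# Each element of `embeddings` is one annotation per token; `result` is the token
+# text and `embeddings` the float vector produced by AlbertEmbeddings.
+pipelineDF.selectExpr("explode(embeddings) as emb") \
+    .selectExpr("emb.result as token", "emb.embeddings as vector") \
+    .show(truncate=False)
+```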
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_xlarge_arabic| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[albert]| +|Language:|ar| +|Size:|204.6 MB| + +## References + +https://huggingface.co/asafaya/albert-xlarge-arabic \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-albert_xlarge_arabic_pipeline_ar.md b/docs/_posts/ahmedlone127/2024-09-05-albert_xlarge_arabic_pipeline_ar.md new file mode 100644 index 00000000000000..fcd4207e1f688e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-albert_xlarge_arabic_pipeline_ar.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Arabic albert_xlarge_arabic_pipeline pipeline AlbertEmbeddings from asafaya +author: John Snow Labs +name: albert_xlarge_arabic_pipeline +date: 2024-09-05 +tags: [ar, open_source, pipeline, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_xlarge_arabic_pipeline` is a Arabic model originally trained by asafaya. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_xlarge_arabic_pipeline_ar_5.5.0_3.0_1725568826346.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_xlarge_arabic_pipeline_ar_5.5.0_3.0_1725568826346.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("albert_xlarge_arabic_pipeline", lang = "ar") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("albert_xlarge_arabic_pipeline", lang = "ar") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_xlarge_arabic_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ar| +|Size:|204.7 MB| + +## References + +https://huggingface.co/asafaya/albert-xlarge-arabic + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-albert_xlarge_v1_finetuned_mrpc_en.md b/docs/_posts/ahmedlone127/2024-09-05-albert_xlarge_v1_finetuned_mrpc_en.md new file mode 100644 index 00000000000000..be803910e714ea --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-albert_xlarge_v1_finetuned_mrpc_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English albert_xlarge_v1_finetuned_mrpc AlbertForSequenceClassification from VitaliiVrublevskyi +author: John Snow Labs +name: albert_xlarge_v1_finetuned_mrpc +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_xlarge_v1_finetuned_mrpc` is a English model originally trained by VitaliiVrublevskyi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_xlarge_v1_finetuned_mrpc_en_5.5.0_3.0_1725543085856.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_xlarge_v1_finetuned_mrpc_en_5.5.0_3.0_1725543085856.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_xlarge_v1_finetuned_mrpc","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_xlarge_v1_finetuned_mrpc", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
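+
+As an optional follow-up to the Python example above, the predicted label can be read from the `class` output column (a sketch; the column name comes from `setOutputCol("class")` in the snippet):
+
+```python
+# `class.result` holds the predicted category for each input row.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```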
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_xlarge_v1_finetuned_mrpc| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|220.4 MB| + +## References + +https://huggingface.co/VitaliiVrublevskyi/albert-xlarge-v1-finetuned-mrpc \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-albert_xlarge_v1_finetuned_mrpc_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-albert_xlarge_v1_finetuned_mrpc_pipeline_en.md new file mode 100644 index 00000000000000..a98b4366cf1fa4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-albert_xlarge_v1_finetuned_mrpc_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English albert_xlarge_v1_finetuned_mrpc_pipeline pipeline AlbertForSequenceClassification from VitaliiVrublevskyi +author: John Snow Labs +name: albert_xlarge_v1_finetuned_mrpc_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_xlarge_v1_finetuned_mrpc_pipeline` is a English model originally trained by VitaliiVrublevskyi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_xlarge_v1_finetuned_mrpc_pipeline_en_5.5.0_3.0_1725543097069.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_xlarge_v1_finetuned_mrpc_pipeline_en_5.5.0_3.0_1725543097069.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("albert_xlarge_v1_finetuned_mrpc_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("albert_xlarge_v1_finetuned_mrpc_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_xlarge_v1_finetuned_mrpc_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|220.4 MB| + +## References + +https://huggingface.co/VitaliiVrublevskyi/albert-xlarge-v1-finetuned-mrpc + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-arabert_ner_ar.md b/docs/_posts/ahmedlone127/2024-09-05-arabert_ner_ar.md new file mode 100644 index 00000000000000..48ef09c3d76fc5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-arabert_ner_ar.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Arabic arabert_ner BertForTokenClassification from abdusah +author: John Snow Labs +name: arabert_ner +date: 2024-09-05 +tags: [ar, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: ar +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`arabert_ner` is a Arabic model originally trained by abdusah. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabert_ner_ar_5.5.0_3.0_1725564071664.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabert_ner_ar_5.5.0_3.0_1725564071664.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("arabert_ner","ar") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("arabert_ner", "ar")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
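+
+As an optional follow-up to the Python example above, tokens and their predicted tags can be viewed side by side (a sketch, not part of the original card):
+
+```python
+# `token.result` holds the tokens and `ner.result` the predicted tags, in the same order.
+pipelineDF.select("token.result", "ner.result").show(truncate=False)
+```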
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arabert_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|ar| +|Size:|504.9 MB| + +## References + +https://huggingface.co/abdusah/arabert-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-arabert_ner_pipeline_ar.md b/docs/_posts/ahmedlone127/2024-09-05-arabert_ner_pipeline_ar.md new file mode 100644 index 00000000000000..e77ba4631bf431 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-arabert_ner_pipeline_ar.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Arabic arabert_ner_pipeline pipeline BertForTokenClassification from abdusah +author: John Snow Labs +name: arabert_ner_pipeline +date: 2024-09-05 +tags: [ar, open_source, pipeline, onnx] +task: Named Entity Recognition +language: ar +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`arabert_ner_pipeline` is a Arabic model originally trained by abdusah. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabert_ner_pipeline_ar_5.5.0_3.0_1725564098471.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabert_ner_pipeline_ar_5.5.0_3.0_1725564098471.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("arabert_ner_pipeline", lang = "ar") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("arabert_ner_pipeline", lang = "ar") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arabert_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ar| +|Size:|504.9 MB| + +## References + +https://huggingface.co/abdusah/arabert-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-arabic_ner_ace_pipeline_ar.md b/docs/_posts/ahmedlone127/2024-09-05-arabic_ner_ace_pipeline_ar.md new file mode 100644 index 00000000000000..8270e9907910ae --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-arabic_ner_ace_pipeline_ar.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Arabic arabic_ner_ace_pipeline pipeline BertForTokenClassification from ychenNLP +author: John Snow Labs +name: arabic_ner_ace_pipeline +date: 2024-09-05 +tags: [ar, open_source, pipeline, onnx] +task: Named Entity Recognition +language: ar +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`arabic_ner_ace_pipeline` is a Arabic model originally trained by ychenNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabic_ner_ace_pipeline_ar_5.5.0_3.0_1725510960741.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabic_ner_ace_pipeline_ar_5.5.0_3.0_1725510960741.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("arabic_ner_ace_pipeline", lang = "ar") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("arabic_ner_ace_pipeline", lang = "ar") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arabic_ner_ace_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ar| +|Size:|464.3 MB| + +## References + +https://huggingface.co/ychenNLP/arabic-ner-ace + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-arazn_whisper_small_en.md b/docs/_posts/ahmedlone127/2024-09-05-arazn_whisper_small_en.md new file mode 100644 index 00000000000000..582c8489d650a6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-arazn_whisper_small_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English arazn_whisper_small WhisperForCTC from ahmedheakl +author: John Snow Labs +name: arazn_whisper_small +date: 2024-09-05 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`arazn_whisper_small` is a English model originally trained by ahmedheakl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arazn_whisper_small_en_5.5.0_3.0_1725549225448.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arazn_whisper_small_en_5.5.0_3.0_1725549225448.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("arazn_whisper_small","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+# `data` is assumed to be a DataFrame with an "audio_content" column of audio float arrays
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("arazn_whisper_small", "en")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+// `data` is assumed to be a DataFrame with an "audio_content" column of audio float arrays
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
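+
+The `data` DataFrame used above is not constructed in the snippet. One possible way to build it is to decode an audio file to a float array first; the file name, the 16 kHz sampling rate, and the use of `librosa` below are illustrative assumptions, not requirements taken from the original card:
+
+```python
+import librosa
+
+# Load and resample the recording, then wrap the float array in a single-row DataFrame.
+raw_floats, _ = librosa.load("sample_audio.wav", sr=16000)
+data = spark.createDataFrame([[raw_floats.tolist()]]).toDF("audio_content")
+```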
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arazn_whisper_small| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/ahmedheakl/arazn-whisper-small \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-arazn_whisper_small_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-arazn_whisper_small_pipeline_en.md new file mode 100644 index 00000000000000..12093e8809a9a9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-arazn_whisper_small_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English arazn_whisper_small_pipeline pipeline WhisperForCTC from ahmedheakl +author: John Snow Labs +name: arazn_whisper_small_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`arazn_whisper_small_pipeline` is a English model originally trained by ahmedheakl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arazn_whisper_small_pipeline_en_5.5.0_3.0_1725549313327.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arazn_whisper_small_pipeline_en_5.5.0_3.0_1725549313327.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("arazn_whisper_small_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("arazn_whisper_small_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arazn_whisper_small_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/ahmedheakl/arazn-whisper-small + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-assessing_advancing_question_classifier_v1_en.md b/docs/_posts/ahmedlone127/2024-09-05-assessing_advancing_question_classifier_v1_en.md new file mode 100644 index 00000000000000..694fe13c54987b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-assessing_advancing_question_classifier_v1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English assessing_advancing_question_classifier_v1 DistilBertForSequenceClassification from jpbywater +author: John Snow Labs +name: assessing_advancing_question_classifier_v1 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`assessing_advancing_question_classifier_v1` is a English model originally trained by jpbywater. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/assessing_advancing_question_classifier_v1_en_5.5.0_3.0_1725579852496.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/assessing_advancing_question_classifier_v1_en_5.5.0_3.0_1725579852496.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("assessing_advancing_question_classifier_v1","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("assessing_advancing_question_classifier_v1", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|assessing_advancing_question_classifier_v1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/jpbywater/assessing-advancing-question-classifier-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-assessing_advancing_question_classifier_v1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-assessing_advancing_question_classifier_v1_pipeline_en.md new file mode 100644 index 00000000000000..2a4c77d2cb3ec2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-assessing_advancing_question_classifier_v1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English assessing_advancing_question_classifier_v1_pipeline pipeline DistilBertForSequenceClassification from jpbywater +author: John Snow Labs +name: assessing_advancing_question_classifier_v1_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`assessing_advancing_question_classifier_v1_pipeline` is a English model originally trained by jpbywater. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/assessing_advancing_question_classifier_v1_pipeline_en_5.5.0_3.0_1725579872450.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/assessing_advancing_question_classifier_v1_pipeline_en_5.5.0_3.0_1725579872450.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("assessing_advancing_question_classifier_v1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("assessing_advancing_question_classifier_v1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|assessing_advancing_question_classifier_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/jpbywater/assessing-advancing-question-classifier-v1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-autofill_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-autofill_ner_pipeline_en.md new file mode 100644 index 00000000000000..1ff9a07bd53dd4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-autofill_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English autofill_ner_pipeline pipeline DistilBertForTokenClassification from gouravchat +author: John Snow Labs +name: autofill_ner_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`autofill_ner_pipeline` is a English model originally trained by gouravchat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/autofill_ner_pipeline_en_5.5.0_3.0_1725500916868.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/autofill_ner_pipeline_en_5.5.0_3.0_1725500916868.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("autofill_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("autofill_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|autofill_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/gouravchat/autofill-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-autotrain_danaos_qa_system_49147118912_en.md b/docs/_posts/ahmedlone127/2024-09-05-autotrain_danaos_qa_system_49147118912_en.md new file mode 100644 index 00000000000000..0399b66de1c86d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-autotrain_danaos_qa_system_49147118912_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English autotrain_danaos_qa_system_49147118912 XlmRoBertaForQuestionAnswering from dimostratocaster2 +author: John Snow Labs +name: autotrain_danaos_qa_system_49147118912 +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`autotrain_danaos_qa_system_49147118912` is a English model originally trained by dimostratocaster2. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/autotrain_danaos_qa_system_49147118912_en_5.5.0_3.0_1725570503776.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/autotrain_danaos_qa_system_49147118912_en_5.5.0_3.0_1725570503776.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("autotrain_danaos_qa_system_49147118912","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("autotrain_danaos_qa_system_49147118912", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
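+
+As an optional follow-up to the Python example above, the extracted answer span can be read from the `answer` output column (a sketch; the column name comes from `setOutputCol("answer")` in the snippet):
+
+```python
+# `answer.result` holds the predicted answer text for each question/context pair.
+pipelineDF.select("question", "context", "answer.result").show(truncate=False)
+```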
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|autotrain_danaos_qa_system_49147118912| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|853.9 MB| + +## References + +https://huggingface.co/dimostratocaster2/autotrain-danaos_qa_system-49147118912 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-autotrain_danaos_qa_system_49147118912_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-autotrain_danaos_qa_system_49147118912_pipeline_en.md new file mode 100644 index 00000000000000..a45a10a631ad58 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-autotrain_danaos_qa_system_49147118912_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English autotrain_danaos_qa_system_49147118912_pipeline pipeline XlmRoBertaForQuestionAnswering from dimostratocaster2 +author: John Snow Labs +name: autotrain_danaos_qa_system_49147118912_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`autotrain_danaos_qa_system_49147118912_pipeline` is a English model originally trained by dimostratocaster2. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/autotrain_danaos_qa_system_49147118912_pipeline_en_5.5.0_3.0_1725570597995.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/autotrain_danaos_qa_system_49147118912_pipeline_en_5.5.0_3.0_1725570597995.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("autotrain_danaos_qa_system_49147118912_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("autotrain_danaos_qa_system_49147118912_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
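+
+As a minimal sketch of preparing the input for this question-answering pipeline, `df` above is assumed to carry `question` and `context` columns, with the answers landing in an `answer` column as in the stand-alone model example; both column names are assumptions about the packaged pipeline rather than facts from the original card:
+
+```python
+df = spark.createDataFrame(
+    [["What framework do I use?", "I use spark-nlp."]]
+).toDF("question", "context")
+
+annotations = pipeline.transform(df)
+annotations.select("answer.result").show(truncate=False)
+```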
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|autotrain_danaos_qa_system_49147118912_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|853.9 MB| + +## References + +https://huggingface.co/dimostratocaster2/autotrain-danaos_qa_system-49147118912 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-autotrain_qasbert_44603112362_en.md b/docs/_posts/ahmedlone127/2024-09-05-autotrain_qasbert_44603112362_en.md new file mode 100644 index 00000000000000..238f05707c6b23 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-autotrain_qasbert_44603112362_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English autotrain_qasbert_44603112362 BertForQuestionAnswering from reyhanAfri +author: John Snow Labs +name: autotrain_qasbert_44603112362 +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`autotrain_qasbert_44603112362` is a English model originally trained by reyhanAfri. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/autotrain_qasbert_44603112362_en_5.5.0_3.0_1725560091079.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/autotrain_qasbert_44603112362_en_5.5.0_3.0_1725560091079.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = BertForQuestionAnswering.pretrained("autotrain_qasbert_44603112362","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = BertForQuestionAnswering.pretrained("autotrain_qasbert_44603112362", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|autotrain_qasbert_44603112362| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|41.9 MB| + +## References + +https://huggingface.co/reyhanAfri/autotrain-qasbert-44603112362 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-autotrain_qasbert_44603112362_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-autotrain_qasbert_44603112362_pipeline_en.md new file mode 100644 index 00000000000000..e9b338724d6243 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-autotrain_qasbert_44603112362_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English autotrain_qasbert_44603112362_pipeline pipeline BertForQuestionAnswering from reyhanAfri +author: John Snow Labs +name: autotrain_qasbert_44603112362_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`autotrain_qasbert_44603112362_pipeline` is a English model originally trained by reyhanAfri. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/autotrain_qasbert_44603112362_pipeline_en_5.5.0_3.0_1725560093637.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/autotrain_qasbert_44603112362_pipeline_en_5.5.0_3.0_1725560093637.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("autotrain_qasbert_44603112362_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("autotrain_qasbert_44603112362_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|autotrain_qasbert_44603112362_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|41.9 MB| + +## References + +https://huggingface.co/reyhanAfri/autotrain-qasbert-44603112362 + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-autotrain_xlm_roberta_base_qa_95197146303_en.md b/docs/_posts/ahmedlone127/2024-09-05-autotrain_xlm_roberta_base_qa_95197146303_en.md new file mode 100644 index 00000000000000..8ed65e338521b4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-autotrain_xlm_roberta_base_qa_95197146303_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English autotrain_xlm_roberta_base_qa_95197146303 XlmRoBertaForQuestionAnswering from baketsu +author: John Snow Labs +name: autotrain_xlm_roberta_base_qa_95197146303 +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`autotrain_xlm_roberta_base_qa_95197146303` is a English model originally trained by baketsu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/autotrain_xlm_roberta_base_qa_95197146303_en_5.5.0_3.0_1725557597199.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/autotrain_xlm_roberta_base_qa_95197146303_en_5.5.0_3.0_1725557597199.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("autotrain_xlm_roberta_base_qa_95197146303","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("autotrain_xlm_roberta_base_qa_95197146303", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|autotrain_xlm_roberta_base_qa_95197146303| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|893.5 MB| + +## References + +https://huggingface.co/baketsu/autotrain-xlm-roberta-base-qa-95197146303 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-autotrain_xlm_roberta_base_qa_95197146303_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-autotrain_xlm_roberta_base_qa_95197146303_pipeline_en.md new file mode 100644 index 00000000000000..09cddc385070e1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-autotrain_xlm_roberta_base_qa_95197146303_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English autotrain_xlm_roberta_base_qa_95197146303_pipeline pipeline XlmRoBertaForQuestionAnswering from baketsu +author: John Snow Labs +name: autotrain_xlm_roberta_base_qa_95197146303_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`autotrain_xlm_roberta_base_qa_95197146303_pipeline` is a English model originally trained by baketsu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/autotrain_xlm_roberta_base_qa_95197146303_pipeline_en_5.5.0_3.0_1725557669858.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/autotrain_xlm_roberta_base_qa_95197146303_pipeline_en_5.5.0_3.0_1725557669858.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("autotrain_xlm_roberta_base_qa_95197146303_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("autotrain_xlm_roberta_base_qa_95197146303_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|autotrain_xlm_roberta_base_qa_95197146303_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|893.6 MB| + +## References + +https://huggingface.co/baketsu/autotrain-xlm-roberta-base-qa-95197146303 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-azerbaijani_question_answering_az.md b/docs/_posts/ahmedlone127/2024-09-05-azerbaijani_question_answering_az.md new file mode 100644 index 00000000000000..5fc919225aed04 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-azerbaijani_question_answering_az.md @@ -0,0 +1,86 @@ +--- +layout: model +title: Azerbaijani azerbaijani_question_answering RoBertaForQuestionAnswering from interneuronai +author: John Snow Labs +name: azerbaijani_question_answering +date: 2024-09-05 +tags: [az, open_source, onnx, question_answering, roberta] +task: Question Answering +language: az +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`azerbaijani_question_answering` is a Azerbaijani model originally trained by interneuronai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/azerbaijani_question_answering_az_5.5.0_3.0_1725576326672.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/azerbaijani_question_answering_az_5.5.0_3.0_1725576326672.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = RoBertaForQuestionAnswering.pretrained("azerbaijani_question_answering","az") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = RoBertaForQuestionAnswering.pretrained("azerbaijani_question_answering", "az")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|azerbaijani_question_answering| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|az| +|Size:|1.3 GB| + +## References + +https://huggingface.co/interneuronai/az-question-answering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-azerbaijani_question_answering_pipeline_az.md b/docs/_posts/ahmedlone127/2024-09-05-azerbaijani_question_answering_pipeline_az.md new file mode 100644 index 00000000000000..277bc45aa802e8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-azerbaijani_question_answering_pipeline_az.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Azerbaijani azerbaijani_question_answering_pipeline pipeline RoBertaForQuestionAnswering from interneuronai +author: John Snow Labs +name: azerbaijani_question_answering_pipeline +date: 2024-09-05 +tags: [az, open_source, pipeline, onnx] +task: Question Answering +language: az +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`azerbaijani_question_answering_pipeline` is a Azerbaijani model originally trained by interneuronai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/azerbaijani_question_answering_pipeline_az_5.5.0_3.0_1725576393936.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/azerbaijani_question_answering_pipeline_az_5.5.0_3.0_1725576393936.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("azerbaijani_question_answering_pipeline", lang = "az") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("azerbaijani_question_answering_pipeline", lang = "az") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|azerbaijani_question_answering_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|az| +|Size:|1.3 GB| + +## References + +https://huggingface.co/interneuronai/az-question-answering + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-babyberta_wikipedia1_1_25m_wikipedia_french1_25m_with_masking_finetuned_french_squad_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-babyberta_wikipedia1_1_25m_wikipedia_french1_25m_with_masking_finetuned_french_squad_pipeline_en.md new file mode 100644 index 00000000000000..e8ad3cf0970a7b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-babyberta_wikipedia1_1_25m_wikipedia_french1_25m_with_masking_finetuned_french_squad_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English babyberta_wikipedia1_1_25m_wikipedia_french1_25m_with_masking_finetuned_french_squad_pipeline pipeline RoBertaForQuestionAnswering from lielbin +author: John Snow Labs +name: babyberta_wikipedia1_1_25m_wikipedia_french1_25m_with_masking_finetuned_french_squad_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`babyberta_wikipedia1_1_25m_wikipedia_french1_25m_with_masking_finetuned_french_squad_pipeline` is a English model originally trained by lielbin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/babyberta_wikipedia1_1_25m_wikipedia_french1_25m_with_masking_finetuned_french_squad_pipeline_en_5.5.0_3.0_1725576499601.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/babyberta_wikipedia1_1_25m_wikipedia_french1_25m_with_masking_finetuned_french_squad_pipeline_en_5.5.0_3.0_1725576499601.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("babyberta_wikipedia1_1_25m_wikipedia_french1_25m_with_masking_finetuned_french_squad_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("babyberta_wikipedia1_1_25m_wikipedia_french1_25m_with_masking_finetuned_french_squad_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|babyberta_wikipedia1_1_25m_wikipedia_french1_25m_with_masking_finetuned_french_squad_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|32.0 MB| + +## References + +https://huggingface.co/lielbin/BabyBERTa-wikipedia1_1.25M_wikipedia_french1.25M-with-Masking-finetuned-french-SQuAD + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-balanced_model_en.md b/docs/_posts/ahmedlone127/2024-09-05-balanced_model_en.md new file mode 100644 index 00000000000000..caf55496ff6e7b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-balanced_model_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English balanced_model RoBertaForSequenceClassification from amishshah +author: John Snow Labs +name: balanced_model +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`balanced_model` is a English model originally trained by amishshah. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/balanced_model_en_5.5.0_3.0_1725542549990.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/balanced_model_en_5.5.0_3.0_1725542549990.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("balanced_model","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("balanced_model", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
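+
+Once the example above has run, the predicted label for each row sits in the `class` column as a Spark NLP annotation. A minimal sketch for reading it back (the column names are the ones set in the snippet; the call itself is plain Spark SQL):
+
+```python
+# "class.result" holds the label strings produced by RoBertaForSequenceClassification
+pipelineDF.select("text", "class.result").show(truncate=False)
+```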
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|balanced_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|464.6 MB| + +## References + +https://huggingface.co/amishshah/balanced_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-balanced_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-balanced_model_pipeline_en.md new file mode 100644 index 00000000000000..2b2585cba1b4c8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-balanced_model_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English balanced_model_pipeline pipeline RoBertaForSequenceClassification from amishshah +author: John Snow Labs +name: balanced_model_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`balanced_model_pipeline` is a English model originally trained by amishshah. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/balanced_model_pipeline_en_5.5.0_3.0_1725542574023.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/balanced_model_pipeline_en_5.5.0_3.0_1725542574023.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("balanced_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("balanced_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
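+
+The `df` referenced above is not defined in the snippet. A minimal sketch of preparing it, assuming an active Spark NLP session, that the pipeline's DocumentAssembler stage reads from a column named `text`, and that the classifier stage writes to a column named `class` (these names are assumptions, not stated by the card):
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()  # start or reuse a Spark NLP session
+
+# Toy input; the sentence is illustrative only
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("balanced_model_pipeline", lang="en")
+annotations = pipeline.transform(df)
+annotations.select("class.result").show(truncate=False)
+```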
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|balanced_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|464.6 MB| + +## References + +https://huggingface.co/amishshah/balanced_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-banglaasr_bangla_speech_processing_en.md b/docs/_posts/ahmedlone127/2024-09-05-banglaasr_bangla_speech_processing_en.md new file mode 100644 index 00000000000000..a43fff712b24c3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-banglaasr_bangla_speech_processing_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English banglaasr_bangla_speech_processing WhisperForCTC from bangla-speech-processing +author: John Snow Labs +name: banglaasr_bangla_speech_processing +date: 2024-09-05 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`banglaasr_bangla_speech_processing` is a English model originally trained by bangla-speech-processing. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/banglaasr_bangla_speech_processing_en_5.5.0_3.0_1725548087042.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/banglaasr_bangla_speech_processing_en_5.5.0_3.0_1725548087042.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("banglaasr_bangla_speech_processing","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+# data: a DataFrame with an "audio_content" column of raw audio samples (defined elsewhere)
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("banglaasr_bangla_speech_processing", "en")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+// data: a DataFrame with an "audio_content" column of raw audio samples (defined elsewhere)
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
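+
+Neither snippet defines `data`. AudioAssembler expects a column of raw audio samples (floating-point amplitudes), so one way to build the input is to decode the audio outside Spark NLP first; the sketch below uses the third-party `librosa` library and an illustrative file name, both of which are assumptions rather than part of the original card:
+
+```python
+import librosa
+
+# Decode a local file to 16 kHz mono float samples (path and sample rate are assumptions)
+samples, _ = librosa.load("sample_bangla.wav", sr=16000)
+
+# One row with an "audio_content" column holding the float array
+data = spark.createDataFrame([[samples.tolist()]], ["audio_content"])
+```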
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|banglaasr_bangla_speech_processing| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/bangla-speech-processing/BanglaASR \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-banglaasr_bangla_speech_processing_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-banglaasr_bangla_speech_processing_pipeline_en.md new file mode 100644 index 00000000000000..73af33fd9e9651 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-banglaasr_bangla_speech_processing_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English banglaasr_bangla_speech_processing_pipeline pipeline WhisperForCTC from bangla-speech-processing +author: John Snow Labs +name: banglaasr_bangla_speech_processing_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`banglaasr_bangla_speech_processing_pipeline` is a English model originally trained by bangla-speech-processing. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/banglaasr_bangla_speech_processing_pipeline_en_5.5.0_3.0_1725548179765.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/banglaasr_bangla_speech_processing_pipeline_en_5.5.0_3.0_1725548179765.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("banglaasr_bangla_speech_processing_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("banglaasr_bangla_speech_processing_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|banglaasr_bangla_speech_processing_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/bangla-speech-processing/BanglaASR + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_base_german_uncased_dbmdz_de.md b/docs/_posts/ahmedlone127/2024-09-05-bert_base_german_uncased_dbmdz_de.md new file mode 100644 index 00000000000000..de3d530203b829 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_base_german_uncased_dbmdz_de.md @@ -0,0 +1,94 @@ +--- +layout: model +title: German bert_base_german_uncased_dbmdz BertEmbeddings from dbmdz +author: John Snow Labs +name: bert_base_german_uncased_dbmdz +date: 2024-09-05 +tags: [de, open_source, onnx, embeddings, bert] +task: Embeddings +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_german_uncased_dbmdz` is a German model originally trained by dbmdz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_german_uncased_dbmdz_de_5.5.0_3.0_1725519637392.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_german_uncased_dbmdz_de_5.5.0_3.0_1725519637392.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_base_german_uncased_dbmdz","de") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("bert_base_german_uncased_dbmdz","de") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
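+
+The `embeddings` column produced above holds one annotation per token, with the token string in its `result` field and the vector in its `embeddings` field. A small sketch for flattening the output for inspection (names follow the pipeline defined in the example):
+
+```python
+from pyspark.sql import functions as F
+
+# One row per token: the token text and its BERT vector
+pipelineDF.select(F.explode("embeddings").alias("e")) \
+    .select(F.col("e.result").alias("token"), F.col("e.embeddings").alias("vector")) \
+    .show(truncate=80)
+```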
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_german_uncased_dbmdz| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|de| +|Size:|409.9 MB| + +## References + +https://huggingface.co/dbmdz/bert-base-german-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_base_swedish_cased_nepal_bhasa_pipeline_sv.md b/docs/_posts/ahmedlone127/2024-09-05-bert_base_swedish_cased_nepal_bhasa_pipeline_sv.md new file mode 100644 index 00000000000000..b1bd45ca4aff4a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_base_swedish_cased_nepal_bhasa_pipeline_sv.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Swedish bert_base_swedish_cased_nepal_bhasa_pipeline pipeline BertEmbeddings from KBLab +author: John Snow Labs +name: bert_base_swedish_cased_nepal_bhasa_pipeline +date: 2024-09-05 +tags: [sv, open_source, pipeline, onnx] +task: Embeddings +language: sv +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_swedish_cased_nepal_bhasa_pipeline` is a Swedish model originally trained by KBLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_swedish_cased_nepal_bhasa_pipeline_sv_5.5.0_3.0_1725534225048.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_swedish_cased_nepal_bhasa_pipeline_sv_5.5.0_3.0_1725534225048.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_base_swedish_cased_nepal_bhasa_pipeline", lang = "sv") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_base_swedish_cased_nepal_bhasa_pipeline", lang = "sv") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_swedish_cased_nepal_bhasa_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|sv| +|Size:|504.9 MB| + +## References + +https://huggingface.co/KBLab/bert-base-swedish-cased-new + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_base_swedish_cased_nepal_bhasa_sv.md b/docs/_posts/ahmedlone127/2024-09-05-bert_base_swedish_cased_nepal_bhasa_sv.md new file mode 100644 index 00000000000000..e2f10872f4c09c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_base_swedish_cased_nepal_bhasa_sv.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Swedish bert_base_swedish_cased_nepal_bhasa BertEmbeddings from KBLab +author: John Snow Labs +name: bert_base_swedish_cased_nepal_bhasa +date: 2024-09-05 +tags: [sv, open_source, onnx, embeddings, bert] +task: Embeddings +language: sv +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_swedish_cased_nepal_bhasa` is a Swedish model originally trained by KBLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_swedish_cased_nepal_bhasa_sv_5.5.0_3.0_1725534197148.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_swedish_cased_nepal_bhasa_sv_5.5.0_3.0_1725534197148.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_base_swedish_cased_nepal_bhasa","sv") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("bert_base_swedish_cased_nepal_bhasa","sv") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_swedish_cased_nepal_bhasa| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|sv| +|Size:|504.9 MB| + +## References + +https://huggingface.co/KBLab/bert-base-swedish-cased-new \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_based_turkish_ner_wikiann_pipeline_tr.md b/docs/_posts/ahmedlone127/2024-09-05-bert_based_turkish_ner_wikiann_pipeline_tr.md new file mode 100644 index 00000000000000..91efa4c9c728b5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_based_turkish_ner_wikiann_pipeline_tr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Turkish bert_based_turkish_ner_wikiann_pipeline pipeline BertForTokenClassification from Gorengoz +author: John Snow Labs +name: bert_based_turkish_ner_wikiann_pipeline +date: 2024-09-05 +tags: [tr, open_source, pipeline, onnx] +task: Named Entity Recognition +language: tr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_based_turkish_ner_wikiann_pipeline` is a Turkish model originally trained by Gorengoz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_based_turkish_ner_wikiann_pipeline_tr_5.5.0_3.0_1725511686492.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_based_turkish_ner_wikiann_pipeline_tr_5.5.0_3.0_1725511686492.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_based_turkish_ner_wikiann_pipeline", lang = "tr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_based_turkish_ner_wikiann_pipeline", lang = "tr") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_based_turkish_ner_wikiann_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|tr| +|Size:|412.3 MB| + +## References + +https://huggingface.co/Gorengoz/bert-based-Turkish-NER-wikiann + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_cased_ner_pipeline_tr.md b/docs/_posts/ahmedlone127/2024-09-05-bert_cased_ner_pipeline_tr.md new file mode 100644 index 00000000000000..45db2a9081d6e0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_cased_ner_pipeline_tr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Turkish bert_cased_ner_pipeline pipeline BertForTokenClassification from SenswiseData +author: John Snow Labs +name: bert_cased_ner_pipeline +date: 2024-09-05 +tags: [tr, open_source, pipeline, onnx] +task: Named Entity Recognition +language: tr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_cased_ner_pipeline` is a Turkish model originally trained by SenswiseData. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_cased_ner_pipeline_tr_5.5.0_3.0_1725538597003.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_cased_ner_pipeline_tr_5.5.0_3.0_1725538597003.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_cased_ner_pipeline", lang = "tr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_cased_ner_pipeline", lang = "tr") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_cased_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|tr| +|Size:|412.3 MB| + +## References + +https://huggingface.co/SenswiseData/bert_cased_ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_cased_ner_tr.md b/docs/_posts/ahmedlone127/2024-09-05-bert_cased_ner_tr.md new file mode 100644 index 00000000000000..961e1d999a0861 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_cased_ner_tr.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Turkish bert_cased_ner BertForTokenClassification from SenswiseData +author: John Snow Labs +name: bert_cased_ner +date: 2024-09-05 +tags: [tr, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: tr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_cased_ner` is a Turkish model originally trained by SenswiseData. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_cased_ner_tr_5.5.0_3.0_1725538573092.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_cased_ner_tr_5.5.0_3.0_1725538573092.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("bert_cased_ner","tr") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bert_cased_ner", "tr")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
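+
+After the example runs, each row carries parallel arrays of tokens and predicted IOB tags. A short sketch for viewing them side by side (plain Spark SQL over the columns defined above):
+
+```python
+# Tokens and their predicted IOB tags, row by row
+pipelineDF.selectExpr("token.result as tokens", "ner.result as ner_tags").show(truncate=False)
+```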
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_cased_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|tr| +|Size:|412.3 MB| + +## References + +https://huggingface.co/SenswiseData/bert_cased_ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_finetuned1_arcd_en.md b/docs/_posts/ahmedlone127/2024-09-05-bert_finetuned1_arcd_en.md new file mode 100644 index 00000000000000..c88bc490cb6527 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_finetuned1_arcd_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English bert_finetuned1_arcd XlmRoBertaForQuestionAnswering from Echiguerkh +author: John Snow Labs +name: bert_finetuned1_arcd +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_finetuned1_arcd` is a English model originally trained by Echiguerkh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned1_arcd_en_5.5.0_3.0_1725556972043.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned1_arcd_en_5.5.0_3.0_1725556972043.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("bert_finetuned1_arcd","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("bert_finetuned1_arcd", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
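+
+The predicted answer span ends up in the `answer` column. A brief sketch for reading it back together with the question (column names are the ones defined above):
+
+```python
+# Question text and the extracted answer span
+pipelineDF.select("document_question.result", "answer.result").show(truncate=False)
+```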
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned1_arcd| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|776.5 MB| + +## References + +https://huggingface.co/Echiguerkh/bert-finetuned1-arcd \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_finetuned1_arcd_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-bert_finetuned1_arcd_pipeline_en.md new file mode 100644 index 00000000000000..7626d666a6a3ac --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_finetuned1_arcd_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bert_finetuned1_arcd_pipeline pipeline XlmRoBertaForQuestionAnswering from Echiguerkh +author: John Snow Labs +name: bert_finetuned1_arcd_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_finetuned1_arcd_pipeline` is a English model originally trained by Echiguerkh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned1_arcd_pipeline_en_5.5.0_3.0_1725557117440.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned1_arcd_pipeline_en_5.5.0_3.0_1725557117440.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_finetuned1_arcd_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_finetuned1_arcd_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned1_arcd_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|776.5 MB| + +## References + +https://huggingface.co/Echiguerkh/bert-finetuned1-arcd + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_finetuned_en.md b/docs/_posts/ahmedlone127/2024-09-05-bert_finetuned_en.md new file mode 100644 index 00000000000000..a8c7ffafa6d105 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_finetuned_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_finetuned DistilBertForSequenceClassification from lvhma +author: John Snow Labs +name: bert_finetuned +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_finetuned` is a English model originally trained by lvhma. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_en_5.5.0_3.0_1725579884501.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_en_5.5.0_3.0_1725579884501.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("bert_finetuned","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("bert_finetuned", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/lvhma/bert_finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_finetuned_ner_kirill_a_en.md b/docs/_posts/ahmedlone127/2024-09-05-bert_finetuned_ner_kirill_a_en.md new file mode 100644 index 00000000000000..bd21d9baf10dca --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_finetuned_ner_kirill_a_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_finetuned_ner_kirill_a BertForTokenClassification from Kirill-A +author: John Snow Labs +name: bert_finetuned_ner_kirill_a +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_finetuned_ner_kirill_a` is a English model originally trained by Kirill-A. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_kirill_a_en_5.5.0_3.0_1725563576900.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_kirill_a_en_5.5.0_3.0_1725563576900.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("bert_finetuned_ner_kirill_a","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bert_finetuned_ner_kirill_a", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
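+
+The `ner` column holds token-level IOB tags. To group them into entity chunks, Spark NLP's NerConverter can be appended to the same pipeline; the sketch below assumes the column and variable names used in the example above:
+
+```python
+from sparknlp.annotator import NerConverter
+
+# Groups B-/I- tagged tokens from the "ner" column into entity chunks
+nerConverter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("entities")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier, nerConverter])
+pipelineDF = pipeline.fit(data).transform(data)
+pipelineDF.select("entities.result").show(truncate=False)
+```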
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_ner_kirill_a| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/Kirill-A/BERT-Finetuned-NER \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_finetuned_ner_kirill_a_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-bert_finetuned_ner_kirill_a_pipeline_en.md new file mode 100644 index 00000000000000..06c0e7a1b6a3f1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_finetuned_ner_kirill_a_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_finetuned_ner_kirill_a_pipeline pipeline BertForTokenClassification from Kirill-A +author: John Snow Labs +name: bert_finetuned_ner_kirill_a_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_finetuned_ner_kirill_a_pipeline` is a English model originally trained by Kirill-A. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_kirill_a_pipeline_en_5.5.0_3.0_1725563598850.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_kirill_a_pipeline_en_5.5.0_3.0_1725563598850.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_finetuned_ner_kirill_a_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_finetuned_ner_kirill_a_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_ner_kirill_a_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/Kirill-A/BERT-Finetuned-NER + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_finetuned_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-bert_finetuned_pipeline_en.md new file mode 100644 index 00000000000000..700cb341a61eee --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_finetuned_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_finetuned_pipeline pipeline DistilBertForSequenceClassification from lvhma +author: John Snow Labs +name: bert_finetuned_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_finetuned_pipeline` is a English model originally trained by lvhma. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_pipeline_en_5.5.0_3.0_1725579896949.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_pipeline_en_5.5.0_3.0_1725579896949.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_finetuned_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_finetuned_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/lvhma/bert_finetuned + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_finetuned_semitic_languages_eval_english_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-bert_finetuned_semitic_languages_eval_english_pipeline_en.md new file mode 100644 index 00000000000000..fff8fa4bd7424f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_finetuned_semitic_languages_eval_english_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_finetuned_semitic_languages_eval_english_pipeline pipeline MPNetForSequenceClassification from ClaudiaRichard +author: John Snow Labs +name: bert_finetuned_semitic_languages_eval_english_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_finetuned_semitic_languages_eval_english_pipeline` is a English model originally trained by ClaudiaRichard. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_semitic_languages_eval_english_pipeline_en_5.5.0_3.0_1725575536256.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_semitic_languages_eval_english_pipeline_en_5.5.0_3.0_1725575536256.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_finetuned_semitic_languages_eval_english_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_finetuned_semitic_languages_eval_english_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_semitic_languages_eval_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.1 MB| + +## References + +https://huggingface.co/ClaudiaRichard/bert-finetuned-sem_eval-english + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_gemma_2_2b_italian_imdb_2bit_0_en.md b/docs/_posts/ahmedlone127/2024-09-05-bert_gemma_2_2b_italian_imdb_2bit_0_en.md new file mode 100644 index 00000000000000..7ad1429c7288a1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_gemma_2_2b_italian_imdb_2bit_0_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_gemma_2_2b_italian_imdb_2bit_0 DistilBertForSequenceClassification from jvelja +author: John Snow Labs +name: bert_gemma_2_2b_italian_imdb_2bit_0 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_gemma_2_2b_italian_imdb_2bit_0` is a English model originally trained by jvelja. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_gemma_2_2b_italian_imdb_2bit_0_en_5.5.0_3.0_1725507021003.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_gemma_2_2b_italian_imdb_2bit_0_en_5.5.0_3.0_1725507021003.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("bert_gemma_2_2b_italian_imdb_2bit_0","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("bert_gemma_2_2b_italian_imdb_2bit_0", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_gemma_2_2b_italian_imdb_2bit_0| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/jvelja/BERT_gemma-2-2b-it_imdb_2bit_0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_medieval_multilingual_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-05-bert_medieval_multilingual_pipeline_xx.md new file mode 100644 index 00000000000000..5b8fca0eb4ffc8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_medieval_multilingual_pipeline_xx.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Multilingual bert_medieval_multilingual_pipeline pipeline BertEmbeddings from magistermilitum +author: John Snow Labs +name: bert_medieval_multilingual_pipeline +date: 2024-09-05 +tags: [xx, open_source, pipeline, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_medieval_multilingual_pipeline` is a Multilingual model originally trained by magistermilitum. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_medieval_multilingual_pipeline_xx_5.5.0_3.0_1725534015160.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_medieval_multilingual_pipeline_xx_5.5.0_3.0_1725534015160.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_medieval_multilingual_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_medieval_multilingual_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_medieval_multilingual_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|665.0 MB| + +## References + +https://huggingface.co/magistermilitum/bert_medieval_multilingual + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_medieval_multilingual_xx.md b/docs/_posts/ahmedlone127/2024-09-05-bert_medieval_multilingual_xx.md new file mode 100644 index 00000000000000..d7c2272e1df4ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_medieval_multilingual_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual bert_medieval_multilingual BertEmbeddings from magistermilitum +author: John Snow Labs +name: bert_medieval_multilingual +date: 2024-09-05 +tags: [xx, open_source, onnx, embeddings, bert] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_medieval_multilingual` is a Multilingual model originally trained by magistermilitum. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_medieval_multilingual_xx_5.5.0_3.0_1725533982268.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_medieval_multilingual_xx_5.5.0_3.0_1725533982268.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_medieval_multilingual","xx") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("bert_medieval_multilingual","xx") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_medieval_multilingual| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|xx| +|Size:|664.9 MB| + +## References + +https://huggingface.co/magistermilitum/bert_medieval_multilingual \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_ner_biobert_base_cased_v1.2_finetuned_ner_craft_augmentedtransfer_english_xx.md b/docs/_posts/ahmedlone127/2024-09-05-bert_ner_biobert_base_cased_v1.2_finetuned_ner_craft_augmentedtransfer_english_xx.md new file mode 100644 index 00000000000000..01c96c0d9176f8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_ner_biobert_base_cased_v1.2_finetuned_ner_craft_augmentedtransfer_english_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual bert_ner_biobert_base_cased_v1.2_finetuned_ner_craft_augmentedtransfer_english BertForTokenClassification from StivenLancheros +author: John Snow Labs +name: bert_ner_biobert_base_cased_v1.2_finetuned_ner_craft_augmentedtransfer_english +date: 2024-09-05 +tags: [xx, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_ner_biobert_base_cased_v1.2_finetuned_ner_craft_augmentedtransfer_english` is a Multilingual model originally trained by StivenLancheros. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ner_biobert_base_cased_v1.2_finetuned_ner_craft_augmentedtransfer_english_xx_5.5.0_3.0_1725511055105.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ner_biobert_base_cased_v1.2_finetuned_ner_craft_augmentedtransfer_english_xx_5.5.0_3.0_1725511055105.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("bert_ner_biobert_base_cased_v1.2_finetuned_ner_craft_augmentedtransfer_english","xx") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bert_ner_biobert_base_cased_v1.2_finetuned_ner_craft_augmentedtransfer_english", "xx")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ner_biobert_base_cased_v1.2_finetuned_ner_craft_augmentedtransfer_english| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|xx| +|Size:|403.7 MB| + +## References + +https://huggingface.co/StivenLancheros/biobert-base-cased-v1.2-finetuned-ner-CRAFT_AugmentedTransfer_EN \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_ner_cause_effect_detection_en.md b/docs/_posts/ahmedlone127/2024-09-05-bert_ner_cause_effect_detection_en.md new file mode 100644 index 00000000000000..b5fb03478d412f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_ner_cause_effect_detection_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_ner_cause_effect_detection BertForTokenClassification from noahjadallah +author: John Snow Labs +name: bert_ner_cause_effect_detection +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_ner_cause_effect_detection` is a English model originally trained by noahjadallah. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ner_cause_effect_detection_en_5.5.0_3.0_1725564033646.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ner_cause_effect_detection_en_5.5.0_3.0_1725564033646.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("bert_ner_cause_effect_detection","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bert_ner_cause_effect_detection", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ner_cause_effect_detection| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/noahjadallah/cause-effect-detection \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_ner_cause_effect_detection_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-bert_ner_cause_effect_detection_pipeline_en.md new file mode 100644 index 00000000000000..544d4e0a1861a8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_ner_cause_effect_detection_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_ner_cause_effect_detection_pipeline pipeline BertForTokenClassification from noahjadallah +author: John Snow Labs +name: bert_ner_cause_effect_detection_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_ner_cause_effect_detection_pipeline` is a English model originally trained by noahjadallah. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ner_cause_effect_detection_pipeline_en_5.5.0_3.0_1725564055700.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ner_cause_effect_detection_pipeline_en_5.5.0_3.0_1725564055700.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_ner_cause_effect_detection_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_ner_cause_effect_detection_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
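+
+In the example above, `df` is assumed to be a Spark DataFrame with a `text` column. A possible end-to-end sketch, including the `annotate()` shortcut for quick ad-hoc checks, is shown below (the input sentence is illustrative, and the token classifier's output column is assumed to be named `ner` as in the corresponding non-pipeline model card):
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+pipeline = PretrainedPipeline("bert_ner_cause_effect_detection_pipeline", lang = "en")
+
+# DataFrame usage, as above: the pretrained pipeline reads the "text" column.
+df = spark.createDataFrame([["Heavy rain caused the flight delay."]]).toDF("text")
+annotations = pipeline.transform(df)
+annotations.select("ner.result").show(truncate=False)
+
+# For a single string, annotate() returns a plain Python dict of results.
+print(pipeline.annotate("Heavy rain caused the flight delay."))
+```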
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ner_cause_effect_detection_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/noahjadallah/cause-effect-detection + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_ner_classifier_en.md b/docs/_posts/ahmedlone127/2024-09-05-bert_ner_classifier_en.md new file mode 100644 index 00000000000000..5b9333c655622e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_ner_classifier_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_ner_classifier BertForTokenClassification from phanerozoic +author: John Snow Labs +name: bert_ner_classifier +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_ner_classifier` is a English model originally trained by phanerozoic. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ner_classifier_en_5.5.0_3.0_1725511006540.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ner_classifier_en_5.5.0_3.0_1725511006540.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("bert_ner_classifier","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bert_ner_classifier", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ner_classifier| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/phanerozoic/BERT-NER-Classifier \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_ner_classifier_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-bert_ner_classifier_pipeline_en.md new file mode 100644 index 00000000000000..3f165dbfbaf3ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_ner_classifier_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_ner_classifier_pipeline pipeline BertForTokenClassification from phanerozoic +author: John Snow Labs +name: bert_ner_classifier_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_ner_classifier_pipeline` is a English model originally trained by phanerozoic. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ner_classifier_pipeline_en_5.5.0_3.0_1725511024905.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ner_classifier_pipeline_en_5.5.0_3.0_1725511024905.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_ner_classifier_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_ner_classifier_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ner_classifier_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/phanerozoic/BERT-NER-Classifier + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_ner_rubertconv_toxic_editor_pipeline_ru.md b/docs/_posts/ahmedlone127/2024-09-05-bert_ner_rubertconv_toxic_editor_pipeline_ru.md new file mode 100644 index 00000000000000..c2a776fb28cfa4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_ner_rubertconv_toxic_editor_pipeline_ru.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Russian bert_ner_rubertconv_toxic_editor_pipeline pipeline BertForTokenClassification from IlyaGusev +author: John Snow Labs +name: bert_ner_rubertconv_toxic_editor_pipeline +date: 2024-09-05 +tags: [ru, open_source, pipeline, onnx] +task: Named Entity Recognition +language: ru +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_ner_rubertconv_toxic_editor_pipeline` is a Russian model originally trained by IlyaGusev. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ner_rubertconv_toxic_editor_pipeline_ru_5.5.0_3.0_1725511172563.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ner_rubertconv_toxic_editor_pipeline_ru_5.5.0_3.0_1725511172563.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_ner_rubertconv_toxic_editor_pipeline", lang = "ru") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_ner_rubertconv_toxic_editor_pipeline", lang = "ru") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ner_rubertconv_toxic_editor_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ru| +|Size:|662.2 MB| + +## References + +https://huggingface.co/IlyaGusev/rubertconv_toxic_editor + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_ner_spacebert_cree_en.md b/docs/_posts/ahmedlone127/2024-09-05-bert_ner_spacebert_cree_en.md new file mode 100644 index 00000000000000..5d6be5adc2acaf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_ner_spacebert_cree_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_ner_spacebert_cree BertForTokenClassification from icelab +author: John Snow Labs +name: bert_ner_spacebert_cree +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_ner_spacebert_cree` is a English model originally trained by icelab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ner_spacebert_cree_en_5.5.0_3.0_1725563415578.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ner_spacebert_cree_en_5.5.0_3.0_1725563415578.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("bert_ner_spacebert_cree","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bert_ner_spacebert_cree", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
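+
+For low-latency inference on single sentences or small batches, the fitted `pipelineModel` from the example above can also be wrapped in a `LightPipeline`, which skips the DataFrame round-trip; a brief sketch (the input sentence is only illustrative):
+
+```python
+from sparknlp.base import LightPipeline
+
+# LightPipeline runs the same fitted stages directly on plain Python strings.
+light_pipeline = LightPipeline(pipelineModel)
+print(light_pipeline.annotate("The spacecraft entered low Earth orbit."))
+```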
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ner_spacebert_cree| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/icelab/spacebert_CR \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_ner_spacebert_cree_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-bert_ner_spacebert_cree_pipeline_en.md new file mode 100644 index 00000000000000..3e4a523ca52f70 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_ner_spacebert_cree_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_ner_spacebert_cree_pipeline pipeline BertForTokenClassification from icelab +author: John Snow Labs +name: bert_ner_spacebert_cree_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_ner_spacebert_cree_pipeline` is a English model originally trained by icelab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ner_spacebert_cree_pipeline_en_5.5.0_3.0_1725563436885.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ner_spacebert_cree_pipeline_en_5.5.0_3.0_1725563436885.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_ner_spacebert_cree_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_ner_spacebert_cree_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ner_spacebert_cree_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/icelab/spacebert_CR + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_ner_spacescibert_cree_en.md b/docs/_posts/ahmedlone127/2024-09-05-bert_ner_spacescibert_cree_en.md new file mode 100644 index 00000000000000..da47a464207c95 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_ner_spacescibert_cree_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_ner_spacescibert_cree BertForTokenClassification from icelab +author: John Snow Labs +name: bert_ner_spacescibert_cree +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_ner_spacescibert_cree` is a English model originally trained by icelab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ner_spacescibert_cree_en_5.5.0_3.0_1725538813066.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ner_spacescibert_cree_en_5.5.0_3.0_1725538813066.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("bert_ner_spacescibert_cree","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bert_ner_spacescibert_cree", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ner_spacescibert_cree| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|410.0 MB| + +## References + +https://huggingface.co/icelab/spacescibert_CR \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_ner_spacescibert_cree_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-bert_ner_spacescibert_cree_pipeline_en.md new file mode 100644 index 00000000000000..74a148f54a131d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_ner_spacescibert_cree_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_ner_spacescibert_cree_pipeline pipeline BertForTokenClassification from icelab +author: John Snow Labs +name: bert_ner_spacescibert_cree_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_ner_spacescibert_cree_pipeline` is a English model originally trained by icelab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ner_spacescibert_cree_pipeline_en_5.5.0_3.0_1725538833445.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ner_spacescibert_cree_pipeline_en_5.5.0_3.0_1725538833445.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_ner_spacescibert_cree_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_ner_spacescibert_cree_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ner_spacescibert_cree_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|410.0 MB| + +## References + +https://huggingface.co/icelab/spacescibert_CR + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_qa_marianalc_finetuned_squad_accelerate_en.md b/docs/_posts/ahmedlone127/2024-09-05-bert_qa_marianalc_finetuned_squad_accelerate_en.md new file mode 100644 index 00000000000000..217910bba29d68 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_qa_marianalc_finetuned_squad_accelerate_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English BertForQuestionAnswering Cased model (from MarianaLC) +author: John Snow Labs +name: bert_qa_marianalc_finetuned_squad_accelerate +date: 2024-09-05 +tags: [en, open_source, bert, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert-finetuned-squad-accelerate` is a English model originally trained by `MarianaLC`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_qa_marianalc_finetuned_squad_accelerate_en_5.5.0_3.0_1725554120169.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_qa_marianalc_finetuned_squad_accelerate_en_5.5.0_3.0_1725554120169.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +Document_Assembler = MultiDocumentAssembler()\ + .setInputCols(["question", "context"])\ + .setOutputCols(["document_question", "document_context"]) + +Question_Answering = BertForQuestionAnswering.pretrained("bert_qa_marianalc_finetuned_squad_accelerate","en")\ + .setInputCols(["document_question", "document_context"])\ + .setOutputCol("answer")\ + .setCaseSensitive(True) + +pipeline = Pipeline(stages=[Document_Assembler, Question_Answering]) + +data = spark.createDataFrame([["What's my name?","My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(data).transform(data) +``` +```scala +val Document_Assembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val Question_Answering = BertForQuestionAnswering.pretrained("bert_qa_marianalc_finetuned_squad_accelerate","en") + .setInputCols(Array("document_question", "document_context")) + .setOutputCol("answer") + .setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(Document_Assembler, Question_Answering)) + +val data = Seq("What's my name?","My name is Clara and I live in Berkeley.").toDS.toDF("question", "context") + +val result = pipeline.fit(data).transform(data) +``` +
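+
+To read the predicted span back out of the `answer` annotation column, something along these lines should work with the `result` DataFrame produced above:
+
+```python
+# ".result" holds the extracted answer string for each row.
+result.select("answer.result").show(truncate=False)
+```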
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_qa_marianalc_finetuned_squad_accelerate| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|403.7 MB| + +## References + +References + +- https://huggingface.co/MarianaLC/bert-finetuned-squad-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_qa_marianalc_finetuned_squad_accelerate_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-bert_qa_marianalc_finetuned_squad_accelerate_pipeline_en.md new file mode 100644 index 00000000000000..fef5377b7f36b4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_qa_marianalc_finetuned_squad_accelerate_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bert_qa_marianalc_finetuned_squad_accelerate_pipeline pipeline BertForQuestionAnswering from MarianaLC +author: John Snow Labs +name: bert_qa_marianalc_finetuned_squad_accelerate_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_qa_marianalc_finetuned_squad_accelerate_pipeline` is a English model originally trained by MarianaLC. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_qa_marianalc_finetuned_squad_accelerate_pipeline_en_5.5.0_3.0_1725554141248.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_qa_marianalc_finetuned_squad_accelerate_pipeline_en_5.5.0_3.0_1725554141248.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_qa_marianalc_finetuned_squad_accelerate_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_qa_marianalc_finetuned_squad_accelerate_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_qa_marianalc_finetuned_squad_accelerate_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/MarianaLC/bert-finetuned-squad-accelerate + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_question_answering_cased_squadv2_turkish_en.md b/docs/_posts/ahmedlone127/2024-09-05-bert_question_answering_cased_squadv2_turkish_en.md new file mode 100644 index 00000000000000..79217640ae6aac --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_question_answering_cased_squadv2_turkish_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English bert_question_answering_cased_squadv2_turkish BertForQuestionAnswering from okanvk +author: John Snow Labs +name: bert_question_answering_cased_squadv2_turkish +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_question_answering_cased_squadv2_turkish` is a English model originally trained by okanvk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_question_answering_cased_squadv2_turkish_en_5.5.0_3.0_1725559942725.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_question_answering_cased_squadv2_turkish_en_5.5.0_3.0_1725559942725.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = BertForQuestionAnswering.pretrained("bert_question_answering_cased_squadv2_turkish","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = BertForQuestionAnswering.pretrained("bert_question_answering_cased_squadv2_turkish", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_question_answering_cased_squadv2_turkish| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|412.3 MB| + +## References + +https://huggingface.co/okanvk/bert-question-answering-cased-squadv2_tr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_question_answering_cased_squadv2_turkish_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-bert_question_answering_cased_squadv2_turkish_pipeline_en.md new file mode 100644 index 00000000000000..0e8ab5e0e76f08 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_question_answering_cased_squadv2_turkish_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bert_question_answering_cased_squadv2_turkish_pipeline pipeline BertForQuestionAnswering from okanvk +author: John Snow Labs +name: bert_question_answering_cased_squadv2_turkish_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_question_answering_cased_squadv2_turkish_pipeline` is a English model originally trained by okanvk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_question_answering_cased_squadv2_turkish_pipeline_en_5.5.0_3.0_1725559964318.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_question_answering_cased_squadv2_turkish_pipeline_en_5.5.0_3.0_1725559964318.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_question_answering_cased_squadv2_turkish_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_question_answering_cased_squadv2_turkish_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_question_answering_cased_squadv2_turkish_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|412.3 MB| + +## References + +https://huggingface.co/okanvk/bert-question-answering-cased-squadv2_tr + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_autotrain_jobberta_23_3671398065_en.md b/docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_autotrain_jobberta_23_3671398065_en.md new file mode 100644 index 00000000000000..5d7e73ec394910 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_autotrain_jobberta_23_3671398065_en.md @@ -0,0 +1,98 @@ +--- +layout: model +title: English BertForTokenClassification Cased model (from Andrei95) +author: John Snow Labs +name: bert_token_classifier_autotrain_jobberta_23_3671398065 +date: 2024-09-05 +tags: [en, open_source, bert, token_classification, ner, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `autotrain-jobberta-23-3671398065` is a English model originally trained by `Andrei95`. + +## Predicted Entities + +`Skill-Knowledge`, `Skill`, `Knowledge` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_token_classifier_autotrain_jobberta_23_3671398065_en_5.5.0_3.0_1725539034759.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_token_classifier_autotrain_jobberta_23_3671398065_en_5.5.0_3.0_1725539034759.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +tokenClassifier = BertForTokenClassification.pretrained("bert_token_classifier_autotrain_jobberta_23_3671398065","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, tokenClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("bert_token_classifier_autotrain_jobberta_23_3671398065","en") + .setInputCols(Array("document", "token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_token_classifier_autotrain_jobberta_23_3671398065| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|1.2 GB| + +## References + +References + +- https://huggingface.co/Andrei95/autotrain-jobberta-23-3671398065 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_foodbase_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_foodbase_ner_pipeline_en.md new file mode 100644 index 00000000000000..44ead9bf1b46c3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_foodbase_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_token_classifier_foodbase_ner_pipeline pipeline BertForTokenClassification from Dizex +author: John Snow Labs +name: bert_token_classifier_foodbase_ner_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_token_classifier_foodbase_ner_pipeline` is a English model originally trained by Dizex. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_token_classifier_foodbase_ner_pipeline_en_5.5.0_3.0_1725563496860.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_token_classifier_foodbase_ner_pipeline_en_5.5.0_3.0_1725563496860.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_token_classifier_foodbase_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_token_classifier_foodbase_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_token_classifier_foodbase_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/Dizex/FoodBaseBERT-NER + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_large_ner_en.md b/docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_large_ner_en.md new file mode 100644 index 00000000000000..8f53cba8cc4b58 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_large_ner_en.md @@ -0,0 +1,102 @@ +--- +layout: model +title: English BertForTokenClassification Large Cased model (from dslim) +author: John Snow Labs +name: bert_token_classifier_large_ner +date: 2024-09-05 +tags: [en, open_source, bert, token_classification, ner, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert-large-NER` is a English model originally trained by `dslim`. + +## Predicted Entities + +`PER`, `ORG`, `LOC`, `MISC` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_token_classifier_large_ner_en_5.5.0_3.0_1725538762277.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_token_classifier_large_ner_en_5.5.0_3.0_1725538762277.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +tokenClassifier = BertForTokenClassification.pretrained("bert_token_classifier_large_ner","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, tokenClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("bert_token_classifier_large_ner","en") + .setInputCols(Array("document", "token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
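+
+If the token-level IOB tags need to be merged into whole entity mentions (for example multi-token `PER` or `ORG` spans), a `NerConverter` stage can be applied after the token classifier; the sketch below assumes the `result` DataFrame and column names from the example above:
+
+```python
+from sparknlp.annotator import NerConverter
+
+# Groups B-/I- tagged tokens from the "ner" column into entity chunks.
+converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+chunks = converter.transform(result)
+chunks.select("ner_chunk.result").show(truncate=False)
+```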
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_token_classifier_large_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|1.2 GB| + +## References + +References + +- https://huggingface.co/dslim/bert-large-NER +- https://www.aclweb.org/anthology/W03-0419.pdf +- https://www.aclweb.org/anthology/W03-0419.pdf +- https://arxiv.org/pdf/1810.04805 +- https://github.com/google-research/bert/issues/223 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_large_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_large_ner_pipeline_en.md new file mode 100644 index 00000000000000..2dba800f4b58f7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_large_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_token_classifier_large_ner_pipeline pipeline BertForTokenClassification from dslim +author: John Snow Labs +name: bert_token_classifier_large_ner_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_token_classifier_large_ner_pipeline` is a English model originally trained by dslim. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_token_classifier_large_ner_pipeline_en_5.5.0_3.0_1725538825725.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_token_classifier_large_ner_pipeline_en_5.5.0_3.0_1725538825725.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_token_classifier_large_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_token_classifier_large_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_token_classifier_large_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/dslim/bert-large-NER + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_norwegian_bokml_base_ner_no.md b/docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_norwegian_bokml_base_ner_no.md new file mode 100644 index 00000000000000..021b2b906cb0f9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_norwegian_bokml_base_ner_no.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Norwegian bert_token_classifier_norwegian_bokml_base_ner BertForTokenClassification from NbAiLab +author: John Snow Labs +name: bert_token_classifier_norwegian_bokml_base_ner +date: 2024-09-05 +tags: ["no", open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: "no" +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_token_classifier_norwegian_bokml_base_ner` is a Norwegian model originally trained by NbAiLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_token_classifier_norwegian_bokml_base_ner_no_5.5.0_3.0_1725563345621.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_token_classifier_norwegian_bokml_base_ner_no_5.5.0_3.0_1725563345621.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("bert_token_classifier_norwegian_bokml_base_ner","no") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bert_token_classifier_norwegian_bokml_base_ner", "no")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_token_classifier_norwegian_bokml_base_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|no| +|Size:|464.3 MB| + +## References + +https://huggingface.co/NbAiLab/nb-bert-base-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_norwegian_bokml_base_ner_pipeline_no.md b/docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_norwegian_bokml_base_ner_pipeline_no.md new file mode 100644 index 00000000000000..4b18b3f92ab6c0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_norwegian_bokml_base_ner_pipeline_no.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Norwegian bert_token_classifier_norwegian_bokml_base_ner_pipeline pipeline BertForTokenClassification from NbAiLab +author: John Snow Labs +name: bert_token_classifier_norwegian_bokml_base_ner_pipeline +date: 2024-09-05 +tags: ["no", open_source, pipeline, onnx] +task: Named Entity Recognition +language: "no" +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_token_classifier_norwegian_bokml_base_ner_pipeline` is a Norwegian model originally trained by NbAiLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_token_classifier_norwegian_bokml_base_ner_pipeline_no_5.5.0_3.0_1725563370680.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_token_classifier_norwegian_bokml_base_ner_pipeline_no_5.5.0_3.0_1725563370680.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_token_classifier_norwegian_bokml_base_ner_pipeline", lang = "no") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_token_classifier_norwegian_bokml_base_ner_pipeline", lang = "no") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_token_classifier_norwegian_bokml_base_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|no| +|Size:|464.4 MB| + +## References + +https://huggingface.co/NbAiLab/nb-bert-base-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_parsbert_peymaner_fa.md b/docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_parsbert_peymaner_fa.md new file mode 100644 index 00000000000000..1c12568f624882 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_parsbert_peymaner_fa.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Persian bert_token_classifier_parsbert_peymaner BertForTokenClassification from HooshvareLab +author: John Snow Labs +name: bert_token_classifier_parsbert_peymaner +date: 2024-09-05 +tags: [fa, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: fa +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_token_classifier_parsbert_peymaner` is a Persian model originally trained by HooshvareLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_token_classifier_parsbert_peymaner_fa_5.5.0_3.0_1725563296967.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_token_classifier_parsbert_peymaner_fa_5.5.0_3.0_1725563296967.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("bert_token_classifier_parsbert_peymaner","fa") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bert_token_classifier_parsbert_peymaner", "fa")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_token_classifier_parsbert_peymaner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|fa| +|Size:|606.5 MB| + +## References + +https://huggingface.co/HooshvareLab/bert-base-parsbert-peymaner-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_parsbert_peymaner_pipeline_fa.md b/docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_parsbert_peymaner_pipeline_fa.md new file mode 100644 index 00000000000000..d3ffa2c5d9387c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_parsbert_peymaner_pipeline_fa.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Persian bert_token_classifier_parsbert_peymaner_pipeline pipeline BertForTokenClassification from HooshvareLab +author: John Snow Labs +name: bert_token_classifier_parsbert_peymaner_pipeline +date: 2024-09-05 +tags: [fa, open_source, pipeline, onnx] +task: Named Entity Recognition +language: fa +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_token_classifier_parsbert_peymaner_pipeline` is a Persian model originally trained by HooshvareLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_token_classifier_parsbert_peymaner_pipeline_fa_5.5.0_3.0_1725563333293.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_token_classifier_parsbert_peymaner_pipeline_fa_5.5.0_3.0_1725563333293.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_token_classifier_parsbert_peymaner_pipeline", lang = "fa") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_token_classifier_parsbert_peymaner_pipeline", lang = "fa") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_token_classifier_parsbert_peymaner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fa| +|Size:|606.5 MB| + +## References + +https://huggingface.co/HooshvareLab/bert-base-parsbert-peymaner-uncased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_reddit_ner_place_names_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_reddit_ner_place_names_pipeline_en.md new file mode 100644 index 00000000000000..af5919a617c1a7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_reddit_ner_place_names_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_token_classifier_reddit_ner_place_names_pipeline pipeline BertForTokenClassification from cjber +author: John Snow Labs +name: bert_token_classifier_reddit_ner_place_names_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_token_classifier_reddit_ner_place_names_pipeline` is a English model originally trained by cjber. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_token_classifier_reddit_ner_place_names_pipeline_en_5.5.0_3.0_1725511627820.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_token_classifier_reddit_ner_place_names_pipeline_en_5.5.0_3.0_1725511627820.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_token_classifier_reddit_ner_place_names_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_token_classifier_reddit_ner_place_names_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_token_classifier_reddit_ner_place_names_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/cjber/reddit-ner-place_names + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_uncased_keyword_discriminator_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_uncased_keyword_discriminator_pipeline_en.md new file mode 100644 index 00000000000000..b3c7ee2a609f77 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_token_classifier_uncased_keyword_discriminator_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_token_classifier_uncased_keyword_discriminator_pipeline pipeline BertForTokenClassification from yanekyuk +author: John Snow Labs +name: bert_token_classifier_uncased_keyword_discriminator_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_token_classifier_uncased_keyword_discriminator_pipeline` is a English model originally trained by yanekyuk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_token_classifier_uncased_keyword_discriminator_pipeline_en_5.5.0_3.0_1725511572026.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_token_classifier_uncased_keyword_discriminator_pipeline_en_5.5.0_3.0_1725511572026.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_token_classifier_uncased_keyword_discriminator_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_token_classifier_uncased_keyword_discriminator_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_token_classifier_uncased_keyword_discriminator_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/yanekyuk/bert-uncased-keyword-discriminator + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_web_bulgarian_cased_en.md b/docs/_posts/ahmedlone127/2024-09-05-bert_web_bulgarian_cased_en.md new file mode 100644 index 00000000000000..619b279e1b46b4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_web_bulgarian_cased_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_web_bulgarian_cased BertEmbeddings from usmiva +author: John Snow Labs +name: bert_web_bulgarian_cased +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_web_bulgarian_cased` is a English model originally trained by usmiva. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_web_bulgarian_cased_en_5.5.0_3.0_1725533627001.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_web_bulgarian_cased_en_5.5.0_3.0_1725533627001.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_web_bulgarian_cased","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("bert_web_bulgarian_cased","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
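+
+After the transform, the `embeddings` column of `pipelineDF` holds one vector per token. A minimal sketch of inspecting it, assuming the Python snippet above has been run (vector size follows the encoder, typically 768 for a BERT base model):
+
+```python
+# Each annotation exposes the token text and its embedding vector.
+pipelineDF.selectExpr("explode(embeddings) as emb") \
+    .selectExpr("emb.result as token", "emb.embeddings as vector") \
+    .show(truncate=80)
+```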
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_web_bulgarian_cased| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|en| +|Size:|408.4 MB| + +## References + +https://huggingface.co/usmiva/bert-web-bg-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bert_web_bulgarian_cased_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-bert_web_bulgarian_cased_pipeline_en.md new file mode 100644 index 00000000000000..94ab87f0305bae --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bert_web_bulgarian_cased_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_web_bulgarian_cased_pipeline pipeline BertEmbeddings from usmiva +author: John Snow Labs +name: bert_web_bulgarian_cased_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_web_bulgarian_cased_pipeline` is a English model originally trained by usmiva. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_web_bulgarian_cased_pipeline_en_5.5.0_3.0_1725533647081.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_web_bulgarian_cased_pipeline_en_5.5.0_3.0_1725533647081.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_web_bulgarian_cased_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_web_bulgarian_cased_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_web_bulgarian_cased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|408.4 MB| + +## References + +https://huggingface.co/usmiva/bert-web-bg-cased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bertimbau_base_sayula_popoluca_2_en.md b/docs/_posts/ahmedlone127/2024-09-05-bertimbau_base_sayula_popoluca_2_en.md new file mode 100644 index 00000000000000..533e3df6c67fbc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bertimbau_base_sayula_popoluca_2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bertimbau_base_sayula_popoluca_2 BertForTokenClassification from LendeaViva +author: John Snow Labs +name: bertimbau_base_sayula_popoluca_2 +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bertimbau_base_sayula_popoluca_2` is a English model originally trained by LendeaViva. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertimbau_base_sayula_popoluca_2_en_5.5.0_3.0_1725516172021.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertimbau_base_sayula_popoluca_2_en_5.5.0_3.0_1725516172021.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("bertimbau_base_sayula_popoluca_2","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("bertimbau_base_sayula_popoluca_2", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertimbau_base_sayula_popoluca_2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/LendeaViva/bertimbau_base_pos_2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bertislav_cu.md b/docs/_posts/ahmedlone127/2024-09-05-bertislav_cu.md new file mode 100644 index 00000000000000..3e767c836c0354 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bertislav_cu.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Church Slavic, Church Slavonic, Old Bulgarian, Old Church Slavonic, Old Slavonic bertislav BertEmbeddings from npedrazzini +author: John Snow Labs +name: bertislav +date: 2024-09-05 +tags: [cu, open_source, onnx, embeddings, bert] +task: Embeddings +language: cu +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bertislav` is a Church Slavic, Church Slavonic, Old Bulgarian, Old Church Slavonic, Old Slavonic model originally trained by npedrazzini. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertislav_cu_5.5.0_3.0_1725534193288.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertislav_cu_5.5.0_3.0_1725534193288.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bertislav","cu") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("bertislav","cu") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertislav| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|cu| +|Size:|666.9 MB| + +## References + +https://huggingface.co/npedrazzini/BERTislav \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bertislav_pipeline_cu.md b/docs/_posts/ahmedlone127/2024-09-05-bertislav_pipeline_cu.md new file mode 100644 index 00000000000000..bb56f29e353f3e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bertislav_pipeline_cu.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Church Slavic, Church Slavonic, Old Bulgarian, Old Church Slavonic, Old Slavonic bertislav_pipeline pipeline BertEmbeddings from npedrazzini +author: John Snow Labs +name: bertislav_pipeline +date: 2024-09-05 +tags: [cu, open_source, pipeline, onnx] +task: Embeddings +language: cu +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bertislav_pipeline` is a Church Slavic, Church Slavonic, Old Bulgarian, Old Church Slavonic, Old Slavonic model originally trained by npedrazzini. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertislav_pipeline_cu_5.5.0_3.0_1725534229154.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertislav_pipeline_cu_5.5.0_3.0_1725534229154.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bertislav_pipeline", lang = "cu") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bertislav_pipeline", lang = "cu") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertislav_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|cu| +|Size:|666.9 MB| + +## References + +https://huggingface.co/npedrazzini/BERTislav + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-berturk_legal_pipeline_tr.md b/docs/_posts/ahmedlone127/2024-09-05-berturk_legal_pipeline_tr.md new file mode 100644 index 00000000000000..c3b83e19b9a0b1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-berturk_legal_pipeline_tr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Turkish berturk_legal_pipeline pipeline BertEmbeddings from KocLab-Bilkent +author: John Snow Labs +name: berturk_legal_pipeline +date: 2024-09-05 +tags: [tr, open_source, pipeline, onnx] +task: Embeddings +language: tr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`berturk_legal_pipeline` is a Turkish model originally trained by KocLab-Bilkent. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/berturk_legal_pipeline_tr_5.5.0_3.0_1725519870194.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/berturk_legal_pipeline_tr_5.5.0_3.0_1725519870194.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("berturk_legal_pipeline", lang = "tr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("berturk_legal_pipeline", lang = "tr") +val annotations = pipeline.transform(df) + +``` +
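+
+For quick single-string checks the pretrained pipeline can also be used without building a DataFrame; a minimal sketch with its `annotate` convenience method (the Turkish sample sentence is illustrative only):
+
+```python
+result = pipeline.annotate("Mahkeme kararı temyiz edildi.")
+print(result.keys())  # available keys depend on the pipeline's stages
+```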
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|berturk_legal_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|tr| +|Size:|689.4 MB| + +## References + +https://huggingface.co/KocLab-Bilkent/BERTurk-Legal + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-berturk_legal_tr.md b/docs/_posts/ahmedlone127/2024-09-05-berturk_legal_tr.md new file mode 100644 index 00000000000000..69b220a5bb5ce5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-berturk_legal_tr.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Turkish berturk_legal BertEmbeddings from KocLab-Bilkent +author: John Snow Labs +name: berturk_legal +date: 2024-09-05 +tags: [tr, open_source, onnx, embeddings, bert] +task: Embeddings +language: tr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`berturk_legal` is a Turkish model originally trained by KocLab-Bilkent. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/berturk_legal_tr_5.5.0_3.0_1725519838350.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/berturk_legal_tr_5.5.0_3.0_1725519838350.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("berturk_legal","tr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("berturk_legal","tr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|berturk_legal| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|tr| +|Size:|689.4 MB| + +## References + +https://huggingface.co/KocLab-Bilkent/BERTurk-Legal \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bertwithmetadata_en.md b/docs/_posts/ahmedlone127/2024-09-05-bertwithmetadata_en.md new file mode 100644 index 00000000000000..68df18e43b5eb2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bertwithmetadata_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bertwithmetadata DistilBertForTokenClassification from bhadauriaupendra062 +author: John Snow Labs +name: bertwithmetadata +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bertwithmetadata` is a English model originally trained by bhadauriaupendra062. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertwithmetadata_en_5.5.0_3.0_1725495540537.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertwithmetadata_en_5.5.0_3.0_1725495540537.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = DistilBertForTokenClassification.pretrained("bertwithmetadata","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = DistilBertForTokenClassification.pretrained("bertwithmetadata", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
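+
+If grouped entities are needed rather than token-level tags, a `NerConverter` stage can be appended after the classifier; a minimal sketch on top of the Python pipeline above (the `ner_chunk` column name is an illustrative choice):
+
+```python
+from sparknlp.annotator import NerConverter
+
+# Merge IOB-style token tags into entity chunks.
+converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+chunks = converter.transform(pipelineDF)
+chunks.selectExpr("ner_chunk.result as entities").show(truncate=False)
+```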
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertwithmetadata| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/bhadauriaupendra062/BertWithMetaData \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bertwithmetadata_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-bertwithmetadata_pipeline_en.md new file mode 100644 index 00000000000000..d4ed5329a95e85 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bertwithmetadata_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bertwithmetadata_pipeline pipeline DistilBertForTokenClassification from bhadauriaupendra062 +author: John Snow Labs +name: bertwithmetadata_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bertwithmetadata_pipeline` is a English model originally trained by bhadauriaupendra062. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertwithmetadata_pipeline_en_5.5.0_3.0_1725495555155.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertwithmetadata_pipeline_en_5.5.0_3.0_1725495555155.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bertwithmetadata_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bertwithmetadata_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertwithmetadata_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/bhadauriaupendra062/BertWithMetaData + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bge_base_citi_dataset_9k_1k_e1_en.md b/docs/_posts/ahmedlone127/2024-09-05-bge_base_citi_dataset_9k_1k_e1_en.md new file mode 100644 index 00000000000000..27205881e3761f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bge_base_citi_dataset_9k_1k_e1_en.md @@ -0,0 +1,87 @@ +--- +layout: model +title: English bge_base_citi_dataset_9k_1k_e1 BGEEmbeddings from MugheesAwan11 +author: John Snow Labs +name: bge_base_citi_dataset_9k_1k_e1 +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, bge] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BGEEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bge_base_citi_dataset_9k_1k_e1` is a English model originally trained by MugheesAwan11. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_base_citi_dataset_9k_1k_e1_en_5.5.0_3.0_1725517305869.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_base_citi_dataset_9k_1k_e1_en_5.5.0_3.0_1725517305869.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = BGEEmbeddings.pretrained("bge_base_citi_dataset_9k_1k_e1","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + + +val embeddings = BGEEmbeddings.pretrained("bge_base_citi_dataset_9k_1k_e1","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
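+
+BGE produces one sentence-level vector per document, so two texts can be compared directly. A minimal sketch of a cosine-similarity check, assuming the fitted `pipelineModel` from the Python snippet above (the sample sentences are illustrative only):
+
+```python
+import numpy as np
+
+texts = spark.createDataFrame([["I love spark-nlp"], ["Spark NLP is great"]]).toDF("text")
+rows = pipelineModel.transform(texts).selectExpr("embeddings[0].embeddings as vec").collect()
+a, b = (np.array(r.vec) for r in rows)
+print(float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b))))
+```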
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_base_citi_dataset_9k_1k_e1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[bge]| +|Language:|en| +|Size:|391.7 MB| + +## References + +https://huggingface.co/MugheesAwan11/bge-base-citi-dataset-9k-1k-e1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bge_base_financial_matryoshka_jaswanth160_en.md b/docs/_posts/ahmedlone127/2024-09-05-bge_base_financial_matryoshka_jaswanth160_en.md new file mode 100644 index 00000000000000..49c4f07c753229 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bge_base_financial_matryoshka_jaswanth160_en.md @@ -0,0 +1,87 @@ +--- +layout: model +title: English bge_base_financial_matryoshka_jaswanth160 BGEEmbeddings from Jaswanth160 +author: John Snow Labs +name: bge_base_financial_matryoshka_jaswanth160 +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, bge] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BGEEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bge_base_financial_matryoshka_jaswanth160` is a English model originally trained by Jaswanth160. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_base_financial_matryoshka_jaswanth160_en_5.5.0_3.0_1725517362012.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_base_financial_matryoshka_jaswanth160_en_5.5.0_3.0_1725517362012.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = BGEEmbeddings.pretrained("bge_base_financial_matryoshka_jaswanth160","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + + +val embeddings = BGEEmbeddings.pretrained("bge_base_financial_matryoshka_jaswanth160","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_base_financial_matryoshka_jaswanth160| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[bge]| +|Language:|en| +|Size:|387.2 MB| + +## References + +https://huggingface.co/Jaswanth160/bge-base-financial-matryoshka \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bge_base_financial_matryoshka_jaswanth160_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-bge_base_financial_matryoshka_jaswanth160_pipeline_en.md new file mode 100644 index 00000000000000..af73d44df65b08 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bge_base_financial_matryoshka_jaswanth160_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bge_base_financial_matryoshka_jaswanth160_pipeline pipeline BGEEmbeddings from Jaswanth160 +author: John Snow Labs +name: bge_base_financial_matryoshka_jaswanth160_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bge_base_financial_matryoshka_jaswanth160_pipeline` is a English model originally trained by Jaswanth160. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_base_financial_matryoshka_jaswanth160_pipeline_en_5.5.0_3.0_1725517388195.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_base_financial_matryoshka_jaswanth160_pipeline_en_5.5.0_3.0_1725517388195.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bge_base_financial_matryoshka_jaswanth160_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bge_base_financial_matryoshka_jaswanth160_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_base_financial_matryoshka_jaswanth160_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|387.2 MB| + +## References + +https://huggingface.co/Jaswanth160/bge-base-financial-matryoshka + +## Included Models + +- DocumentAssembler +- BGEEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bge_base_financial_matryoshka_kr_manish_en.md b/docs/_posts/ahmedlone127/2024-09-05-bge_base_financial_matryoshka_kr_manish_en.md new file mode 100644 index 00000000000000..99b5e84162c12a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bge_base_financial_matryoshka_kr_manish_en.md @@ -0,0 +1,87 @@ +--- +layout: model +title: English bge_base_financial_matryoshka_kr_manish BGEEmbeddings from kr-manish +author: John Snow Labs +name: bge_base_financial_matryoshka_kr_manish +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, bge] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BGEEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bge_base_financial_matryoshka_kr_manish` is a English model originally trained by kr-manish. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_base_financial_matryoshka_kr_manish_en_5.5.0_3.0_1725517349045.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_base_financial_matryoshka_kr_manish_en_5.5.0_3.0_1725517349045.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = BGEEmbeddings.pretrained("bge_base_financial_matryoshka_kr_manish","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + + +val embeddings = BGEEmbeddings.pretrained("bge_base_financial_matryoshka_kr_manish","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_base_financial_matryoshka_kr_manish| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[bge]| +|Language:|en| +|Size:|256.0 MB| + +## References + +https://huggingface.co/kr-manish/bge-base-financial-matryoshka \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bge_base_financial_matryoshka_kr_manish_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-bge_base_financial_matryoshka_kr_manish_pipeline_en.md new file mode 100644 index 00000000000000..8cea660fc61ff0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bge_base_financial_matryoshka_kr_manish_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bge_base_financial_matryoshka_kr_manish_pipeline pipeline BGEEmbeddings from kr-manish +author: John Snow Labs +name: bge_base_financial_matryoshka_kr_manish_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bge_base_financial_matryoshka_kr_manish_pipeline` is a English model originally trained by kr-manish. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_base_financial_matryoshka_kr_manish_pipeline_en_5.5.0_3.0_1725517426182.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_base_financial_matryoshka_kr_manish_pipeline_en_5.5.0_3.0_1725517426182.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bge_base_financial_matryoshka_kr_manish_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bge_base_financial_matryoshka_kr_manish_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_base_financial_matryoshka_kr_manish_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|256.0 MB| + +## References + +https://huggingface.co/kr-manish/bge-base-financial-matryoshka + +## Included Models + +- DocumentAssembler +- BGEEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bge_base_securiti_dataset_1_v20_en.md b/docs/_posts/ahmedlone127/2024-09-05-bge_base_securiti_dataset_1_v20_en.md new file mode 100644 index 00000000000000..5b1a9d31044779 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bge_base_securiti_dataset_1_v20_en.md @@ -0,0 +1,87 @@ +--- +layout: model +title: English bge_base_securiti_dataset_1_v20 BGEEmbeddings from MugheesAwan11 +author: John Snow Labs +name: bge_base_securiti_dataset_1_v20 +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, bge] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BGEEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bge_base_securiti_dataset_1_v20` is a English model originally trained by MugheesAwan11. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_base_securiti_dataset_1_v20_en_5.5.0_3.0_1725517048753.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_base_securiti_dataset_1_v20_en_5.5.0_3.0_1725517048753.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = BGEEmbeddings.pretrained("bge_base_securiti_dataset_1_v20","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + + +val embeddings = BGEEmbeddings.pretrained("bge_base_securiti_dataset_1_v20","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_base_securiti_dataset_1_v20| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[bge]| +|Language:|en| +|Size:|380.7 MB| + +## References + +https://huggingface.co/MugheesAwan11/bge-base-securiti-dataset-1-v20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bge_base_securiti_dataset_1_v20_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-bge_base_securiti_dataset_1_v20_pipeline_en.md new file mode 100644 index 00000000000000..0068b67aa78941 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bge_base_securiti_dataset_1_v20_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bge_base_securiti_dataset_1_v20_pipeline pipeline BGEEmbeddings from MugheesAwan11 +author: John Snow Labs +name: bge_base_securiti_dataset_1_v20_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bge_base_securiti_dataset_1_v20_pipeline` is a English model originally trained by MugheesAwan11. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_base_securiti_dataset_1_v20_pipeline_en_5.5.0_3.0_1725517077876.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_base_securiti_dataset_1_v20_pipeline_en_5.5.0_3.0_1725517077876.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bge_base_securiti_dataset_1_v20_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bge_base_securiti_dataset_1_v20_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_base_securiti_dataset_1_v20_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|380.7 MB| + +## References + +https://huggingface.co/MugheesAwan11/bge-base-securiti-dataset-1-v20 + +## Included Models + +- DocumentAssembler +- BGEEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bge_large_chinese_v1_6_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-bge_large_chinese_v1_6_pipeline_en.md new file mode 100644 index 00000000000000..0981e788c848ef --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bge_large_chinese_v1_6_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bge_large_chinese_v1_6_pipeline pipeline BGEEmbeddings from clinno +author: John Snow Labs +name: bge_large_chinese_v1_6_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bge_large_chinese_v1_6_pipeline` is a English model originally trained by clinno. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_large_chinese_v1_6_pipeline_en_5.5.0_3.0_1725517001198.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_large_chinese_v1_6_pipeline_en_5.5.0_3.0_1725517001198.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bge_large_chinese_v1_6_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bge_large_chinese_v1_6_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_large_chinese_v1_6_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/clinno/bge-large-zh-v1.6 + +## Included Models + +- DocumentAssembler +- BGEEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bge_micro_v2_esg_v2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-bge_micro_v2_esg_v2_pipeline_en.md new file mode 100644 index 00000000000000..30b0e7f432ab9a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bge_micro_v2_esg_v2_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bge_micro_v2_esg_v2_pipeline pipeline BGEEmbeddings from elsayovita +author: John Snow Labs +name: bge_micro_v2_esg_v2_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bge_micro_v2_esg_v2_pipeline` is a English model originally trained by elsayovita. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_micro_v2_esg_v2_pipeline_en_5.5.0_3.0_1725516834321.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_micro_v2_esg_v2_pipeline_en_5.5.0_3.0_1725516834321.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bge_micro_v2_esg_v2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bge_micro_v2_esg_v2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_micro_v2_esg_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|55.4 MB| + +## References + +https://huggingface.co/elsayovita/bge-micro-v2-esg-v2 + +## Included Models + +- DocumentAssembler +- BGEEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bge_small_bioasq_3epochs_batch32_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-bge_small_bioasq_3epochs_batch32_pipeline_en.md new file mode 100644 index 00000000000000..f000abf6b3a76b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bge_small_bioasq_3epochs_batch32_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bge_small_bioasq_3epochs_batch32_pipeline pipeline BGEEmbeddings from juanpablomesa +author: John Snow Labs +name: bge_small_bioasq_3epochs_batch32_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bge_small_bioasq_3epochs_batch32_pipeline` is a English model originally trained by juanpablomesa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_small_bioasq_3epochs_batch32_pipeline_en_5.5.0_3.0_1725517306027.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_small_bioasq_3epochs_batch32_pipeline_en_5.5.0_3.0_1725517306027.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bge_small_bioasq_3epochs_batch32_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bge_small_bioasq_3epochs_batch32_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_small_bioasq_3epochs_batch32_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|115.7 MB| + +## References + +https://huggingface.co/juanpablomesa/bge-small-bioasq-3epochs-batch32 + +## Included Models + +- DocumentAssembler +- BGEEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bge_small_english_v1_5_esg_v2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-bge_small_english_v1_5_esg_v2_pipeline_en.md new file mode 100644 index 00000000000000..d21ced20416fde --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bge_small_english_v1_5_esg_v2_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bge_small_english_v1_5_esg_v2_pipeline pipeline BGEEmbeddings from elsayovita +author: John Snow Labs +name: bge_small_english_v1_5_esg_v2_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bge_small_english_v1_5_esg_v2_pipeline` is a English model originally trained by elsayovita. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_small_english_v1_5_esg_v2_pipeline_en_5.5.0_3.0_1725517399385.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_small_english_v1_5_esg_v2_pipeline_en_5.5.0_3.0_1725517399385.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bge_small_english_v1_5_esg_v2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bge_small_english_v1_5_esg_v2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_small_english_v1_5_esg_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|115.1 MB| + +## References + +https://huggingface.co/elsayovita/bge-small-en-v1.5-esg-v2 + +## Included Models + +- DocumentAssembler +- BGEEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bge_small_english_v1_5_hpc_lab_docs_fine_tuned_test_en.md b/docs/_posts/ahmedlone127/2024-09-05-bge_small_english_v1_5_hpc_lab_docs_fine_tuned_test_en.md new file mode 100644 index 00000000000000..f3eecd9ccc8757 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bge_small_english_v1_5_hpc_lab_docs_fine_tuned_test_en.md @@ -0,0 +1,87 @@ +--- +layout: model +title: English bge_small_english_v1_5_hpc_lab_docs_fine_tuned_test BGEEmbeddings from izayashiro +author: John Snow Labs +name: bge_small_english_v1_5_hpc_lab_docs_fine_tuned_test +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, bge] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BGEEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bge_small_english_v1_5_hpc_lab_docs_fine_tuned_test` is a English model originally trained by izayashiro. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_small_english_v1_5_hpc_lab_docs_fine_tuned_test_en_5.5.0_3.0_1725517493224.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_small_english_v1_5_hpc_lab_docs_fine_tuned_test_en_5.5.0_3.0_1725517493224.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+embeddings = BGEEmbeddings.pretrained("bge_small_english_v1_5_hpc_lab_docs_fine_tuned_test","en") \
+    .setInputCols(["document"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([documentAssembler, embeddings])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val embeddings = BGEEmbeddings.pretrained("bge_small_english_v1_5_hpc_lab_docs_fine_tuned_test","en")
+    .setInputCols(Array("document"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
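+
+As a follow-up sketch (not part of the original card), the sentence vectors produced by the example above can be read back from the `embeddings` output column; the float array lives in the standard Spark NLP annotation field of the same name:
+
+```python
+# one row per annotation; "vector" is the array<float> BGE sentence embedding
+pipelineDF.selectExpr("explode(embeddings.embeddings) as vector").show(1, truncate=False)
+```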
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_small_english_v1_5_hpc_lab_docs_fine_tuned_test| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[bge]| +|Language:|en| +|Size:|111.7 MB| + +## References + +https://huggingface.co/izayashiro/bge-small-en-v1.5-hpc-lab-docs-fine-tuned-test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bge_small_english_v1_5_hpc_lab_docs_fine_tuned_test_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-bge_small_english_v1_5_hpc_lab_docs_fine_tuned_test_pipeline_en.md new file mode 100644 index 00000000000000..b9f32b9dc37832 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bge_small_english_v1_5_hpc_lab_docs_fine_tuned_test_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bge_small_english_v1_5_hpc_lab_docs_fine_tuned_test_pipeline pipeline BGEEmbeddings from izayashiro +author: John Snow Labs +name: bge_small_english_v1_5_hpc_lab_docs_fine_tuned_test_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BGEEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bge_small_english_v1_5_hpc_lab_docs_fine_tuned_test_pipeline` is a English model originally trained by izayashiro. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_small_english_v1_5_hpc_lab_docs_fine_tuned_test_pipeline_en_5.5.0_3.0_1725517503099.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_small_english_v1_5_hpc_lab_docs_fine_tuned_test_pipeline_en_5.5.0_3.0_1725517503099.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bge_small_english_v1_5_hpc_lab_docs_fine_tuned_test_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bge_small_english_v1_5_hpc_lab_docs_fine_tuned_test_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_small_english_v1_5_hpc_lab_docs_fine_tuned_test_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|111.7 MB| + +## References + +https://huggingface.co/izayashiro/bge-small-en-v1.5-hpc-lab-docs-fine-tuned-test + +## Included Models + +- DocumentAssembler +- BGEEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bias_detection_ner_en.md b/docs/_posts/ahmedlone127/2024-09-05-bias_detection_ner_en.md new file mode 100644 index 00000000000000..79c3a6f95b70c8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bias_detection_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bias_detection_ner BertForTokenClassification from maximuspowers +author: John Snow Labs +name: bias_detection_ner +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bias_detection_ner` is a English model originally trained by maximuspowers. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bias_detection_ner_en_5.5.0_3.0_1725511250413.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bias_detection_ner_en_5.5.0_3.0_1725511250413.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# the classifier consumes the "document" and "token" columns produced above
+tokenClassifier = BertForTokenClassification.pretrained("bias_detection_ner","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bias_detection_ner", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
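+
+Under the same assumptions as the example above, a quick way to pair each token with its predicted label is to zip the `token` and `ner` annotation results (hedged sketch; `result` is the standard Spark NLP annotation field):
+
+```python
+# each element pairs a token string with its predicted entity tag
+pipelineDF.selectExpr("explode(arrays_zip(token.result, ner.result)) as prediction").show(truncate=False)
+```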
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bias_detection_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/maximuspowers/bias-detection-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bias_detection_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-bias_detection_ner_pipeline_en.md new file mode 100644 index 00000000000000..99a60375e55c1f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bias_detection_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bias_detection_ner_pipeline pipeline BertForTokenClassification from maximuspowers +author: John Snow Labs +name: bias_detection_ner_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bias_detection_ner_pipeline` is a English model originally trained by maximuspowers. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bias_detection_ner_pipeline_en_5.5.0_3.0_1725511269513.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bias_detection_ner_pipeline_en_5.5.0_3.0_1725511269513.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bias_detection_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bias_detection_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
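+
+For quick experiments on raw strings instead of a DataFrame, `PretrainedPipeline` also exposes `annotate`. A minimal sketch follows; the sample sentence is illustrative and the output keys depend on the pipeline's stage names, so inspect `result.keys()` to confirm:
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+pipeline = PretrainedPipeline("bias_detection_ner_pipeline", lang="en")
+result = pipeline.annotate("The senator's plan was, predictably, a reckless giveaway.")
+print(result.keys())      # annotation outputs returned by the pipeline
+print(result.get("ner"))  # token-level labels, assuming the NER stage is exposed under this key
+```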
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bias_detection_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/maximuspowers/bias-detection-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bible_roberta_base_en.md b/docs/_posts/ahmedlone127/2024-09-05-bible_roberta_base_en.md new file mode 100644 index 00000000000000..c5a082bc548205 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bible_roberta_base_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bible_roberta_base RoBertaEmbeddings from abhi1nandy2 +author: John Snow Labs +name: bible_roberta_base +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bible_roberta_base` is a English model originally trained by abhi1nandy2. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bible_roberta_base_en_5.5.0_3.0_1725573180026.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bible_roberta_base_en_5.5.0_3.0_1725573180026.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("bible_roberta_base","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("bible_roberta_base","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bible_roberta_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|465.9 MB| + +## References + +https://huggingface.co/abhi1nandy2/Bible-roberta-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bible_roberta_base_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-bible_roberta_base_pipeline_en.md new file mode 100644 index 00000000000000..4047cfb0ddca54 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bible_roberta_base_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bible_roberta_base_pipeline pipeline RoBertaEmbeddings from abhi1nandy2 +author: John Snow Labs +name: bible_roberta_base_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bible_roberta_base_pipeline` is a English model originally trained by abhi1nandy2. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bible_roberta_base_pipeline_en_5.5.0_3.0_1725573204525.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bible_roberta_base_pipeline_en_5.5.0_3.0_1725573204525.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bible_roberta_base_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bible_roberta_base_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bible_roberta_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|465.9 MB| + +## References + +https://huggingface.co/abhi1nandy2/Bible-roberta-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-biobert_fachpraktikum_en.md b/docs/_posts/ahmedlone127/2024-09-05-biobert_fachpraktikum_en.md new file mode 100644 index 00000000000000..a9b3639edefaf9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-biobert_fachpraktikum_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English biobert_fachpraktikum BertForTokenClassification from vaitekunas +author: John Snow Labs +name: biobert_fachpraktikum +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`biobert_fachpraktikum` is a English model originally trained by vaitekunas. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biobert_fachpraktikum_en_5.5.0_3.0_1725563467128.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biobert_fachpraktikum_en_5.5.0_3.0_1725563467128.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("biobert_fachpraktikum","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("biobert_fachpraktikum", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biobert_fachpraktikum| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/vaitekunas/biobert-fachpraktikum \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-biobert_fachpraktikum_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-biobert_fachpraktikum_pipeline_en.md new file mode 100644 index 00000000000000..bbf27b898a6604 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-biobert_fachpraktikum_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English biobert_fachpraktikum_pipeline pipeline BertForTokenClassification from vaitekunas +author: John Snow Labs +name: biobert_fachpraktikum_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`biobert_fachpraktikum_pipeline` is a English model originally trained by vaitekunas. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biobert_fachpraktikum_pipeline_en_5.5.0_3.0_1725563488168.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biobert_fachpraktikum_pipeline_en_5.5.0_3.0_1725563488168.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("biobert_fachpraktikum_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("biobert_fachpraktikum_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biobert_fachpraktikum_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/vaitekunas/biobert-fachpraktikum + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-biobert_full_finetuned_ner_pablo_en.md b/docs/_posts/ahmedlone127/2024-09-05-biobert_full_finetuned_ner_pablo_en.md new file mode 100644 index 00000000000000..862baf3575ad25 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-biobert_full_finetuned_ner_pablo_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English biobert_full_finetuned_ner_pablo BertForTokenClassification from pabRomero +author: John Snow Labs +name: biobert_full_finetuned_ner_pablo +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`biobert_full_finetuned_ner_pablo` is a English model originally trained by pabRomero. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biobert_full_finetuned_ner_pablo_en_5.5.0_3.0_1725515488235.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biobert_full_finetuned_ner_pablo_en_5.5.0_3.0_1725515488235.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("biobert_full_finetuned_ner_pablo","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("biobert_full_finetuned_ner_pablo", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biobert_full_finetuned_ner_pablo| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/pabRomero/BioBERT-full-finetuned-ner-pablo \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-biobert_full_finetuned_ner_pablo_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-biobert_full_finetuned_ner_pablo_pipeline_en.md new file mode 100644 index 00000000000000..d1df74d1c81bf3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-biobert_full_finetuned_ner_pablo_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English biobert_full_finetuned_ner_pablo_pipeline pipeline BertForTokenClassification from pabRomero +author: John Snow Labs +name: biobert_full_finetuned_ner_pablo_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`biobert_full_finetuned_ner_pablo_pipeline` is a English model originally trained by pabRomero. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biobert_full_finetuned_ner_pablo_pipeline_en_5.5.0_3.0_1725515507738.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biobert_full_finetuned_ner_pablo_pipeline_en_5.5.0_3.0_1725515507738.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("biobert_full_finetuned_ner_pablo_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("biobert_full_finetuned_ner_pablo_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biobert_full_finetuned_ner_pablo_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/pabRomero/BioBERT-full-finetuned-ner-pablo + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-biomed_roberta_base_4096_en.md b/docs/_posts/ahmedlone127/2024-09-05-biomed_roberta_base_4096_en.md new file mode 100644 index 00000000000000..676ffe9d201b6f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-biomed_roberta_base_4096_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English biomed_roberta_base_4096 RoBertaEmbeddings from simonlevine +author: John Snow Labs +name: biomed_roberta_base_4096 +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`biomed_roberta_base_4096` is a English model originally trained by simonlevine. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biomed_roberta_base_4096_en_5.5.0_3.0_1725577478459.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biomed_roberta_base_4096_en_5.5.0_3.0_1725577478459.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("biomed_roberta_base_4096","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("biomed_roberta_base_4096","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biomed_roberta_base_4096| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|476.4 MB| + +## References + +https://huggingface.co/simonlevine/biomed_roberta_base-4096 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-biomed_roberta_base_4096_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-biomed_roberta_base_4096_pipeline_en.md new file mode 100644 index 00000000000000..20f0118b1a380a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-biomed_roberta_base_4096_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English biomed_roberta_base_4096_pipeline pipeline RoBertaEmbeddings from simonlevine +author: John Snow Labs +name: biomed_roberta_base_4096_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`biomed_roberta_base_4096_pipeline` is a English model originally trained by simonlevine. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biomed_roberta_base_4096_pipeline_en_5.5.0_3.0_1725577503010.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biomed_roberta_base_4096_pipeline_en_5.5.0_3.0_1725577503010.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("biomed_roberta_base_4096_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("biomed_roberta_base_4096_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biomed_roberta_base_4096_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|476.4 MB| + +## References + +https://huggingface.co/simonlevine/biomed_roberta_base-4096 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-biomedical_ner_all_datasets_4_en.md b/docs/_posts/ahmedlone127/2024-09-05-biomedical_ner_all_datasets_4_en.md new file mode 100644 index 00000000000000..46875e923816c6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-biomedical_ner_all_datasets_4_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English biomedical_ner_all_datasets_4 DistilBertForTokenClassification from Dogebooch +author: John Snow Labs +name: biomedical_ner_all_datasets_4 +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`biomedical_ner_all_datasets_4` is a English model originally trained by Dogebooch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biomedical_ner_all_datasets_4_en_5.5.0_3.0_1725500701759.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biomedical_ner_all_datasets_4_en_5.5.0_3.0_1725500701759.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("biomedical_ner_all_datasets_4","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("biomedical_ner_all_datasets_4", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biomedical_ner_all_datasets_4| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.5 MB| + +## References + +https://huggingface.co/Dogebooch/biomedical-ner-all_Datasets_4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-biomedical_ner_all_datasets_4_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-biomedical_ner_all_datasets_4_pipeline_en.md new file mode 100644 index 00000000000000..0d616e655db28b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-biomedical_ner_all_datasets_4_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English biomedical_ner_all_datasets_4_pipeline pipeline DistilBertForTokenClassification from Dogebooch +author: John Snow Labs +name: biomedical_ner_all_datasets_4_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`biomedical_ner_all_datasets_4_pipeline` is a English model originally trained by Dogebooch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biomedical_ner_all_datasets_4_pipeline_en_5.5.0_3.0_1725500714055.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biomedical_ner_all_datasets_4_pipeline_en_5.5.0_3.0_1725500714055.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("biomedical_ner_all_datasets_4_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("biomedical_ner_all_datasets_4_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biomedical_ner_all_datasets_4_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.6 MB| + +## References + +https://huggingface.co/Dogebooch/biomedical-ner-all_Datasets_4 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-biomednlp_biomedbert_large_uncased_abstract_en.md b/docs/_posts/ahmedlone127/2024-09-05-biomednlp_biomedbert_large_uncased_abstract_en.md new file mode 100644 index 00000000000000..bf73b0749a863e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-biomednlp_biomedbert_large_uncased_abstract_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English biomednlp_biomedbert_large_uncased_abstract BertEmbeddings from microsoft +author: John Snow Labs +name: biomednlp_biomedbert_large_uncased_abstract +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`biomednlp_biomedbert_large_uncased_abstract` is a English model originally trained by microsoft. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biomednlp_biomedbert_large_uncased_abstract_en_5.5.0_3.0_1725520294545.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biomednlp_biomedbert_large_uncased_abstract_en_5.5.0_3.0_1725520294545.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("biomednlp_biomedbert_large_uncased_abstract","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("biomednlp_biomedbert_large_uncased_abstract","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biomednlp_biomedbert_large_uncased_abstract| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/microsoft/BiomedNLP-BiomedBERT-large-uncased-abstract \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-biomednlp_pubmedbert_proteinstructure_ner_v3_1_en.md b/docs/_posts/ahmedlone127/2024-09-05-biomednlp_pubmedbert_proteinstructure_ner_v3_1_en.md new file mode 100644 index 00000000000000..e415364973c35c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-biomednlp_pubmedbert_proteinstructure_ner_v3_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English biomednlp_pubmedbert_proteinstructure_ner_v3_1 BertForTokenClassification from mevol +author: John Snow Labs +name: biomednlp_pubmedbert_proteinstructure_ner_v3_1 +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`biomednlp_pubmedbert_proteinstructure_ner_v3_1` is a English model originally trained by mevol. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biomednlp_pubmedbert_proteinstructure_ner_v3_1_en_5.5.0_3.0_1725563318500.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biomednlp_pubmedbert_proteinstructure_ner_v3_1_en_5.5.0_3.0_1725563318500.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("biomednlp_pubmedbert_proteinstructure_ner_v3_1","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("biomednlp_pubmedbert_proteinstructure_ner_v3_1", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biomednlp_pubmedbert_proteinstructure_ner_v3_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|408.3 MB| + +## References + +https://huggingface.co/mevol/BiomedNLP-PubMedBERT-ProteinStructure-NER-v3.1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-biomednlp_pubmedbert_proteinstructure_ner_v3_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-biomednlp_pubmedbert_proteinstructure_ner_v3_1_pipeline_en.md new file mode 100644 index 00000000000000..e1ce6c132cd66b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-biomednlp_pubmedbert_proteinstructure_ner_v3_1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English biomednlp_pubmedbert_proteinstructure_ner_v3_1_pipeline pipeline BertForTokenClassification from mevol +author: John Snow Labs +name: biomednlp_pubmedbert_proteinstructure_ner_v3_1_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`biomednlp_pubmedbert_proteinstructure_ner_v3_1_pipeline` is a English model originally trained by mevol. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biomednlp_pubmedbert_proteinstructure_ner_v3_1_pipeline_en_5.5.0_3.0_1725563339958.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biomednlp_pubmedbert_proteinstructure_ner_v3_1_pipeline_en_5.5.0_3.0_1725563339958.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("biomednlp_pubmedbert_proteinstructure_ner_v3_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("biomednlp_pubmedbert_proteinstructure_ner_v3_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biomednlp_pubmedbert_proteinstructure_ner_v3_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|408.3 MB| + +## References + +https://huggingface.co/mevol/BiomedNLP-PubMedBERT-ProteinStructure-NER-v3.1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bsc_bio_ehr_spanish_carmen_humano_pipeline_es.md b/docs/_posts/ahmedlone127/2024-09-05-bsc_bio_ehr_spanish_carmen_humano_pipeline_es.md new file mode 100644 index 00000000000000..c81988166929a4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bsc_bio_ehr_spanish_carmen_humano_pipeline_es.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Castilian, Spanish bsc_bio_ehr_spanish_carmen_humano_pipeline pipeline RoBertaForTokenClassification from BSC-NLP4BIA +author: John Snow Labs +name: bsc_bio_ehr_spanish_carmen_humano_pipeline +date: 2024-09-05 +tags: [es, open_source, pipeline, onnx] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bsc_bio_ehr_spanish_carmen_humano_pipeline` is a Castilian, Spanish model originally trained by BSC-NLP4BIA. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_carmen_humano_pipeline_es_5.5.0_3.0_1725512093786.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_carmen_humano_pipeline_es_5.5.0_3.0_1725512093786.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bsc_bio_ehr_spanish_carmen_humano_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bsc_bio_ehr_spanish_carmen_humano_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bsc_bio_ehr_spanish_carmen_humano_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|438.2 MB| + +## References + +https://huggingface.co/BSC-NLP4BIA/bsc-bio-ehr-es-carmen-humano + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bsc_bio_ehr_spanish_es.md b/docs/_posts/ahmedlone127/2024-09-05-bsc_bio_ehr_spanish_es.md new file mode 100644 index 00000000000000..55cb80453b889f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bsc_bio_ehr_spanish_es.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Castilian, Spanish bsc_bio_ehr_spanish RoBertaEmbeddings from PlanTL-GOB-ES +author: John Snow Labs +name: bsc_bio_ehr_spanish +date: 2024-09-05 +tags: [es, open_source, onnx, embeddings, roberta] +task: Embeddings +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bsc_bio_ehr_spanish` is a Castilian, Spanish model originally trained by PlanTL-GOB-ES. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_es_5.5.0_3.0_1725566167938.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_es_5.5.0_3.0_1725566167938.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("bsc_bio_ehr_spanish","es") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("bsc_bio_ehr_spanish","es") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bsc_bio_ehr_spanish| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|es| +|Size:|295.7 MB| + +## References + +https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-bsc_bio_ehr_spanish_pipeline_es.md b/docs/_posts/ahmedlone127/2024-09-05-bsc_bio_ehr_spanish_pipeline_es.md new file mode 100644 index 00000000000000..69066a40590daf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-bsc_bio_ehr_spanish_pipeline_es.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Castilian, Spanish bsc_bio_ehr_spanish_pipeline pipeline RoBertaEmbeddings from PlanTL-GOB-ES +author: John Snow Labs +name: bsc_bio_ehr_spanish_pipeline +date: 2024-09-05 +tags: [es, open_source, pipeline, onnx] +task: Embeddings +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bsc_bio_ehr_spanish_pipeline` is a Castilian, Spanish model originally trained by PlanTL-GOB-ES. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_pipeline_es_5.5.0_3.0_1725566258423.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_pipeline_es_5.5.0_3.0_1725566258423.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bsc_bio_ehr_spanish_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bsc_bio_ehr_spanish_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bsc_bio_ehr_spanish_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|295.7 MB| + +## References + +https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_eli5_mlm_model_nateile_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_eli5_mlm_model_nateile_en.md new file mode 100644 index 00000000000000..4c6623a37615aa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_eli5_mlm_model_nateile_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_eli5_mlm_model_nateile RoBertaEmbeddings from Nateile +author: John Snow Labs +name: burmese_awesome_eli5_mlm_model_nateile +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_eli5_mlm_model_nateile` is a English model originally trained by Nateile. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_eli5_mlm_model_nateile_en_5.5.0_3.0_1725578274073.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_eli5_mlm_model_nateile_en_5.5.0_3.0_1725578274073.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("burmese_awesome_eli5_mlm_model_nateile","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("burmese_awesome_eli5_mlm_model_nateile","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_eli5_mlm_model_nateile| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|306.5 MB| + +## References + +https://huggingface.co/Nateile/my_awesome_eli5_mlm_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_model_jasssz_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_model_jasssz_pipeline_en.md new file mode 100644 index 00000000000000..96a62281ce7cec --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_model_jasssz_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_model_jasssz_pipeline pipeline DistilBertForSequenceClassification from JasssZ +author: John Snow Labs +name: burmese_awesome_model_jasssz_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_model_jasssz_pipeline` is a English model originally trained by JasssZ. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_model_jasssz_pipeline_en_5.5.0_3.0_1725507539229.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_model_jasssz_pipeline_en_5.5.0_3.0_1725507539229.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_model_jasssz_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_model_jasssz_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
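+
+For quick experiments on single strings, `PretrainedPipeline` also offers `annotate`, which skips building a DataFrame (a minimal sketch; the example sentence is only illustrative):
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("burmese_awesome_model_jasssz_pipeline", lang = "en")
+result = pipeline.annotate("I love spark-nlp")
+print(result)  # dictionary keyed by the pipeline's output columns
+```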
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_model_jasssz_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/JasssZ/my_awesome_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_model_lenatt_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_model_lenatt_pipeline_en.md new file mode 100644 index 00000000000000..1d581fdf9e98d5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_model_lenatt_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_model_lenatt_pipeline pipeline DistilBertForSequenceClassification from lenate +author: John Snow Labs +name: burmese_awesome_model_lenatt_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_model_lenatt_pipeline` is a English model originally trained by lenate. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_model_lenatt_pipeline_en_5.5.0_3.0_1725507071662.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_model_lenatt_pipeline_en_5.5.0_3.0_1725507071662.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_model_lenatt_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_model_lenatt_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_model_lenatt_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/lenate/my_awesome_model_lenatt + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_opus_books_model_wzchen_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_opus_books_model_wzchen_en.md new file mode 100644 index 00000000000000..e6071d9d87ad74 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_opus_books_model_wzchen_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_opus_books_model_wzchen MarianTransformer from wzChen +author: John Snow Labs +name: burmese_awesome_opus_books_model_wzchen +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_opus_books_model_wzchen` is a English model originally trained by wzChen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_opus_books_model_wzchen_en_5.5.0_3.0_1725545066789.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_opus_books_model_wzchen_en_5.5.0_3.0_1725545066789.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("burmese_awesome_opus_books_model_wzchen","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("burmese_awesome_opus_books_model_wzchen","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
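+
+After the pipeline above runs, the translated sentences sit in the `result` field of the `translation` column; a minimal sketch of reading them back:
+
+```python
+pipelineDF.selectExpr("explode(translation.result) as translated_text") \
+    .show(truncate=False)
+```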
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_opus_books_model_wzchen| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|532.9 MB| + +## References + +https://huggingface.co/wzChen/my_awesome_opus_books_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_opus_books_model_wzchen_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_opus_books_model_wzchen_pipeline_en.md new file mode 100644 index 00000000000000..691ec35ea48247 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_opus_books_model_wzchen_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_opus_books_model_wzchen_pipeline pipeline MarianTransformer from wzChen +author: John Snow Labs +name: burmese_awesome_opus_books_model_wzchen_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_opus_books_model_wzchen_pipeline` is a English model originally trained by wzChen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_opus_books_model_wzchen_pipeline_en_5.5.0_3.0_1725545094272.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_opus_books_model_wzchen_pipeline_en_5.5.0_3.0_1725545094272.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_opus_books_model_wzchen_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_opus_books_model_wzchen_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_opus_books_model_wzchen_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|533.5 MB| + +## References + +https://huggingface.co/wzChen/my_awesome_opus_books_model + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model2_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model2_en.md new file mode 100644 index 00000000000000..561c2f0ecfb039 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model2_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English burmese_awesome_wnut_model2 DistilBertForTokenClassification from Atheer174 +author: John Snow Labs +name: burmese_awesome_wnut_model2 +date: 2024-09-05 +tags: [bert, en, open_source, token_classification, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model2` is a English model originally trained by Atheer174. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model2_en_5.5.0_3.0_1725500821334.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model2_en_5.5.0_3.0_1725500821334.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model2","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification
+    .pretrained("burmese_awesome_wnut_model2", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+
+val data = Seq("I love spark-nlp").toDF("text")
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+```
+</div>
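+
+The `ner` column produced above carries one predicted label per token; a minimal sketch of viewing tokens and predictions side by side (column names follow the example code):
+
+```python
+pipelineDF.select("token.result", "ner.result").show(truncate=False)
+```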
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.2 MB| + +## References + +References + +https://huggingface.co/Atheer174/my_awesome_wnut_model2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_abbie_tsao_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_abbie_tsao_en.md new file mode 100644 index 00000000000000..f534cbad926860 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_abbie_tsao_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_abbie_tsao DistilBertForTokenClassification from Abbie-Tsao +author: John Snow Labs +name: burmese_awesome_wnut_model_abbie_tsao +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_abbie_tsao` is a English model originally trained by Abbie-Tsao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_abbie_tsao_en_5.5.0_3.0_1725518734135.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_abbie_tsao_en_5.5.0_3.0_1725518734135.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_abbie_tsao","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_abbie_tsao", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_abbie_tsao| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Abbie-Tsao/my-awesome-wnut-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_abbie_tsao_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_abbie_tsao_pipeline_en.md new file mode 100644 index 00000000000000..0e5f6c2ce6d7ac --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_abbie_tsao_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_abbie_tsao_pipeline pipeline DistilBertForTokenClassification from Abbie-Tsao +author: John Snow Labs +name: burmese_awesome_wnut_model_abbie_tsao_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_abbie_tsao_pipeline` is a English model originally trained by Abbie-Tsao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_abbie_tsao_pipeline_en_5.5.0_3.0_1725518746631.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_abbie_tsao_pipeline_en_5.5.0_3.0_1725518746631.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_abbie_tsao_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_abbie_tsao_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_abbie_tsao_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Abbie-Tsao/my-awesome-wnut-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_beenish0092_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_beenish0092_en.md new file mode 100644 index 00000000000000..568c1cc1fcccab --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_beenish0092_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_beenish0092 DistilBertForTokenClassification from beenish0092 +author: John Snow Labs +name: burmese_awesome_wnut_model_beenish0092 +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_beenish0092` is a English model originally trained by beenish0092. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_beenish0092_en_5.5.0_3.0_1725505848798.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_beenish0092_en_5.5.0_3.0_1725505848798.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_beenish0092","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_beenish0092", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_beenish0092| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/beenish0092/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_beenish0092_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_beenish0092_pipeline_en.md new file mode 100644 index 00000000000000..c4435467056f11 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_beenish0092_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_beenish0092_pipeline pipeline DistilBertForTokenClassification from beenish0092 +author: John Snow Labs +name: burmese_awesome_wnut_model_beenish0092_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_beenish0092_pipeline` is a English model originally trained by beenish0092. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_beenish0092_pipeline_en_5.5.0_3.0_1725505860381.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_beenish0092_pipeline_en_5.5.0_3.0_1725505860381.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_beenish0092_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_beenish0092_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_beenish0092_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/beenish0092/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_calebz9527_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_calebz9527_en.md new file mode 100644 index 00000000000000..8cfe4f17be50a4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_calebz9527_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_calebz9527 DistilBertForTokenClassification from CalebZ9527 +author: John Snow Labs +name: burmese_awesome_wnut_model_calebz9527 +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_calebz9527` is a English model originally trained by CalebZ9527. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_calebz9527_en_5.5.0_3.0_1725518635861.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_calebz9527_en_5.5.0_3.0_1725518635861.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_calebz9527","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_calebz9527", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_calebz9527| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/CalebZ9527/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_calebz9527_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_calebz9527_pipeline_en.md new file mode 100644 index 00000000000000..c1b52e25eba5a0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_calebz9527_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_calebz9527_pipeline pipeline DistilBertForTokenClassification from CalebZ9527 +author: John Snow Labs +name: burmese_awesome_wnut_model_calebz9527_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_calebz9527_pipeline` is a English model originally trained by CalebZ9527. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_calebz9527_pipeline_en_5.5.0_3.0_1725518649091.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_calebz9527_pipeline_en_5.5.0_3.0_1725518649091.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_calebz9527_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_calebz9527_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_calebz9527_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/CalebZ9527/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_casual_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_casual_pipeline_en.md new file mode 100644 index 00000000000000..0859873fec38de --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_casual_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_casual_pipeline pipeline DistilBertForTokenClassification from casual +author: John Snow Labs +name: burmese_awesome_wnut_model_casual_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_casual_pipeline` is a English model originally trained by casual. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_casual_pipeline_en_5.5.0_3.0_1725495552640.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_casual_pipeline_en_5.5.0_3.0_1725495552640.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_casual_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_casual_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_casual_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/casual/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_chuhao1305_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_chuhao1305_en.md new file mode 100644 index 00000000000000..91ae636bcbe2bd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_chuhao1305_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_chuhao1305 DistilBertForTokenClassification from Chuhao1305 +author: John Snow Labs +name: burmese_awesome_wnut_model_chuhao1305 +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_chuhao1305` is a English model originally trained by Chuhao1305. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_chuhao1305_en_5.5.0_3.0_1725500716741.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_chuhao1305_en_5.5.0_3.0_1725500716741.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_chuhao1305","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_chuhao1305", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_chuhao1305| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Chuhao1305/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_connerside_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_connerside_en.md new file mode 100644 index 00000000000000..7cee6eb21cd59b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_connerside_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_connerside DistilBertForTokenClassification from Connerside +author: John Snow Labs +name: burmese_awesome_wnut_model_connerside +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_connerside` is a English model originally trained by Connerside. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_connerside_en_5.5.0_3.0_1725496084393.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_connerside_en_5.5.0_3.0_1725496084393.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_connerside","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_connerside", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_connerside| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Connerside/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_connerside_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_connerside_pipeline_en.md new file mode 100644 index 00000000000000..224b172f23d5a1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_connerside_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_connerside_pipeline pipeline DistilBertForTokenClassification from Connerside +author: John Snow Labs +name: burmese_awesome_wnut_model_connerside_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_connerside_pipeline` is a English model originally trained by Connerside. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_connerside_pipeline_en_5.5.0_3.0_1725496096213.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_connerside_pipeline_en_5.5.0_3.0_1725496096213.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_connerside_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_connerside_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_connerside_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Connerside/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_donbasta_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_donbasta_pipeline_en.md new file mode 100644 index 00000000000000..771a38e147b5bb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_donbasta_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_donbasta_pipeline pipeline DistilBertForTokenClassification from donbasta +author: John Snow Labs +name: burmese_awesome_wnut_model_donbasta_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_donbasta_pipeline` is a English model originally trained by donbasta. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_donbasta_pipeline_en_5.5.0_3.0_1725500751225.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_donbasta_pipeline_en_5.5.0_3.0_1725500751225.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_donbasta_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_donbasta_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_donbasta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/donbasta/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_duggurani_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_duggurani_pipeline_en.md new file mode 100644 index 00000000000000..f04d7e43f61925 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_duggurani_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_duggurani_pipeline pipeline DistilBertForTokenClassification from DugguRani +author: John Snow Labs +name: burmese_awesome_wnut_model_duggurani_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_duggurani_pipeline` is a English model originally trained by DugguRani. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_duggurani_pipeline_en_5.5.0_3.0_1725496268571.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_duggurani_pipeline_en_5.5.0_3.0_1725496268571.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_duggurani_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_duggurani_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_duggurani_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/DugguRani/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_fukada6280_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_fukada6280_pipeline_en.md new file mode 100644 index 00000000000000..d27d31bae244c2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_fukada6280_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_fukada6280_pipeline pipeline DistilBertForTokenClassification from fukada6280 +author: John Snow Labs +name: burmese_awesome_wnut_model_fukada6280_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_fukada6280_pipeline` is a English model originally trained by fukada6280. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_fukada6280_pipeline_en_5.5.0_3.0_1725495736664.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_fukada6280_pipeline_en_5.5.0_3.0_1725495736664.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_fukada6280_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_fukada6280_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_fukada6280_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/fukada6280/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_girsha_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_girsha_pipeline_en.md new file mode 100644 index 00000000000000..11c8b662bd5301 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_girsha_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_girsha_pipeline pipeline DistilBertForTokenClassification from girsha +author: John Snow Labs +name: burmese_awesome_wnut_model_girsha_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_girsha_pipeline` is a English model originally trained by girsha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_girsha_pipeline_en_5.5.0_3.0_1725495557311.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_girsha_pipeline_en_5.5.0_3.0_1725495557311.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_girsha_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_girsha_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_girsha_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/girsha/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_hamzamushtaq12_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_hamzamushtaq12_en.md new file mode 100644 index 00000000000000..c7f23cdb9a8391 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_hamzamushtaq12_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_hamzamushtaq12 DistilBertForTokenClassification from hamzamushtaq12 +author: John Snow Labs +name: burmese_awesome_wnut_model_hamzamushtaq12 +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_hamzamushtaq12` is a English model originally trained by hamzamushtaq12. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_hamzamushtaq12_en_5.5.0_3.0_1725496119303.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_hamzamushtaq12_en_5.5.0_3.0_1725496119303.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_hamzamushtaq12","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_hamzamushtaq12", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_hamzamushtaq12| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/hamzamushtaq12/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_hamzamushtaq12_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_hamzamushtaq12_pipeline_en.md new file mode 100644 index 00000000000000..02fa1d5081a041 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_hamzamushtaq12_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_hamzamushtaq12_pipeline pipeline DistilBertForTokenClassification from hamzamushtaq12 +author: John Snow Labs +name: burmese_awesome_wnut_model_hamzamushtaq12_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_hamzamushtaq12_pipeline` is a English model originally trained by hamzamushtaq12. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_hamzamushtaq12_pipeline_en_5.5.0_3.0_1725496131634.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_hamzamushtaq12_pipeline_en_5.5.0_3.0_1725496131634.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("burmese_awesome_wnut_model_hamzamushtaq12_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_hamzamushtaq12_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
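+
+For ad-hoc use, the loaded `PretrainedPipeline` can also annotate plain strings directly, without building a DataFrame first. A rough sketch, assuming the output column is named "ner" as listed in this card:
+
+```python
+result = pipeline.annotate("My name is Wolfgang and I live in Berlin")
+print(result["ner"])
+```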
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_hamzamushtaq12_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/hamzamushtaq12/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_irishzhang_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_irishzhang_en.md new file mode 100644 index 00000000000000..8480313a1dae27 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_irishzhang_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_irishzhang DistilBertForTokenClassification from irishzhang +author: John Snow Labs +name: burmese_awesome_wnut_model_irishzhang +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_irishzhang` is a English model originally trained by irishzhang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_irishzhang_en_5.5.0_3.0_1725500815577.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_irishzhang_en_5.5.0_3.0_1725500815577.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# The classifier reads the "document" and "token" columns produced above
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_irishzhang","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+// The classifier reads the "document" and "token" columns produced above
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_irishzhang", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
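+
+Once fitted, the pipeline behaves like any other Spark ML `PipelineModel`, so it can be persisted and reloaded without refetching the pretrained weights. A minimal sketch, assuming write access to the example path below (the path is illustrative):
+
+```python
+from pyspark.ml import PipelineModel
+
+# Save the fitted pipeline and load it back for reuse
+pipelineModel.write().overwrite().save("/tmp/wnut_ner_pipeline")
+restored = PipelineModel.load("/tmp/wnut_ner_pipeline")
+restored.transform(data).select("ner.result").show(truncate=False)
+```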
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_irishzhang| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/irishzhang/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_irishzhang_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_irishzhang_pipeline_en.md new file mode 100644 index 00000000000000..cca25016505b50 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_irishzhang_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_irishzhang_pipeline pipeline DistilBertForTokenClassification from irishzhang +author: John Snow Labs +name: burmese_awesome_wnut_model_irishzhang_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_irishzhang_pipeline` is a English model originally trained by irishzhang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_irishzhang_pipeline_en_5.5.0_3.0_1725500827425.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_irishzhang_pipeline_en_5.5.0_3.0_1725500827425.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("burmese_awesome_wnut_model_irishzhang_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_irishzhang_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_irishzhang_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/irishzhang/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_jaydip_tss_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_jaydip_tss_en.md new file mode 100644 index 00000000000000..d0ba967439879a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_jaydip_tss_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_jaydip_tss DistilBertForTokenClassification from jaydip-tss +author: John Snow Labs +name: burmese_awesome_wnut_model_jaydip_tss +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_jaydip_tss` is a English model originally trained by jaydip-tss. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_jaydip_tss_en_5.5.0_3.0_1725495915398.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_jaydip_tss_en_5.5.0_3.0_1725495915398.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# The classifier reads the "document" and "token" columns produced above
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_jaydip_tss","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+// The classifier reads the "document" and "token" columns produced above
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_jaydip_tss", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_jaydip_tss| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/jaydip-tss/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_jaydip_tss_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_jaydip_tss_pipeline_en.md new file mode 100644 index 00000000000000..fa37d2d6fbd420 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_jaydip_tss_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_jaydip_tss_pipeline pipeline DistilBertForTokenClassification from jaydip-tss +author: John Snow Labs +name: burmese_awesome_wnut_model_jaydip_tss_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_jaydip_tss_pipeline` is a English model originally trained by jaydip-tss. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_jaydip_tss_pipeline_en_5.5.0_3.0_1725495926773.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_jaydip_tss_pipeline_en_5.5.0_3.0_1725495926773.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("burmese_awesome_wnut_model_jaydip_tss_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_jaydip_tss_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_jaydip_tss_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/jaydip-tss/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_laitrongduc_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_laitrongduc_en.md new file mode 100644 index 00000000000000..ab16b298595ce9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_laitrongduc_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_laitrongduc DistilBertForTokenClassification from laitrongduc +author: John Snow Labs +name: burmese_awesome_wnut_model_laitrongduc +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_laitrongduc` is a English model originally trained by laitrongduc. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_laitrongduc_en_5.5.0_3.0_1725496235329.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_laitrongduc_en_5.5.0_3.0_1725496235329.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# The classifier reads the "document" and "token" columns produced above
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_laitrongduc","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+// The classifier reads the "document" and "token" columns produced above
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_laitrongduc", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_laitrongduc| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/laitrongduc/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_langchain12_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_langchain12_pipeline_en.md new file mode 100644 index 00000000000000..ca65dcd22db1d4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_langchain12_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_langchain12_pipeline pipeline DistilBertForTokenClassification from LangChain12 +author: John Snow Labs +name: burmese_awesome_wnut_model_langchain12_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_langchain12_pipeline` is a English model originally trained by LangChain12. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_langchain12_pipeline_en_5.5.0_3.0_1725518275217.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_langchain12_pipeline_en_5.5.0_3.0_1725518275217.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("burmese_awesome_wnut_model_langchain12_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_langchain12_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_langchain12_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/LangChain12/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_lash_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_lash_en.md new file mode 100644 index 00000000000000..a6eba95df78745 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_lash_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_lash DistilBertForTokenClassification from lash +author: John Snow Labs +name: burmese_awesome_wnut_model_lash +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_lash` is a English model originally trained by lash. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_lash_en_5.5.0_3.0_1725500330309.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_lash_en_5.5.0_3.0_1725500330309.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# The classifier reads the "document" and "token" columns produced above
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_lash","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+// The classifier reads the "document" and "token" columns produced above
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_lash", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_lash| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/lash/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_manikanta_goli_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_manikanta_goli_pipeline_en.md new file mode 100644 index 00000000000000..b7006aecb9302c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_manikanta_goli_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_manikanta_goli_pipeline pipeline DistilBertForTokenClassification from Manikanta-goli +author: John Snow Labs +name: burmese_awesome_wnut_model_manikanta_goli_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_manikanta_goli_pipeline` is a English model originally trained by Manikanta-goli. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_manikanta_goli_pipeline_en_5.5.0_3.0_1725496103216.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_manikanta_goli_pipeline_en_5.5.0_3.0_1725496103216.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("burmese_awesome_wnut_model_manikanta_goli_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_manikanta_goli_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_manikanta_goli_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Manikanta-goli/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_manusj_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_manusj_pipeline_en.md new file mode 100644 index 00000000000000..27f59bf1406d21 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_manusj_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_manusj_pipeline pipeline DistilBertForTokenClassification from Manusj +author: John Snow Labs +name: burmese_awesome_wnut_model_manusj_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_manusj_pipeline` is a English model originally trained by Manusj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_manusj_pipeline_en_5.5.0_3.0_1725495813628.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_manusj_pipeline_en_5.5.0_3.0_1725495813628.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("burmese_awesome_wnut_model_manusj_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_manusj_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_manusj_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Manusj/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_qminh369_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_qminh369_en.md new file mode 100644 index 00000000000000..7ca7731708fb81 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_qminh369_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_qminh369 DistilBertForTokenClassification from qminh369 +author: John Snow Labs +name: burmese_awesome_wnut_model_qminh369 +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_qminh369` is a English model originally trained by qminh369. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_qminh369_en_5.5.0_3.0_1725496209609.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_qminh369_en_5.5.0_3.0_1725496209609.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# The classifier reads the "document" and "token" columns produced above
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_qminh369","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+// The classifier reads the "document" and "token" columns produced above
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_qminh369", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_qminh369| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/qminh369/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_qminh369_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_qminh369_pipeline_en.md new file mode 100644 index 00000000000000..c17ee83173fd3a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_qminh369_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_qminh369_pipeline pipeline DistilBertForTokenClassification from qminh369 +author: John Snow Labs +name: burmese_awesome_wnut_model_qminh369_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_qminh369_pipeline` is a English model originally trained by qminh369. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_qminh369_pipeline_en_5.5.0_3.0_1725496221418.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_qminh369_pipeline_en_5.5.0_3.0_1725496221418.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("burmese_awesome_wnut_model_qminh369_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_qminh369_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_qminh369_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/qminh369/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_thypogean_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_thypogean_en.md new file mode 100644 index 00000000000000..636af4e53c34f8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_thypogean_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_thypogean DistilBertForTokenClassification from thypogean +author: John Snow Labs +name: burmese_awesome_wnut_model_thypogean +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_thypogean` is a English model originally trained by thypogean. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_thypogean_en_5.5.0_3.0_1725518278168.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_thypogean_en_5.5.0_3.0_1725518278168.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# The classifier reads the "document" and "token" columns produced above
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_thypogean","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+// The classifier reads the "document" and "token" columns produced above
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_thypogean", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_thypogean| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/thypogean/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_yjoonjang_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_yjoonjang_en.md new file mode 100644 index 00000000000000..02202722823e7b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_yjoonjang_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_yjoonjang DistilBertForTokenClassification from yjoonjang +author: John Snow Labs +name: burmese_awesome_wnut_model_yjoonjang +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_yjoonjang` is a English model originally trained by yjoonjang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_yjoonjang_en_5.5.0_3.0_1725496014816.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_yjoonjang_en_5.5.0_3.0_1725496014816.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# The classifier reads the "document" and "token" columns produced above
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_yjoonjang","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+// The classifier reads the "document" and "token" columns produced above
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_yjoonjang", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_yjoonjang| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/yjoonjang/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_yohand_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_yohand_en.md new file mode 100644 index 00000000000000..f06bf5e864b45b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_yohand_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_yohand DistilBertForTokenClassification from yohand +author: John Snow Labs +name: burmese_awesome_wnut_model_yohand +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_yohand` is a English model originally trained by yohand. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_yohand_en_5.5.0_3.0_1725495917834.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_yohand_en_5.5.0_3.0_1725495917834.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# The classifier reads the "document" and "token" columns produced above
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_yohand","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+// The classifier reads the "document" and "token" columns produced above
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_yohand", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_yohand| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/yohand/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_yohand_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_yohand_pipeline_en.md new file mode 100644 index 00000000000000..287f41d74e1a73 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_yohand_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_yohand_pipeline pipeline DistilBertForTokenClassification from yohand +author: John Snow Labs +name: burmese_awesome_wnut_model_yohand_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_yohand_pipeline` is a English model originally trained by yohand. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_yohand_pipeline_en_5.5.0_3.0_1725495931150.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_yohand_pipeline_en_5.5.0_3.0_1725495931150.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("burmese_awesome_wnut_model_yohand_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_yohand_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_yohand_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/yohand/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_yuting27_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_yuting27_pipeline_en.md new file mode 100644 index 00000000000000..ced940c188fdfe --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_model_yuting27_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_yuting27_pipeline pipeline DistilBertForTokenClassification from yuting27 +author: John Snow Labs +name: burmese_awesome_wnut_model_yuting27_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_yuting27_pipeline` is a English model originally trained by yuting27. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_yuting27_pipeline_en_5.5.0_3.0_1725518397017.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_yuting27_pipeline_en_5.5.0_3.0_1725518397017.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("burmese_awesome_wnut_model_yuting27_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_yuting27_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_yuting27_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/yuting27/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_place_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_place_pipeline_en.md new file mode 100644 index 00000000000000..bbac3a6061fd81 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_awesome_wnut_place_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_place_pipeline pipeline DistilBertForTokenClassification from gonzalezrostani +author: John Snow Labs +name: burmese_awesome_wnut_place_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_place_pipeline` is a English model originally trained by gonzalezrostani. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_place_pipeline_en_5.5.0_3.0_1725518103070.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_place_pipeline_en_5.5.0_3.0_1725518103070.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("burmese_awesome_wnut_place_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("burmese_awesome_wnut_place_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_place_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/gonzalezrostani/my_awesome_wnut_Place + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_nepal_bhasa_ner_model_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_nepal_bhasa_ner_model_en.md new file mode 100644 index 00000000000000..4c14e31505f86c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_nepal_bhasa_ner_model_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_nepal_bhasa_ner_model DistilBertForTokenClassification from veronica1608 +author: John Snow Labs +name: burmese_nepal_bhasa_ner_model +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_nepal_bhasa_ner_model` is a English model originally trained by veronica1608. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_nepal_bhasa_ner_model_en_5.5.0_3.0_1725518744662.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_nepal_bhasa_ner_model_en_5.5.0_3.0_1725518744662.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# The classifier reads the "document" and "token" columns produced above
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_nepal_bhasa_ner_model","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+// The classifier reads the "document" and "token" columns produced above
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_nepal_bhasa_ner_model", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_nepal_bhasa_ner_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/veronica1608/my_new_ner_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_nepal_bhasa_ner_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_nepal_bhasa_ner_model_pipeline_en.md new file mode 100644 index 00000000000000..4458f8b3b88561 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_nepal_bhasa_ner_model_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_nepal_bhasa_ner_model_pipeline pipeline DistilBertForTokenClassification from veronica1608 +author: John Snow Labs +name: burmese_nepal_bhasa_ner_model_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_nepal_bhasa_ner_model_pipeline` is a English model originally trained by veronica1608. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_nepal_bhasa_ner_model_pipeline_en_5.5.0_3.0_1725518756585.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_nepal_bhasa_ner_model_pipeline_en_5.5.0_3.0_1725518756585.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("burmese_nepal_bhasa_ner_model_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("burmese_nepal_bhasa_ner_model_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_nepal_bhasa_ner_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.4 MB| + +## References + +https://huggingface.co/veronica1608/my_new_ner_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_ner_model_balciberin_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_ner_model_balciberin_en.md new file mode 100644 index 00000000000000..4c11dcf5040439 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_ner_model_balciberin_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_ner_model_balciberin DistilBertForTokenClassification from balciberin +author: John Snow Labs +name: burmese_ner_model_balciberin +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_ner_model_balciberin` is a English model originally trained by balciberin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_ner_model_balciberin_en_5.5.0_3.0_1725496020712.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_ner_model_balciberin_en_5.5.0_3.0_1725496020712.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# The classifier reads the "document" and "token" columns produced above
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_ner_model_balciberin","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+// The classifier reads the "document" and "token" columns produced above
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_ner_model_balciberin", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
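+
+The classifier emits one IOB-style tag per token. To group those tags into entity chunks, Spark NLP's `NerConverter` can be appended to the same pipeline. A hedged sketch that reuses the stages defined in the example above:
+
+```python
+from sparknlp.annotator import NerConverter
+
+# Merge B-/I- token tags into whole entity chunks
+converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+chunk_pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier, converter])
+chunk_df = chunk_pipeline.fit(data).transform(data)
+chunk_df.select("ner_chunk.result").show(truncate=False)
+```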
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_ner_model_balciberin| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/balciberin/my_ner_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_ner_model_balciberin_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_ner_model_balciberin_pipeline_en.md new file mode 100644 index 00000000000000..d471793f1eefd9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_ner_model_balciberin_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_ner_model_balciberin_pipeline pipeline DistilBertForTokenClassification from balciberin +author: John Snow Labs +name: burmese_ner_model_balciberin_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_ner_model_balciberin_pipeline` is a English model originally trained by balciberin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_ner_model_balciberin_pipeline_en_5.5.0_3.0_1725496032531.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_ner_model_balciberin_pipeline_en_5.5.0_3.0_1725496032531.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_ner_model_balciberin_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_ner_model_balciberin_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_ner_model_balciberin_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/balciberin/my_ner_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_ner_model_mundo_go_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_ner_model_mundo_go_pipeline_en.md new file mode 100644 index 00000000000000..78310319b691f0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_ner_model_mundo_go_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_ner_model_mundo_go_pipeline pipeline DistilBertForTokenClassification from mundo-go +author: John Snow Labs +name: burmese_ner_model_mundo_go_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_ner_model_mundo_go_pipeline` is a English model originally trained by mundo-go. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_ner_model_mundo_go_pipeline_en_5.5.0_3.0_1725518353361.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_ner_model_mundo_go_pipeline_en_5.5.0_3.0_1725518353361.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_ner_model_mundo_go_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_ner_model_mundo_go_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
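+For quick experiments on a single string, the same pretrained pipeline can typically be driven without building a DataFrame first. A sketch, assuming the standard `annotate` entry point and an illustrative sentence:
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+pipeline = PretrainedPipeline("burmese_ner_model_mundo_go_pipeline", lang="en")
+
+# annotate() accepts a plain string and returns a dict keyed by output column name
+result = pipeline.annotate("My name is Wolfgang and I live in Berlin.")
+print(result.get("token"))  # tokens
+print(result.get("ner"))    # predicted tag per token (key name assumed from the included models)
+```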
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_ner_model_mundo_go_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|621.2 MB| + +## References + +https://huggingface.co/mundo-go/my_ner_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_ner_model_rwindia_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_ner_model_rwindia_en.md new file mode 100644 index 00000000000000..7daf255d9bc404 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_ner_model_rwindia_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_ner_model_rwindia DistilBertForTokenClassification from rwindia +author: John Snow Labs +name: burmese_ner_model_rwindia +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_ner_model_rwindia` is a English model originally trained by rwindia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_ner_model_rwindia_en_5.5.0_3.0_1725495917413.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_ner_model_rwindia_en_5.5.0_3.0_1725495917413.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_ner_model_rwindia","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_ner_model_rwindia", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_ner_model_rwindia| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/rwindia/my_ner_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_ner_model_uppaluru_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_ner_model_uppaluru_en.md new file mode 100644 index 00000000000000..19738306623a1e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_ner_model_uppaluru_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_ner_model_uppaluru DistilBertForTokenClassification from uppaluru +author: John Snow Labs +name: burmese_ner_model_uppaluru +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_ner_model_uppaluru` is a English model originally trained by uppaluru. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_ner_model_uppaluru_en_5.5.0_3.0_1725500274535.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_ner_model_uppaluru_en_5.5.0_3.0_1725500274535.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_ner_model_uppaluru","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_ner_model_uppaluru", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_ner_model_uppaluru| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/uppaluru/my_ner_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-burmese_ner_model_uppaluru_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-burmese_ner_model_uppaluru_pipeline_en.md new file mode 100644 index 00000000000000..13fdb3dd3f64e6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-burmese_ner_model_uppaluru_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_ner_model_uppaluru_pipeline pipeline DistilBertForTokenClassification from uppaluru +author: John Snow Labs +name: burmese_ner_model_uppaluru_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_ner_model_uppaluru_pipeline` is a English model originally trained by uppaluru. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_ner_model_uppaluru_pipeline_en_5.5.0_3.0_1725500288848.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_ner_model_uppaluru_pipeline_en_5.5.0_3.0_1725500288848.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_ner_model_uppaluru_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_ner_model_uppaluru_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_ner_model_uppaluru_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/uppaluru/my_ner_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-businessbert_en.md b/docs/_posts/ahmedlone127/2024-09-05-businessbert_en.md new file mode 100644 index 00000000000000..1b09a4ee9e0cae --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-businessbert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English businessbert BertEmbeddings from pborchert +author: John Snow Labs +name: businessbert +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`businessbert` is a English model originally trained by pborchert. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/businessbert_en_5.5.0_3.0_1725552838079.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/businessbert_en_5.5.0_3.0_1725552838079.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("businessbert","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("businessbert","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
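+The embeddings land in the `embeddings` output column as one annotation per token. A hedged sketch of pulling the raw vectors out of the `pipelineDF` produced above:
+
+```python
+from pyspark.sql import functions as F
+
+# Each annotation struct in "embeddings" carries the token text in `result`
+# and the float vector in its `embeddings` field.
+vectors = (pipelineDF
+    .select(F.explode("embeddings").alias("emb"))
+    .select(F.col("emb.result").alias("token"), F.col("emb.embeddings").alias("vector")))
+vectors.show(5, truncate=80)
+```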
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|businessbert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|en| +|Size:|404.8 MB| + +## References + +https://huggingface.co/pborchert/BusinessBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-businessbert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-businessbert_pipeline_en.md new file mode 100644 index 00000000000000..6c713c855dd5c2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-businessbert_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English businessbert_pipeline pipeline BertEmbeddings from pborchert +author: John Snow Labs +name: businessbert_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`businessbert_pipeline` is a English model originally trained by pborchert. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/businessbert_pipeline_en_5.5.0_3.0_1725552858555.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/businessbert_pipeline_en_5.5.0_3.0_1725552858555.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("businessbert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("businessbert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|businessbert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|404.8 MB| + +## References + +https://huggingface.co/pborchert/BusinessBERT + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-candle_cvss_availability_en.md b/docs/_posts/ahmedlone127/2024-09-05-candle_cvss_availability_en.md new file mode 100644 index 00000000000000..55195c17cbbd44 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-candle_cvss_availability_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English candle_cvss_availability MPNetForSequenceClassification from iashour +author: John Snow Labs +name: candle_cvss_availability +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, mpnet] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`candle_cvss_availability` is a English model originally trained by iashour. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/candle_cvss_availability_en_5.5.0_3.0_1725575376313.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/candle_cvss_availability_en_5.5.0_3.0_1725575376313.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = MPNetForSequenceClassification.pretrained("candle_cvss_availability","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = MPNetForSequenceClassification.pretrained("candle_cvss_availability", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
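+Once `pipelineDF` has been computed as above, the predicted label can be read straight from the `class` output column; its `metadata` field typically also carries per-class confidence scores. A short sketch:
+
+```python
+# One classification annotation per document: `result` is the label string.
+pipelineDF.select("text", "class.result", "class.metadata").show(truncate=False)
+```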
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|candle_cvss_availability| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/iashour/CANDLE_cvss_availability \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-candle_cvss_availability_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-candle_cvss_availability_pipeline_en.md new file mode 100644 index 00000000000000..7527de66a3255e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-candle_cvss_availability_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English candle_cvss_availability_pipeline pipeline MPNetForSequenceClassification from iashour +author: John Snow Labs +name: candle_cvss_availability_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`candle_cvss_availability_pipeline` is a English model originally trained by iashour. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/candle_cvss_availability_pipeline_en_5.5.0_3.0_1725575397174.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/candle_cvss_availability_pipeline_en_5.5.0_3.0_1725575397174.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("candle_cvss_availability_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("candle_cvss_availability_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|candle_cvss_availability_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/iashour/CANDLE_cvss_availability + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-candle_cvss_scope_en.md b/docs/_posts/ahmedlone127/2024-09-05-candle_cvss_scope_en.md new file mode 100644 index 00000000000000..adc920989e3c18 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-candle_cvss_scope_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English candle_cvss_scope MPNetForSequenceClassification from iashour +author: John Snow Labs +name: candle_cvss_scope +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, mpnet] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`candle_cvss_scope` is a English model originally trained by iashour. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/candle_cvss_scope_en_5.5.0_3.0_1725575327599.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/candle_cvss_scope_en_5.5.0_3.0_1725575327599.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = MPNetForSequenceClassification.pretrained("candle_cvss_scope","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = MPNetForSequenceClassification.pretrained("candle_cvss_scope", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|candle_cvss_scope| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/iashour/CANDLE_cvss_scope \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-candle_cvss_vector_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-candle_cvss_vector_pipeline_en.md new file mode 100644 index 00000000000000..bb271ae153688f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-candle_cvss_vector_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English candle_cvss_vector_pipeline pipeline MPNetForSequenceClassification from iashour +author: John Snow Labs +name: candle_cvss_vector_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`candle_cvss_vector_pipeline` is a English model originally trained by iashour. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/candle_cvss_vector_pipeline_en_5.5.0_3.0_1725574968153.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/candle_cvss_vector_pipeline_en_5.5.0_3.0_1725574968153.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("candle_cvss_vector_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("candle_cvss_vector_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
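+The CANDLE pipelines documented on this and the neighbouring cards share the same input shape, so the same vulnerability description can be scored by each of them and the predicted CVSS components compared. A hedged sketch using two pipelines from this batch (the CVE-style sentence is illustrative):
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+df = spark.createDataFrame(
+    [["A buffer overflow allows remote attackers to crash the service via a crafted packet."]]
+).toDF("text")
+
+for name in ["candle_cvss_availability_pipeline", "candle_cvss_vector_pipeline"]:
+    preds = PretrainedPipeline(name, lang="en").transform(df)
+    preds.select("class.result").show(truncate=False)  # predicted CVSS component for this pipeline
+```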
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|candle_cvss_vector_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.6 MB| + +## References + +https://huggingface.co/iashour/CANDLE_cvss_vector + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-category_1_delivery_cancellation_distilbert_base_cased_v1_en.md b/docs/_posts/ahmedlone127/2024-09-05-category_1_delivery_cancellation_distilbert_base_cased_v1_en.md new file mode 100644 index 00000000000000..ffa003a7499069 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-category_1_delivery_cancellation_distilbert_base_cased_v1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English category_1_delivery_cancellation_distilbert_base_cased_v1 DistilBertForSequenceClassification from chuuhtetnaing +author: John Snow Labs +name: category_1_delivery_cancellation_distilbert_base_cased_v1 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`category_1_delivery_cancellation_distilbert_base_cased_v1` is a English model originally trained by chuuhtetnaing. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/category_1_delivery_cancellation_distilbert_base_cased_v1_en_5.5.0_3.0_1725507682844.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/category_1_delivery_cancellation_distilbert_base_cased_v1_en_5.5.0_3.0_1725507682844.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("category_1_delivery_cancellation_distilbert_base_cased_v1","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("category_1_delivery_cancellation_distilbert_base_cased_v1", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|category_1_delivery_cancellation_distilbert_base_cased_v1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|246.0 MB| + +## References + +https://huggingface.co/chuuhtetnaing/category-1-delivery-cancellation-distilbert-base-cased-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-cefr_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-cefr_model_pipeline_en.md new file mode 100644 index 00000000000000..c917c68d5a900a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-cefr_model_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English cefr_model_pipeline pipeline RoBertaForSequenceClassification from CerenCaglar +author: John Snow Labs +name: cefr_model_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cefr_model_pipeline` is a English model originally trained by CerenCaglar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cefr_model_pipeline_en_5.5.0_3.0_1725542182477.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cefr_model_pipeline_en_5.5.0_3.0_1725542182477.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("cefr_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("cefr_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
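+As with the other classification pipelines on this page, the prediction ends up in the `class` output column. A brief sketch of scoring a few sentences with the pipeline above (the sentences are illustrative; the actual label set comes from the underlying model):
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+df = spark.createDataFrame(
+    [["The cat sat on the mat."],
+     ["Notwithstanding the committee's reservations, the proposal was ratified."]]
+).toDF("text")
+
+annotations = PretrainedPipeline("cefr_model_pipeline", lang="en").transform(df)
+annotations.select("text", "class.result").show(truncate=False)
+```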
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cefr_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|431.7 MB| + +## References + +https://huggingface.co/CerenCaglar/Cefr_Model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-checkpoint_11600_en.md b/docs/_posts/ahmedlone127/2024-09-05-checkpoint_11600_en.md new file mode 100644 index 00000000000000..8f481b1d020713 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-checkpoint_11600_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English checkpoint_11600 XlmRoBertaEmbeddings from yemen2016 +author: John Snow Labs +name: checkpoint_11600 +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`checkpoint_11600` is a English model originally trained by yemen2016. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/checkpoint_11600_en_5.5.0_3.0_1725532175931.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/checkpoint_11600_en_5.5.0_3.0_1725532175931.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("checkpoint_11600","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("checkpoint_11600","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
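+If a single vector per document is needed, Spark NLP's `SentenceEmbeddings` annotator can pool the token vectors produced above. A sketch extending the Python pipeline from this card (average pooling is my choice of example, not something the model prescribes):
+
+```python
+from sparknlp.annotator import SentenceEmbeddings
+from pyspark.ml import Pipeline
+
+# Pool the token vectors of each document into one fixed-size sentence vector.
+sentenceEmbeddings = SentenceEmbeddings() \
+    .setInputCols(["document", "embeddings"]) \
+    .setOutputCol("sentence_embeddings") \
+    .setPoolingStrategy("AVERAGE")
+
+pooledDF = Pipeline().setStages([documentAssembler, tokenizer, embeddings, sentenceEmbeddings]) \
+    .fit(data).transform(data)
+pooledDF.selectExpr("explode(sentence_embeddings.embeddings) as sentence_vector").show(1, truncate=80)
+```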
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|checkpoint_11600| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[xlm_roberta]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/yemen2016/checkpoint-11600 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-checkpoint_11600_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-checkpoint_11600_pipeline_en.md new file mode 100644 index 00000000000000..87b07ba29b1e4e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-checkpoint_11600_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English checkpoint_11600_pipeline pipeline XlmRoBertaEmbeddings from yemen2016 +author: John Snow Labs +name: checkpoint_11600_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`checkpoint_11600_pipeline` is a English model originally trained by yemen2016. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/checkpoint_11600_pipeline_en_5.5.0_3.0_1725532225789.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/checkpoint_11600_pipeline_en_5.5.0_3.0_1725532225789.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("checkpoint_11600_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("checkpoint_11600_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|checkpoint_11600_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/yemen2016/checkpoint-11600 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-checkpoint_14200_en.md b/docs/_posts/ahmedlone127/2024-09-05-checkpoint_14200_en.md new file mode 100644 index 00000000000000..7fe09274db432a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-checkpoint_14200_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English checkpoint_14200 XlmRoBertaEmbeddings from yemen2016 +author: John Snow Labs +name: checkpoint_14200 +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`checkpoint_14200` is a English model originally trained by yemen2016. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/checkpoint_14200_en_5.5.0_3.0_1725555483980.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/checkpoint_14200_en_5.5.0_3.0_1725555483980.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("checkpoint_14200","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("checkpoint_14200","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|checkpoint_14200| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[xlm_roberta]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/yemen2016/checkpoint-14200 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-checkpoint_14200_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-checkpoint_14200_pipeline_en.md new file mode 100644 index 00000000000000..bac0f8c066734b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-checkpoint_14200_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English checkpoint_14200_pipeline pipeline XlmRoBertaEmbeddings from yemen2016 +author: John Snow Labs +name: checkpoint_14200_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`checkpoint_14200_pipeline` is a English model originally trained by yemen2016. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/checkpoint_14200_pipeline_en_5.5.0_3.0_1725555542003.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/checkpoint_14200_pipeline_en_5.5.0_3.0_1725555542003.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("checkpoint_14200_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("checkpoint_14200_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|checkpoint_14200_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/yemen2016/checkpoint-14200 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-checkpoint_22200_en.md b/docs/_posts/ahmedlone127/2024-09-05-checkpoint_22200_en.md new file mode 100644 index 00000000000000..368692de6b312b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-checkpoint_22200_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English checkpoint_22200 XlmRoBertaEmbeddings from yemen2016 +author: John Snow Labs +name: checkpoint_22200 +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`checkpoint_22200` is a English model originally trained by yemen2016. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/checkpoint_22200_en_5.5.0_3.0_1725531645540.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/checkpoint_22200_en_5.5.0_3.0_1725531645540.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("checkpoint_22200","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("checkpoint_22200","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|checkpoint_22200| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[xlm_roberta]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/yemen2016/checkpoint-22200 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-checkpoint_22200_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-checkpoint_22200_pipeline_en.md new file mode 100644 index 00000000000000..4988f8590f8d88 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-checkpoint_22200_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English checkpoint_22200_pipeline pipeline XlmRoBertaEmbeddings from yemen2016 +author: John Snow Labs +name: checkpoint_22200_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`checkpoint_22200_pipeline` is a English model originally trained by yemen2016. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/checkpoint_22200_pipeline_en_5.5.0_3.0_1725531700518.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/checkpoint_22200_pipeline_en_5.5.0_3.0_1725531700518.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("checkpoint_22200_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("checkpoint_22200_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|checkpoint_22200_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/yemen2016/checkpoint-22200 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-chemberta_pubchem1m_shard00_en.md b/docs/_posts/ahmedlone127/2024-09-05-chemberta_pubchem1m_shard00_en.md new file mode 100644 index 00000000000000..2cf9188dc30586 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-chemberta_pubchem1m_shard00_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English chemberta_pubchem1m_shard00 RoBertaEmbeddings from seyonec +author: John Snow Labs +name: chemberta_pubchem1m_shard00 +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`chemberta_pubchem1m_shard00` is a English model originally trained by seyonec. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/chemberta_pubchem1m_shard00_en_5.5.0_3.0_1725571935034.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/chemberta_pubchem1m_shard00_en_5.5.0_3.0_1725571935034.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("chemberta_pubchem1m_shard00","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("chemberta_pubchem1m_shard00","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
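+ChemBERTa checkpoints are generally trained on SMILES strings rather than natural-language text, so the illustrative "I love spark-nlp" input above is unlikely to be meaningful for this model. A hedged sketch feeding a SMILES string through the fitted pipeline instead (the molecule, aspirin, is an arbitrary example):
+
+```python
+# Reuse the pipelineModel fitted above on a SMILES input.
+smiles_data = spark.createDataFrame([["CC(=O)OC1=CC=CC=C1C(=O)O"]]).toDF("text")
+smiles_df = pipelineModel.transform(smiles_data)
+smiles_df.select("embeddings.result").show(truncate=False)  # sub-token pieces of the SMILES string
+```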
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|chemberta_pubchem1m_shard00| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|310.0 MB| + +## References + +https://huggingface.co/seyonec/ChemBERTA_PubChem1M_shard00 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-chemberta_pubchem1m_shard00_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-chemberta_pubchem1m_shard00_pipeline_en.md new file mode 100644 index 00000000000000..866b0441e5a1e8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-chemberta_pubchem1m_shard00_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English chemberta_pubchem1m_shard00_pipeline pipeline RoBertaEmbeddings from seyonec +author: John Snow Labs +name: chemberta_pubchem1m_shard00_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`chemberta_pubchem1m_shard00_pipeline` is a English model originally trained by seyonec. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/chemberta_pubchem1m_shard00_pipeline_en_5.5.0_3.0_1725571951450.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/chemberta_pubchem1m_shard00_pipeline_en_5.5.0_3.0_1725571951450.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("chemberta_pubchem1m_shard00_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("chemberta_pubchem1m_shard00_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|chemberta_pubchem1m_shard00_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|310.0 MB| + +## References + +https://huggingface.co/seyonec/ChemBERTA_PubChem1M_shard00 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-cino_base_v2_tncc_document_tsheg_en.md b/docs/_posts/ahmedlone127/2024-09-05-cino_base_v2_tncc_document_tsheg_en.md new file mode 100644 index 00000000000000..294ae3bb53b56d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-cino_base_v2_tncc_document_tsheg_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English cino_base_v2_tncc_document_tsheg XlmRoBertaForSequenceClassification from UTibetNLP +author: John Snow Labs +name: cino_base_v2_tncc_document_tsheg +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cino_base_v2_tncc_document_tsheg` is a English model originally trained by UTibetNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cino_base_v2_tncc_document_tsheg_en_5.5.0_3.0_1725529835315.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cino_base_v2_tncc_document_tsheg_en_5.5.0_3.0_1725529835315.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from pyspark.ml import Pipeline
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForSequenceClassification
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("cino_base_v2_tncc_document_tsheg","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("cino_base_v2_tncc_document_tsheg", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
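+Once the pipeline has run, the predicted label for each row can be read from the `class` annotation column. A small follow-up sketch, assuming the `pipelineDF` built above:
+
+```python
+# Show the predicted class next to the input text
+pipelineDF.select("text", "class.result").show(truncate=False)
+```
+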
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cino_base_v2_tncc_document_tsheg| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|712.4 MB| + +## References + +https://huggingface.co/UTibetNLP/cino-base-v2_TNCC-document_tsheg \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-cino_base_v2_tncc_document_tsheg_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-cino_base_v2_tncc_document_tsheg_pipeline_en.md new file mode 100644 index 00000000000000..ee63aa08f1533c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-cino_base_v2_tncc_document_tsheg_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English cino_base_v2_tncc_document_tsheg_pipeline pipeline XlmRoBertaForSequenceClassification from UTibetNLP +author: John Snow Labs +name: cino_base_v2_tncc_document_tsheg_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cino_base_v2_tncc_document_tsheg_pipeline` is a English model originally trained by UTibetNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cino_base_v2_tncc_document_tsheg_pipeline_en_5.5.0_3.0_1725529870080.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cino_base_v2_tncc_document_tsheg_pipeline_en_5.5.0_3.0_1725529870080.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("cino_base_v2_tncc_document_tsheg_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("cino_base_v2_tncc_document_tsheg_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cino_base_v2_tncc_document_tsheg_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|712.4 MB| + +## References + +https://huggingface.co/UTibetNLP/cino-base-v2_TNCC-document_tsheg + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-cino_large_v2_tncc_document_tsheg_en.md b/docs/_posts/ahmedlone127/2024-09-05-cino_large_v2_tncc_document_tsheg_en.md new file mode 100644 index 00000000000000..de37d5ac94ef4b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-cino_large_v2_tncc_document_tsheg_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English cino_large_v2_tncc_document_tsheg XlmRoBertaForSequenceClassification from UTibetNLP +author: John Snow Labs +name: cino_large_v2_tncc_document_tsheg +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cino_large_v2_tncc_document_tsheg` is a English model originally trained by UTibetNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cino_large_v2_tncc_document_tsheg_en_5.5.0_3.0_1725536352809.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cino_large_v2_tncc_document_tsheg_en_5.5.0_3.0_1725536352809.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from pyspark.ml import Pipeline
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForSequenceClassification
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("cino_large_v2_tncc_document_tsheg","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("cino_large_v2_tncc_document_tsheg", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cino_large_v2_tncc_document_tsheg| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/UTibetNLP/cino-large-v2_TNCC-document_tsheg \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-cino_large_v2_tncc_document_tsheg_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-cino_large_v2_tncc_document_tsheg_pipeline_en.md new file mode 100644 index 00000000000000..8b6f1bee292640 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-cino_large_v2_tncc_document_tsheg_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English cino_large_v2_tncc_document_tsheg_pipeline pipeline XlmRoBertaForSequenceClassification from UTibetNLP +author: John Snow Labs +name: cino_large_v2_tncc_document_tsheg_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cino_large_v2_tncc_document_tsheg_pipeline` is a English model originally trained by UTibetNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cino_large_v2_tncc_document_tsheg_pipeline_en_5.5.0_3.0_1725536432127.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cino_large_v2_tncc_document_tsheg_pipeline_en_5.5.0_3.0_1725536432127.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("cino_large_v2_tncc_document_tsheg_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("cino_large_v2_tncc_document_tsheg_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cino_large_v2_tncc_document_tsheg_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/UTibetNLP/cino-large-v2_TNCC-document_tsheg + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-cino_large_v2_tncc_title_tsheg_en.md b/docs/_posts/ahmedlone127/2024-09-05-cino_large_v2_tncc_title_tsheg_en.md new file mode 100644 index 00000000000000..28333d7618d3fa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-cino_large_v2_tncc_title_tsheg_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English cino_large_v2_tncc_title_tsheg XlmRoBertaForSequenceClassification from UTibetNLP +author: John Snow Labs +name: cino_large_v2_tncc_title_tsheg +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cino_large_v2_tncc_title_tsheg` is a English model originally trained by UTibetNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cino_large_v2_tncc_title_tsheg_en_5.5.0_3.0_1725530577172.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cino_large_v2_tncc_title_tsheg_en_5.5.0_3.0_1725530577172.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from pyspark.ml import Pipeline
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForSequenceClassification
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("cino_large_v2_tncc_title_tsheg","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("cino_large_v2_tncc_title_tsheg", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cino_large_v2_tncc_title_tsheg| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/UTibetNLP/cino-large-v2_TNCC-title_tsheg \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-cino_large_v2_tncc_title_tsheg_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-cino_large_v2_tncc_title_tsheg_pipeline_en.md new file mode 100644 index 00000000000000..96de00a2704b1b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-cino_large_v2_tncc_title_tsheg_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English cino_large_v2_tncc_title_tsheg_pipeline pipeline XlmRoBertaForSequenceClassification from UTibetNLP +author: John Snow Labs +name: cino_large_v2_tncc_title_tsheg_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cino_large_v2_tncc_title_tsheg_pipeline` is a English model originally trained by UTibetNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cino_large_v2_tncc_title_tsheg_pipeline_en_5.5.0_3.0_1725530655851.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cino_large_v2_tncc_title_tsheg_pipeline_en_5.5.0_3.0_1725530655851.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("cino_large_v2_tncc_title_tsheg_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("cino_large_v2_tncc_title_tsheg_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cino_large_v2_tncc_title_tsheg_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/UTibetNLP/cino-large-v2_TNCC-title_tsheg + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-clasificador_muchocine_modeloalbert_en.md b/docs/_posts/ahmedlone127/2024-09-05-clasificador_muchocine_modeloalbert_en.md new file mode 100644 index 00000000000000..c05d339769f275 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-clasificador_muchocine_modeloalbert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English clasificador_muchocine_modeloalbert AlbertForSequenceClassification from martagrueso +author: John Snow Labs +name: clasificador_muchocine_modeloalbert +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clasificador_muchocine_modeloalbert` is a English model originally trained by martagrueso. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clasificador_muchocine_modeloalbert_en_5.5.0_3.0_1725543222909.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clasificador_muchocine_modeloalbert_en_5.5.0_3.0_1725543222909.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from pyspark.ml import Pipeline
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, AlbertForSequenceClassification
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = AlbertForSequenceClassification.pretrained("clasificador_muchocine_modeloalbert","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = AlbertForSequenceClassification.pretrained("clasificador_muchocine_modeloalbert", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
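+For ad-hoc scoring of individual documents it is often more convenient to wrap the fitted model in a `LightPipeline`, which avoids building a DataFrame for every request. A short sketch follows; the Spanish review text is an invented example chosen because the model was trained on muchocine movie reviews.
+
+```python
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+
+# Returns a plain dict with the "document", "token" and "class" annotations
+light.annotate("Una película entretenida, aunque con un guion bastante flojo.")
+```
+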
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clasificador_muchocine_modeloalbert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.7 MB| + +## References + +https://huggingface.co/martagrueso/clasificador-muchocine-modeloalbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-clasificador_muchocine_modeloalbert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-clasificador_muchocine_modeloalbert_pipeline_en.md new file mode 100644 index 00000000000000..65225de8ef502d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-clasificador_muchocine_modeloalbert_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English clasificador_muchocine_modeloalbert_pipeline pipeline AlbertForSequenceClassification from martagrueso +author: John Snow Labs +name: clasificador_muchocine_modeloalbert_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clasificador_muchocine_modeloalbert_pipeline` is a English model originally trained by martagrueso. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clasificador_muchocine_modeloalbert_pipeline_en_5.5.0_3.0_1725543225526.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clasificador_muchocine_modeloalbert_pipeline_en_5.5.0_3.0_1725543225526.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("clasificador_muchocine_modeloalbert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("clasificador_muchocine_modeloalbert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clasificador_muchocine_modeloalbert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|44.7 MB| + +## References + +https://huggingface.co/martagrueso/clasificador-muchocine-modeloalbert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-classifier__mergedcutiesruns__evidencealignment_albert_en.md b/docs/_posts/ahmedlone127/2024-09-05-classifier__mergedcutiesruns__evidencealignment_albert_en.md new file mode 100644 index 00000000000000..0fc9e3cc708332 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-classifier__mergedcutiesruns__evidencealignment_albert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English classifier__mergedcutiesruns__evidencealignment_albert AlbertForSequenceClassification from yevhenkost +author: John Snow Labs +name: classifier__mergedcutiesruns__evidencealignment_albert +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`classifier__mergedcutiesruns__evidencealignment_albert` is a English model originally trained by yevhenkost. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/classifier__mergedcutiesruns__evidencealignment_albert_en_5.5.0_3.0_1725510087651.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/classifier__mergedcutiesruns__evidencealignment_albert_en_5.5.0_3.0_1725510087651.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from pyspark.ml import Pipeline
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, AlbertForSequenceClassification
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = AlbertForSequenceClassification.pretrained("classifier__mergedcutiesruns__evidencealignment_albert","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = AlbertForSequenceClassification.pretrained("classifier__mergedcutiesruns__evidencealignment_albert", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
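+Beyond the top label, the classifier's annotation metadata carries the per-class scores, which can be useful for thresholding. A brief sketch, assuming the `pipelineDF` from above:
+
+```python
+# Explode the "class" annotations and inspect the label plus the raw class scores
+pipelineDF.selectExpr("explode(class) as c") \
+    .selectExpr("c.result", "c.metadata") \
+    .show(truncate=False)
+```
+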
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|classifier__mergedcutiesruns__evidencealignment_albert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/yevhenkost/classifier__mergedcutiesruns__evidenceAlignment_albert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-classifier__mergedcutiesruns__evidencealignment_albert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-classifier__mergedcutiesruns__evidencealignment_albert_pipeline_en.md new file mode 100644 index 00000000000000..c011fb4036eee1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-classifier__mergedcutiesruns__evidencealignment_albert_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English classifier__mergedcutiesruns__evidencealignment_albert_pipeline pipeline AlbertForSequenceClassification from yevhenkost +author: John Snow Labs +name: classifier__mergedcutiesruns__evidencealignment_albert_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`classifier__mergedcutiesruns__evidencealignment_albert_pipeline` is a English model originally trained by yevhenkost. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/classifier__mergedcutiesruns__evidencealignment_albert_pipeline_en_5.5.0_3.0_1725510090003.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/classifier__mergedcutiesruns__evidencealignment_albert_pipeline_en_5.5.0_3.0_1725510090003.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("classifier__mergedcutiesruns__evidencealignment_albert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("classifier__mergedcutiesruns__evidencealignment_albert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|classifier__mergedcutiesruns__evidencealignment_albert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/yevhenkost/classifier__mergedcutiesruns__evidenceAlignment_albert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-classify_isin_step6_binary_en.md b/docs/_posts/ahmedlone127/2024-09-05-classify_isin_step6_binary_en.md new file mode 100644 index 00000000000000..b73fe8716be1b5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-classify_isin_step6_binary_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English classify_isin_step6_binary AlbertForSequenceClassification from calculito +author: John Snow Labs +name: classify_isin_step6_binary +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`classify_isin_step6_binary` is a English model originally trained by calculito. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/classify_isin_step6_binary_en_5.5.0_3.0_1725543309676.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/classify_isin_step6_binary_en_5.5.0_3.0_1725543309676.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from pyspark.ml import Pipeline
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, AlbertForSequenceClassification
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = AlbertForSequenceClassification.pretrained("classify_isin_step6_binary","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = AlbertForSequenceClassification.pretrained("classify_isin_step6_binary", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|classify_isin_step6_binary| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/calculito/classify-ISIN-STEP6_binary \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-classify_isin_step6_binary_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-classify_isin_step6_binary_pipeline_en.md new file mode 100644 index 00000000000000..8fb48c691df9e0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-classify_isin_step6_binary_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English classify_isin_step6_binary_pipeline pipeline AlbertForSequenceClassification from calculito +author: John Snow Labs +name: classify_isin_step6_binary_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`classify_isin_step6_binary_pipeline` is a English model originally trained by calculito. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/classify_isin_step6_binary_pipeline_en_5.5.0_3.0_1725543312189.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/classify_isin_step6_binary_pipeline_en_5.5.0_3.0_1725543312189.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("classify_isin_step6_binary_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("classify_isin_step6_binary_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|classify_isin_step6_binary_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/calculito/classify-ISIN-STEP6_binary + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-clinicalbert_full_finetuned_ner_pablo_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-clinicalbert_full_finetuned_ner_pablo_pipeline_en.md new file mode 100644 index 00000000000000..9325c42eb7c9a9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-clinicalbert_full_finetuned_ner_pablo_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English clinicalbert_full_finetuned_ner_pablo_pipeline pipeline DistilBertForTokenClassification from pabRomero +author: John Snow Labs +name: clinicalbert_full_finetuned_ner_pablo_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clinicalbert_full_finetuned_ner_pablo_pipeline` is a English model originally trained by pabRomero. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clinicalbert_full_finetuned_ner_pablo_pipeline_en_5.5.0_3.0_1725506399035.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clinicalbert_full_finetuned_ner_pablo_pipeline_en_5.5.0_3.0_1725506399035.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("clinicalbert_full_finetuned_ner_pablo_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("clinicalbert_full_finetuned_ner_pablo_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
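+Because this pipeline ends in a token classifier, the interesting output is the per-token tag sequence. The sketch below uses `fullAnnotate` to keep character offsets; the example sentence and the assumption that the token-classification column is named `ner` are illustrative and not taken from the pipeline definition.
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("clinicalbert_full_finetuned_ner_pablo_pipeline", lang="en")
+
+# fullAnnotate preserves begin/end offsets for each annotation
+result = pipeline.fullAnnotate("The patient was started on 40 mg of atorvastatin daily.")[0]
+
+for token, tag in zip(result["token"], result["ner"]):
+    print(token.result, tag.result)
+```
+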
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clinicalbert_full_finetuned_ner_pablo_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|505.4 MB| + +## References + +https://huggingface.co/pabRomero/ClinicalBERT-full-finetuned-ner-pablo + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-clip_finetuned_en.md b/docs/_posts/ahmedlone127/2024-09-05-clip_finetuned_en.md new file mode 100644 index 00000000000000..9b7dc624d4c36d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-clip_finetuned_en.md @@ -0,0 +1,120 @@ +--- +layout: model +title: English clip_finetuned CLIPForZeroShotClassification from vinluvie +author: John Snow Labs +name: clip_finetuned +date: 2024-09-05 +tags: [en, open_source, onnx, zero_shot, clip, image] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CLIPForZeroShotClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clip_finetuned` is a English model originally trained by vinluvie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clip_finetuned_en_5.5.0_3.0_1725522307764.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clip_finetuned_en_5.5.0_3.0_1725522307764.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from pyspark.ml import Pipeline
+from sparknlp.base import ImageAssembler
+from sparknlp.annotator import CLIPForZeroShotClassification
+
+imageDF = spark.read \
+    .format("image") \
+    .option("dropInvalid", value = True) \
+    .load("src/test/resources/image/")
+
+candidateLabels = [
+    "a photo of a bird",
+    "a photo of a cat",
+    "a photo of a dog",
+    "a photo of a hen",
+    "a photo of a hippo",
+    "a photo of a room",
+    "a photo of a tractor",
+    "a photo of an ostrich",
+    "a photo of an ox"]
+
+imageAssembler = ImageAssembler() \
+    .setInputCol("image") \
+    .setOutputCol("image_assembler")
+
+imageClassifier = CLIPForZeroShotClassification.pretrained("clip_finetuned","en") \
+    .setInputCols(["image_assembler"]) \
+    .setOutputCol("label") \
+    .setCandidateLabels(candidateLabels)
+
+pipeline = Pipeline().setStages([imageAssembler, imageClassifier])
+pipelineModel = pipeline.fit(imageDF)
+pipelineDF = pipelineModel.transform(imageDF)
+
+```
+```scala
+
+val imageDF = ResourceHelper.spark.read
+  .format("image")
+  .option("dropInvalid", value = true)
+  .load("src/test/resources/image/")
+
+val candidateLabels = Array(
+  "a photo of a bird",
+  "a photo of a cat",
+  "a photo of a dog",
+  "a photo of a hen",
+  "a photo of a hippo",
+  "a photo of a room",
+  "a photo of a tractor",
+  "a photo of an ostrich",
+  "a photo of an ox")
+
+val imageAssembler = new ImageAssembler()
+  .setInputCol("image")
+  .setOutputCol("image_assembler")
+
+val imageClassifier = CLIPForZeroShotClassification.pretrained("clip_finetuned", "en")
+  .setInputCols(Array("image_assembler"))
+  .setOutputCol("label")
+  .setCandidateLabels(candidateLabels)
+
+val pipeline = new Pipeline().setStages(Array(imageAssembler, imageClassifier))
+val pipelineModel = pipeline.fit(imageDF)
+val pipelineDF = pipelineModel.transform(imageDF)
+
+```
+
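+To see which candidate label was chosen for each image, pair the image origin with the `label` results. A short follow-up sketch, assuming the `pipelineDF` from above:
+
+```python
+# One predicted caption-style label per input image
+pipelineDF.selectExpr("image.origin as file", "label.result as prediction").show(truncate=False)
+```
+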
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clip_finetuned| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[label]| +|Language:|en| +|Size:|567.3 MB| + +## References + +https://huggingface.co/vinluvie/clip-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-clip_large_fp16_en.md b/docs/_posts/ahmedlone127/2024-09-05-clip_large_fp16_en.md new file mode 100644 index 00000000000000..9f0dde2f25d165 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-clip_large_fp16_en.md @@ -0,0 +1,120 @@ +--- +layout: model +title: English clip_large_fp16 CLIPForZeroShotClassification from dahwinsingularity +author: John Snow Labs +name: clip_large_fp16 +date: 2024-09-05 +tags: [en, open_source, onnx, zero_shot, clip, image] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CLIPForZeroShotClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clip_large_fp16` is a English model originally trained by dahwinsingularity. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clip_large_fp16_en_5.5.0_3.0_1725523454975.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clip_large_fp16_en_5.5.0_3.0_1725523454975.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +imageDF = spark.read \ + .format("image") \ + .option("dropInvalid", value = True) \ + .load("src/test/resources/image/") + +candidateLabels = [ + "a photo of a bird", + "a photo of a cat", + "a photo of a dog", + "a photo of a hen", + "a photo of a hippo", + "a photo of a room", + "a photo of a tractor", + "a photo of an ostrich", + "a photo of an ox"] + +ImageAssembler = ImageAssembler() \ + .setInputCol("image") \ + .setOutputCol("image_assembler") + +imageClassifier = CLIPForZeroShotClassification.pretrained("clip_large_fp16","en") \ + .setInputCols(["image_assembler"]) \ + .setOutputCol("label") \ + .setCandidateLabels(candidateLabels) + +pipeline = Pipeline().setStages([ImageAssembler, imageClassifier]) +pipelineModel = pipeline.fit(imageDF) +pipelineDF = pipelineModel.transform(imageDF) + + +``` +```scala + + +val imageDF = ResourceHelper.spark.read + .format("image") + .option("dropInvalid", value = true) + .load("src/test/resources/image/") + +val candidateLabels = Array( + "a photo of a bird", + "a photo of a cat", + "a photo of a dog", + "a photo of a hen", + "a photo of a hippo", + "a photo of a room", + "a photo of a tractor", + "a photo of an ostrich", + "a photo of an ox") + +val imageAssembler = new ImageAssembler() + .setInputCol("image") + .setOutputCol("image_assembler") + +val imageClassifier = CLIPForZeroShotClassification.pretrained("clip_large_fp16","en") \ + .setInputCols(Array("image_assembler")) \ + .setOutputCol("label") \ + .setCandidateLabels(candidateLabels) + +val pipeline = new Pipeline().setStages(Array(imageAssembler, imageClassifier)) +val pipelineModel = pipeline.fit(imageDF) +val pipelineDF = pipelineModel.transform(imageDF) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clip_large_fp16| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[label]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/dahwinsingularity/clip_large_fp16 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-clip_rsicd_ngit_en.md b/docs/_posts/ahmedlone127/2024-09-05-clip_rsicd_ngit_en.md new file mode 100644 index 00000000000000..886be9968fea19 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-clip_rsicd_ngit_en.md @@ -0,0 +1,120 @@ +--- +layout: model +title: English clip_rsicd_ngit CLIPForZeroShotClassification from Ngit +author: John Snow Labs +name: clip_rsicd_ngit +date: 2024-09-05 +tags: [en, open_source, onnx, zero_shot, clip, image] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CLIPForZeroShotClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clip_rsicd_ngit` is a English model originally trained by Ngit. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clip_rsicd_ngit_en_5.5.0_3.0_1725522853598.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clip_rsicd_ngit_en_5.5.0_3.0_1725522853598.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +imageDF = spark.read \ + .format("image") \ + .option("dropInvalid", value = True) \ + .load("src/test/resources/image/") + +candidateLabels = [ + "a photo of a bird", + "a photo of a cat", + "a photo of a dog", + "a photo of a hen", + "a photo of a hippo", + "a photo of a room", + "a photo of a tractor", + "a photo of an ostrich", + "a photo of an ox"] + +ImageAssembler = ImageAssembler() \ + .setInputCol("image") \ + .setOutputCol("image_assembler") + +imageClassifier = CLIPForZeroShotClassification.pretrained("clip_rsicd_ngit","en") \ + .setInputCols(["image_assembler"]) \ + .setOutputCol("label") \ + .setCandidateLabels(candidateLabels) + +pipeline = Pipeline().setStages([ImageAssembler, imageClassifier]) +pipelineModel = pipeline.fit(imageDF) +pipelineDF = pipelineModel.transform(imageDF) + + +``` +```scala + + +val imageDF = ResourceHelper.spark.read + .format("image") + .option("dropInvalid", value = true) + .load("src/test/resources/image/") + +val candidateLabels = Array( + "a photo of a bird", + "a photo of a cat", + "a photo of a dog", + "a photo of a hen", + "a photo of a hippo", + "a photo of a room", + "a photo of a tractor", + "a photo of an ostrich", + "a photo of an ox") + +val imageAssembler = new ImageAssembler() + .setInputCol("image") + .setOutputCol("image_assembler") + +val imageClassifier = CLIPForZeroShotClassification.pretrained("clip_rsicd_ngit","en") \ + .setInputCols(Array("image_assembler")) \ + .setOutputCol("label") \ + .setCandidateLabels(candidateLabels) + +val pipeline = new Pipeline().setStages(Array(imageAssembler, imageClassifier)) +val pipelineModel = pipeline.fit(imageDF) +val pipelineDF = pipelineModel.transform(imageDF) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clip_rsicd_ngit| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[label]| +|Language:|en| +|Size:|567.3 MB| + +## References + +https://huggingface.co/Ngit/clip-rsicd \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-clip_rsicd_ngit_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-clip_rsicd_ngit_pipeline_en.md new file mode 100644 index 00000000000000..9f6d57db1e7768 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-clip_rsicd_ngit_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English clip_rsicd_ngit_pipeline pipeline CLIPForZeroShotClassification from Ngit +author: John Snow Labs +name: clip_rsicd_ngit_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clip_rsicd_ngit_pipeline` is a English model originally trained by Ngit. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clip_rsicd_ngit_pipeline_en_5.5.0_3.0_1725522882690.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clip_rsicd_ngit_pipeline_en_5.5.0_3.0_1725522882690.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("clip_rsicd_ngit_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("clip_rsicd_ngit_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
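+Unlike the text pipelines above, this pipeline starts with an `ImageAssembler`, so the DataFrame passed to `transform` has to contain Spark's binary `image` column rather than a `text` column. A rough sketch under that assumption follows; the image path is a placeholder, and the candidate labels are whatever the pipeline was exported with.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+# Load images with Spark's built-in image data source
+imageDF = spark.read.format("image") \
+    .option("dropInvalid", True) \
+    .load("path/to/images")
+
+pipeline = PretrainedPipeline("clip_rsicd_ngit_pipeline", lang="en")
+annotations = pipeline.transform(imageDF)
+```
+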
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clip_rsicd_ngit_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|567.3 MB| + +## References + +https://huggingface.co/Ngit/clip-rsicd + +## Included Models + +- ImageAssembler +- CLIPForZeroShotClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-clip_seed_vit_8_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-clip_seed_vit_8_pipeline_en.md new file mode 100644 index 00000000000000..f94d89fa63dc31 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-clip_seed_vit_8_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English clip_seed_vit_8_pipeline pipeline CLIPForZeroShotClassification from zabir735 +author: John Snow Labs +name: clip_seed_vit_8_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clip_seed_vit_8_pipeline` is a English model originally trained by zabir735. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clip_seed_vit_8_pipeline_en_5.5.0_3.0_1725522633311.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clip_seed_vit_8_pipeline_en_5.5.0_3.0_1725522633311.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("clip_seed_vit_8_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("clip_seed_vit_8_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clip_seed_vit_8_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|561.2 MB| + +## References + +https://huggingface.co/zabir735/clip-seed-vit-8 + +## Included Models + +- ImageAssembler +- CLIPForZeroShotClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-clip_vit_base_patch16_adasdimchom_en.md b/docs/_posts/ahmedlone127/2024-09-05-clip_vit_base_patch16_adasdimchom_en.md new file mode 100644 index 00000000000000..2190a6b3759ea0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-clip_vit_base_patch16_adasdimchom_en.md @@ -0,0 +1,120 @@ +--- +layout: model +title: English clip_vit_base_patch16_adasdimchom CLIPForZeroShotClassification from adasdimchom +author: John Snow Labs +name: clip_vit_base_patch16_adasdimchom +date: 2024-09-05 +tags: [en, open_source, onnx, zero_shot, clip, image] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CLIPForZeroShotClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clip_vit_base_patch16_adasdimchom` is a English model originally trained by adasdimchom. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clip_vit_base_patch16_adasdimchom_en_5.5.0_3.0_1725523612356.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clip_vit_base_patch16_adasdimchom_en_5.5.0_3.0_1725523612356.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +imageDF = spark.read \ + .format("image") \ + .option("dropInvalid", value = True) \ + .load("src/test/resources/image/") + +candidateLabels = [ + "a photo of a bird", + "a photo of a cat", + "a photo of a dog", + "a photo of a hen", + "a photo of a hippo", + "a photo of a room", + "a photo of a tractor", + "a photo of an ostrich", + "a photo of an ox"] + +ImageAssembler = ImageAssembler() \ + .setInputCol("image") \ + .setOutputCol("image_assembler") + +imageClassifier = CLIPForZeroShotClassification.pretrained("clip_vit_base_patch16_adasdimchom","en") \ + .setInputCols(["image_assembler"]) \ + .setOutputCol("label") \ + .setCandidateLabels(candidateLabels) + +pipeline = Pipeline().setStages([ImageAssembler, imageClassifier]) +pipelineModel = pipeline.fit(imageDF) +pipelineDF = pipelineModel.transform(imageDF) + + +``` +```scala + + +val imageDF = ResourceHelper.spark.read + .format("image") + .option("dropInvalid", value = true) + .load("src/test/resources/image/") + +val candidateLabels = Array( + "a photo of a bird", + "a photo of a cat", + "a photo of a dog", + "a photo of a hen", + "a photo of a hippo", + "a photo of a room", + "a photo of a tractor", + "a photo of an ostrich", + "a photo of an ox") + +val imageAssembler = new ImageAssembler() + .setInputCol("image") + .setOutputCol("image_assembler") + +val imageClassifier = CLIPForZeroShotClassification.pretrained("clip_vit_base_patch16_adasdimchom","en") \ + .setInputCols(Array("image_assembler")) \ + .setOutputCol("label") \ + .setCandidateLabels(candidateLabels) + +val pipeline = new Pipeline().setStages(Array(imageAssembler, imageClassifier)) +val pipelineModel = pipeline.fit(imageDF) +val pipelineDF = pipelineModel.transform(imageDF) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clip_vit_base_patch16_adasdimchom| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[label]| +|Language:|en| +|Size:|393.8 MB| + +## References + +https://huggingface.co/adasdimchom/clip-vit-base-patch16 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-clip_vit_base_patch16_adasdimchom_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-clip_vit_base_patch16_adasdimchom_pipeline_en.md new file mode 100644 index 00000000000000..a87a843e869f35 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-clip_vit_base_patch16_adasdimchom_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English clip_vit_base_patch16_adasdimchom_pipeline pipeline CLIPForZeroShotClassification from adasdimchom +author: John Snow Labs +name: clip_vit_base_patch16_adasdimchom_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clip_vit_base_patch16_adasdimchom_pipeline` is a English model originally trained by adasdimchom. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clip_vit_base_patch16_adasdimchom_pipeline_en_5.5.0_3.0_1725523701720.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clip_vit_base_patch16_adasdimchom_pipeline_en_5.5.0_3.0_1725523701720.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("clip_vit_base_patch16_adasdimchom_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("clip_vit_base_patch16_adasdimchom_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clip_vit_base_patch16_adasdimchom_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|393.8 MB| + +## References + +https://huggingface.co/adasdimchom/clip-vit-base-patch16 + +## Included Models + +- ImageAssembler +- CLIPForZeroShotClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-clip_vit_base_patch16_img_text_relevancy_en.md b/docs/_posts/ahmedlone127/2024-09-05-clip_vit_base_patch16_img_text_relevancy_en.md new file mode 100644 index 00000000000000..04ed93719ebce1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-clip_vit_base_patch16_img_text_relevancy_en.md @@ -0,0 +1,120 @@ +--- +layout: model +title: English clip_vit_base_patch16_img_text_relevancy CLIPForZeroShotClassification from jancuhel +author: John Snow Labs +name: clip_vit_base_patch16_img_text_relevancy +date: 2024-09-05 +tags: [en, open_source, onnx, zero_shot, clip, image] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CLIPForZeroShotClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clip_vit_base_patch16_img_text_relevancy` is a English model originally trained by jancuhel. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clip_vit_base_patch16_img_text_relevancy_en_5.5.0_3.0_1725522344163.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clip_vit_base_patch16_img_text_relevancy_en_5.5.0_3.0_1725522344163.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+imageDF = spark.read \
+    .format("image") \
+    .option("dropInvalid", value = True) \
+    .load("src/test/resources/image/")
+
+candidateLabels = [
+    "a photo of a bird",
+    "a photo of a cat",
+    "a photo of a dog",
+    "a photo of a hen",
+    "a photo of a hippo",
+    "a photo of a room",
+    "a photo of a tractor",
+    "a photo of an ostrich",
+    "a photo of an ox"]
+
+imageAssembler = ImageAssembler() \
+    .setInputCol("image") \
+    .setOutputCol("image_assembler")
+
+imageClassifier = CLIPForZeroShotClassification.pretrained("clip_vit_base_patch16_img_text_relevancy","en") \
+    .setInputCols(["image_assembler"]) \
+    .setOutputCol("label") \
+    .setCandidateLabels(candidateLabels)
+
+pipeline = Pipeline().setStages([imageAssembler, imageClassifier])
+pipelineModel = pipeline.fit(imageDF)
+pipelineDF = pipelineModel.transform(imageDF)
+
+```
+```scala
+
+val imageDF = ResourceHelper.spark.read
+  .format("image")
+  .option("dropInvalid", value = true)
+  .load("src/test/resources/image/")
+
+val candidateLabels = Array(
+  "a photo of a bird",
+  "a photo of a cat",
+  "a photo of a dog",
+  "a photo of a hen",
+  "a photo of a hippo",
+  "a photo of a room",
+  "a photo of a tractor",
+  "a photo of an ostrich",
+  "a photo of an ox")
+
+val imageAssembler = new ImageAssembler()
+  .setInputCol("image")
+  .setOutputCol("image_assembler")
+
+val imageClassifier = CLIPForZeroShotClassification.pretrained("clip_vit_base_patch16_img_text_relevancy","en")
+  .setInputCols(Array("image_assembler"))
+  .setOutputCol("label")
+  .setCandidateLabels(candidateLabels)
+
+val pipeline = new Pipeline().setStages(Array(imageAssembler, imageClassifier))
+val pipelineModel = pipeline.fit(imageDF)
+val pipelineDF = pipelineModel.transform(imageDF)
+
+```
+</div>
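+
+After the snippet above has run, a quick way to pair each image with its predicted label (column names as defined in that snippet; the `image.origin` field comes from Spark's image data source):
+
+```python
+# Show the source path of each image next to the predicted candidate label.
+pipelineDF.select("image.origin", "label.result").show(truncate = False)
+```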
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clip_vit_base_patch16_img_text_relevancy| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[label]| +|Language:|en| +|Size:|393.9 MB| + +## References + +https://huggingface.co/jancuhel/clip-vit-base-patch16-img-text-relevancy \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-clip_vit_base_patch32_demo_xiaoliy2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-clip_vit_base_patch32_demo_xiaoliy2_pipeline_en.md new file mode 100644 index 00000000000000..a3bd8a533ed8c7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-clip_vit_base_patch32_demo_xiaoliy2_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English clip_vit_base_patch32_demo_xiaoliy2_pipeline pipeline CLIPForZeroShotClassification from xiaoliy2 +author: John Snow Labs +name: clip_vit_base_patch32_demo_xiaoliy2_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clip_vit_base_patch32_demo_xiaoliy2_pipeline` is a English model originally trained by xiaoliy2. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clip_vit_base_patch32_demo_xiaoliy2_pipeline_en_5.5.0_3.0_1725523566300.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clip_vit_base_patch32_demo_xiaoliy2_pipeline_en_5.5.0_3.0_1725523566300.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("clip_vit_base_patch32_demo_xiaoliy2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("clip_vit_base_patch32_demo_xiaoliy2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clip_vit_base_patch32_demo_xiaoliy2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|397.5 MB| + +## References + +https://huggingface.co/xiaoliy2/clip-vit-base-patch32-demo + +## Included Models + +- ImageAssembler +- CLIPForZeroShotClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-clip_vit_large_patch14_224_korean_en.md b/docs/_posts/ahmedlone127/2024-09-05-clip_vit_large_patch14_224_korean_en.md new file mode 100644 index 00000000000000..52809eb234e6dc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-clip_vit_large_patch14_224_korean_en.md @@ -0,0 +1,120 @@ +--- +layout: model +title: English clip_vit_large_patch14_224_korean CLIPForZeroShotClassification from harry-kr +author: John Snow Labs +name: clip_vit_large_patch14_224_korean +date: 2024-09-05 +tags: [en, open_source, onnx, zero_shot, clip, image] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CLIPForZeroShotClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clip_vit_large_patch14_224_korean` is a English model originally trained by harry-kr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clip_vit_large_patch14_224_korean_en_5.5.0_3.0_1725523195267.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clip_vit_large_patch14_224_korean_en_5.5.0_3.0_1725523195267.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+imageDF = spark.read \
+    .format("image") \
+    .option("dropInvalid", value = True) \
+    .load("src/test/resources/image/")
+
+candidateLabels = [
+    "a photo of a bird",
+    "a photo of a cat",
+    "a photo of a dog",
+    "a photo of a hen",
+    "a photo of a hippo",
+    "a photo of a room",
+    "a photo of a tractor",
+    "a photo of an ostrich",
+    "a photo of an ox"]
+
+imageAssembler = ImageAssembler() \
+    .setInputCol("image") \
+    .setOutputCol("image_assembler")
+
+imageClassifier = CLIPForZeroShotClassification.pretrained("clip_vit_large_patch14_224_korean","en") \
+    .setInputCols(["image_assembler"]) \
+    .setOutputCol("label") \
+    .setCandidateLabels(candidateLabels)
+
+pipeline = Pipeline().setStages([imageAssembler, imageClassifier])
+pipelineModel = pipeline.fit(imageDF)
+pipelineDF = pipelineModel.transform(imageDF)
+
+```
+```scala
+
+val imageDF = ResourceHelper.spark.read
+  .format("image")
+  .option("dropInvalid", value = true)
+  .load("src/test/resources/image/")
+
+val candidateLabels = Array(
+  "a photo of a bird",
+  "a photo of a cat",
+  "a photo of a dog",
+  "a photo of a hen",
+  "a photo of a hippo",
+  "a photo of a room",
+  "a photo of a tractor",
+  "a photo of an ostrich",
+  "a photo of an ox")
+
+val imageAssembler = new ImageAssembler()
+  .setInputCol("image")
+  .setOutputCol("image_assembler")
+
+val imageClassifier = CLIPForZeroShotClassification.pretrained("clip_vit_large_patch14_224_korean","en")
+  .setInputCols(Array("image_assembler"))
+  .setOutputCol("label")
+  .setCandidateLabels(candidateLabels)
+
+val pipeline = new Pipeline().setStages(Array(imageAssembler, imageClassifier))
+val pipelineModel = pipeline.fit(imageDF)
+val pipelineDF = pipelineModel.transform(imageDF)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clip_vit_large_patch14_224_korean| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[label]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/harry-kr/clip-vit-large-patch14-224-ko \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-clip_vit_large_patch14_224_korean_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-clip_vit_large_patch14_224_korean_pipeline_en.md new file mode 100644 index 00000000000000..7df977cb0213f2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-clip_vit_large_patch14_224_korean_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English clip_vit_large_patch14_224_korean_pipeline pipeline CLIPForZeroShotClassification from harry-kr +author: John Snow Labs +name: clip_vit_large_patch14_224_korean_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clip_vit_large_patch14_224_korean_pipeline` is a English model originally trained by harry-kr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clip_vit_large_patch14_224_korean_pipeline_en_5.5.0_3.0_1725523427055.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clip_vit_large_patch14_224_korean_pipeline_en_5.5.0_3.0_1725523427055.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("clip_vit_large_patch14_224_korean_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("clip_vit_large_patch14_224_korean_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
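+
+Given the ~1.2 GB size reported in the table below, it may help to give the driver extra memory when starting the session; a sketch under that assumption (the memory value and the image folder path are illustrative):
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+# Start Spark NLP with more driver memory for the large CLIP pipeline.
+spark = sparknlp.start(memory = "16G")
+
+df = spark.read.format("image").option("dropInvalid", True).load("src/test/resources/image/")
+pipeline = PretrainedPipeline("clip_vit_large_patch14_224_korean_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+```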
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clip_vit_large_patch14_224_korean_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/harry-kr/clip-vit-large-patch14-224-ko + +## Included Models + +- ImageAssembler +- CLIPForZeroShotClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-clip_vit_large_patch14_custom_handler_en.md b/docs/_posts/ahmedlone127/2024-09-05-clip_vit_large_patch14_custom_handler_en.md new file mode 100644 index 00000000000000..d9d14993f0b45e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-clip_vit_large_patch14_custom_handler_en.md @@ -0,0 +1,120 @@ +--- +layout: model +title: English clip_vit_large_patch14_custom_handler CLIPForZeroShotClassification from aayushgs +author: John Snow Labs +name: clip_vit_large_patch14_custom_handler +date: 2024-09-05 +tags: [en, open_source, onnx, zero_shot, clip, image] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CLIPForZeroShotClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clip_vit_large_patch14_custom_handler` is a English model originally trained by aayushgs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clip_vit_large_patch14_custom_handler_en_5.5.0_3.0_1725540364479.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clip_vit_large_patch14_custom_handler_en_5.5.0_3.0_1725540364479.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+imageDF = spark.read \
+    .format("image") \
+    .option("dropInvalid", value = True) \
+    .load("src/test/resources/image/")
+
+candidateLabels = [
+    "a photo of a bird",
+    "a photo of a cat",
+    "a photo of a dog",
+    "a photo of a hen",
+    "a photo of a hippo",
+    "a photo of a room",
+    "a photo of a tractor",
+    "a photo of an ostrich",
+    "a photo of an ox"]
+
+imageAssembler = ImageAssembler() \
+    .setInputCol("image") \
+    .setOutputCol("image_assembler")
+
+imageClassifier = CLIPForZeroShotClassification.pretrained("clip_vit_large_patch14_custom_handler","en") \
+    .setInputCols(["image_assembler"]) \
+    .setOutputCol("label") \
+    .setCandidateLabels(candidateLabels)
+
+pipeline = Pipeline().setStages([imageAssembler, imageClassifier])
+pipelineModel = pipeline.fit(imageDF)
+pipelineDF = pipelineModel.transform(imageDF)
+
+```
+```scala
+
+val imageDF = ResourceHelper.spark.read
+  .format("image")
+  .option("dropInvalid", value = true)
+  .load("src/test/resources/image/")
+
+val candidateLabels = Array(
+  "a photo of a bird",
+  "a photo of a cat",
+  "a photo of a dog",
+  "a photo of a hen",
+  "a photo of a hippo",
+  "a photo of a room",
+  "a photo of a tractor",
+  "a photo of an ostrich",
+  "a photo of an ox")
+
+val imageAssembler = new ImageAssembler()
+  .setInputCol("image")
+  .setOutputCol("image_assembler")
+
+val imageClassifier = CLIPForZeroShotClassification.pretrained("clip_vit_large_patch14_custom_handler","en")
+  .setInputCols(Array("image_assembler"))
+  .setOutputCol("label")
+  .setCandidateLabels(candidateLabels)
+
+val pipeline = new Pipeline().setStages(Array(imageAssembler, imageClassifier))
+val pipelineModel = pipeline.fit(imageDF)
+val pipelineDF = pipelineModel.transform(imageDF)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clip_vit_large_patch14_custom_handler| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[label]| +|Language:|en| +|Size:|1.1 GB| + +## References + +https://huggingface.co/aayushgs/clip-vit-large-patch14-custom-handler \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-clip_vit_large_patch14_custom_handler_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-clip_vit_large_patch14_custom_handler_pipeline_en.md new file mode 100644 index 00000000000000..80d22cf288e042 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-clip_vit_large_patch14_custom_handler_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English clip_vit_large_patch14_custom_handler_pipeline pipeline CLIPForZeroShotClassification from aayushgs +author: John Snow Labs +name: clip_vit_large_patch14_custom_handler_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clip_vit_large_patch14_custom_handler_pipeline` is a English model originally trained by aayushgs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clip_vit_large_patch14_custom_handler_pipeline_en_5.5.0_3.0_1725540647997.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clip_vit_large_patch14_custom_handler_pipeline_en_5.5.0_3.0_1725540647997.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("clip_vit_large_patch14_custom_handler_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("clip_vit_large_patch14_custom_handler_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
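+
+The pre-built pipeline comes with its stages already configured. If a different set of candidate labels is needed, the annotator from the companion `clip_vit_large_patch14_custom_handler` model card can be used directly instead; a sketch (the label set below is purely illustrative):
+
+```python
+from sparknlp.annotator import CLIPForZeroShotClassification
+
+imageClassifier = CLIPForZeroShotClassification.pretrained("clip_vit_large_patch14_custom_handler", "en") \
+    .setInputCols(["image_assembler"]) \
+    .setOutputCol("label") \
+    .setCandidateLabels(["a photo of a product", "a photo of a person", "a photo of a landscape"])
+```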
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clip_vit_large_patch14_custom_handler_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.1 GB| + +## References + +https://huggingface.co/aayushgs/clip-vit-large-patch14-custom-handler + +## Included Models + +- ImageAssembler +- CLIPForZeroShotClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-clip_vit_large_patch14_finetuned_general_en.md b/docs/_posts/ahmedlone127/2024-09-05-clip_vit_large_patch14_finetuned_general_en.md new file mode 100644 index 00000000000000..b036a226f03558 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-clip_vit_large_patch14_finetuned_general_en.md @@ -0,0 +1,120 @@ +--- +layout: model +title: English clip_vit_large_patch14_finetuned_general CLIPForZeroShotClassification from vinluvie +author: John Snow Labs +name: clip_vit_large_patch14_finetuned_general +date: 2024-09-05 +tags: [en, open_source, onnx, zero_shot, clip, image] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CLIPForZeroShotClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clip_vit_large_patch14_finetuned_general` is a English model originally trained by vinluvie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clip_vit_large_patch14_finetuned_general_en_5.5.0_3.0_1725540803072.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clip_vit_large_patch14_finetuned_general_en_5.5.0_3.0_1725540803072.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+imageDF = spark.read \
+    .format("image") \
+    .option("dropInvalid", value = True) \
+    .load("src/test/resources/image/")
+
+candidateLabels = [
+    "a photo of a bird",
+    "a photo of a cat",
+    "a photo of a dog",
+    "a photo of a hen",
+    "a photo of a hippo",
+    "a photo of a room",
+    "a photo of a tractor",
+    "a photo of an ostrich",
+    "a photo of an ox"]
+
+imageAssembler = ImageAssembler() \
+    .setInputCol("image") \
+    .setOutputCol("image_assembler")
+
+imageClassifier = CLIPForZeroShotClassification.pretrained("clip_vit_large_patch14_finetuned_general","en") \
+    .setInputCols(["image_assembler"]) \
+    .setOutputCol("label") \
+    .setCandidateLabels(candidateLabels)
+
+pipeline = Pipeline().setStages([imageAssembler, imageClassifier])
+pipelineModel = pipeline.fit(imageDF)
+pipelineDF = pipelineModel.transform(imageDF)
+
+```
+```scala
+
+val imageDF = ResourceHelper.spark.read
+  .format("image")
+  .option("dropInvalid", value = true)
+  .load("src/test/resources/image/")
+
+val candidateLabels = Array(
+  "a photo of a bird",
+  "a photo of a cat",
+  "a photo of a dog",
+  "a photo of a hen",
+  "a photo of a hippo",
+  "a photo of a room",
+  "a photo of a tractor",
+  "a photo of an ostrich",
+  "a photo of an ox")
+
+val imageAssembler = new ImageAssembler()
+  .setInputCol("image")
+  .setOutputCol("image_assembler")
+
+val imageClassifier = CLIPForZeroShotClassification.pretrained("clip_vit_large_patch14_finetuned_general","en")
+  .setInputCols(Array("image_assembler"))
+  .setOutputCol("label")
+  .setCandidateLabels(candidateLabels)
+
+val pipeline = new Pipeline().setStages(Array(imageAssembler, imageClassifier))
+val pipelineModel = pipeline.fit(imageDF)
+val pipelineDF = pipelineModel.transform(imageDF)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clip_vit_large_patch14_finetuned_general| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[label]| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/vinluvie/clip-vit-large-patch14-finetuned-general \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-clip_vit_large_patch14_finetuned_general_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-clip_vit_large_patch14_finetuned_general_pipeline_en.md new file mode 100644 index 00000000000000..a2ef569ee83452 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-clip_vit_large_patch14_finetuned_general_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English clip_vit_large_patch14_finetuned_general_pipeline pipeline CLIPForZeroShotClassification from vinluvie +author: John Snow Labs +name: clip_vit_large_patch14_finetuned_general_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clip_vit_large_patch14_finetuned_general_pipeline` is a English model originally trained by vinluvie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clip_vit_large_patch14_finetuned_general_pipeline_en_5.5.0_3.0_1725540883216.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clip_vit_large_patch14_finetuned_general_pipeline_en_5.5.0_3.0_1725540883216.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("clip_vit_large_patch14_finetuned_general_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("clip_vit_large_patch14_finetuned_general_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clip_vit_large_patch14_finetuned_general_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/vinluvie/clip-vit-large-patch14-finetuned-general + +## Included Models + +- ImageAssembler +- CLIPForZeroShotClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-clip_zabir_2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-clip_zabir_2_pipeline_en.md new file mode 100644 index 00000000000000..3914554817152c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-clip_zabir_2_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English clip_zabir_2_pipeline pipeline CLIPForZeroShotClassification from zabir735 +author: John Snow Labs +name: clip_zabir_2_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clip_zabir_2_pipeline` is a English model originally trained by zabir735. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clip_zabir_2_pipeline_en_5.5.0_3.0_1725540655665.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clip_zabir_2_pipeline_en_5.5.0_3.0_1725540655665.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("clip_zabir_2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("clip_zabir_2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clip_zabir_2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|561.2 MB| + +## References + +https://huggingface.co/zabir735/clip-zabir-2 + +## Included Models + +- ImageAssembler +- CLIPForZeroShotClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-codebertapy_en.md b/docs/_posts/ahmedlone127/2024-09-05-codebertapy_en.md new file mode 100644 index 00000000000000..c82d9d3f256608 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-codebertapy_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English codebertapy RoBertaEmbeddings from mrm8488 +author: John Snow Labs +name: codebertapy +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`codebertapy` is a English model originally trained by mrm8488. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/codebertapy_en_5.5.0_3.0_1725577715648.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/codebertapy_en_5.5.0_3.0_1725577715648.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("codebertapy","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("codebertapy","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|codebertapy| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|311.7 MB| + +## References + +https://huggingface.co/mrm8488/CodeBERTaPy \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-codebertapy_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-codebertapy_pipeline_en.md new file mode 100644 index 00000000000000..9219fd1bb11cd9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-codebertapy_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English codebertapy_pipeline pipeline RoBertaEmbeddings from mrm8488 +author: John Snow Labs +name: codebertapy_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`codebertapy_pipeline` is a English model originally trained by mrm8488. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/codebertapy_pipeline_en_5.5.0_3.0_1725577731432.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/codebertapy_pipeline_en_5.5.0_3.0_1725577731432.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("codebertapy_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("codebertapy_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
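+
+Here `df` only needs a `text` column; a minimal sketch (the sample sentence mirrors the one used in the companion `codebertapy` card):
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("codebertapy_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+annotations.printSchema()  # inspect the annotation columns the pipeline produced
+```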
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|codebertapy_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|311.8 MB| + +## References + +https://huggingface.co/mrm8488/CodeBERTaPy + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-codegeneration_en.md b/docs/_posts/ahmedlone127/2024-09-05-codegeneration_en.md new file mode 100644 index 00000000000000..dcbd805eed359a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-codegeneration_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English codegeneration RoBertaEmbeddings from SushantGautam +author: John Snow Labs +name: codegeneration +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`codegeneration` is a English model originally trained by SushantGautam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/codegeneration_en_5.5.0_3.0_1725578492375.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/codegeneration_en_5.5.0_3.0_1725578492375.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("codegeneration","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("codegeneration","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|codegeneration| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|465.9 MB| + +## References + +https://huggingface.co/SushantGautam/CodeGeneration \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-codegeneration_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-codegeneration_pipeline_en.md new file mode 100644 index 00000000000000..db2ddbade0737f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-codegeneration_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English codegeneration_pipeline pipeline RoBertaEmbeddings from SushantGautam +author: John Snow Labs +name: codegeneration_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`codegeneration_pipeline` is a English model originally trained by SushantGautam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/codegeneration_pipeline_en_5.5.0_3.0_1725578515742.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/codegeneration_pipeline_en_5.5.0_3.0_1725578515742.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("codegeneration_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("codegeneration_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|codegeneration_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|465.9 MB| + +## References + +https://huggingface.co/SushantGautam/CodeGeneration + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-commitpredictor_en.md b/docs/_posts/ahmedlone127/2024-09-05-commitpredictor_en.md new file mode 100644 index 00000000000000..c200a5326c8b65 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-commitpredictor_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English commitpredictor RoBertaEmbeddings from mamiksik +author: John Snow Labs +name: commitpredictor +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`commitpredictor` is a English model originally trained by mamiksik. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/commitpredictor_en_5.5.0_3.0_1725578306515.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/commitpredictor_en_5.5.0_3.0_1725578306515.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("commitpredictor","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("commitpredictor","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|commitpredictor| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|466.1 MB| + +## References + +https://huggingface.co/mamiksik/CommitPredictor \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-commitpredictor_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-commitpredictor_pipeline_en.md new file mode 100644 index 00000000000000..f8604358d9a68e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-commitpredictor_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English commitpredictor_pipeline pipeline RoBertaEmbeddings from mamiksik +author: John Snow Labs +name: commitpredictor_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`commitpredictor_pipeline` is a English model originally trained by mamiksik. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/commitpredictor_pipeline_en_5.5.0_3.0_1725578331475.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/commitpredictor_pipeline_en_5.5.0_3.0_1725578331475.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("commitpredictor_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("commitpredictor_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
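+
+As with the other embedding pipelines above, `df` is a plain text DataFrame; a sketch with a commit-message style input (the message itself is only an illustrative placeholder):
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+df = spark.createDataFrame([["Fix off-by-one error in the tokenizer loop"]]).toDF("text")
+
+pipeline = PretrainedPipeline("commitpredictor_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+```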
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|commitpredictor_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|466.1 MB| + +## References + +https://huggingface.co/mamiksik/CommitPredictor + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-conflibert_named_entity_recognition_en.md b/docs/_posts/ahmedlone127/2024-09-05-conflibert_named_entity_recognition_en.md new file mode 100644 index 00000000000000..14c79a8a8af623 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-conflibert_named_entity_recognition_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English conflibert_named_entity_recognition BertForTokenClassification from eventdata-utd +author: John Snow Labs +name: conflibert_named_entity_recognition +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`conflibert_named_entity_recognition` is a English model originally trained by eventdata-utd. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/conflibert_named_entity_recognition_en_5.5.0_3.0_1725563857636.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/conflibert_named_entity_recognition_en_5.5.0_3.0_1725563857636.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("conflibert_named_entity_recognition","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("conflibert_named_entity_recognition", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|conflibert_named_entity_recognition| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|406.0 MB| + +## References + +https://huggingface.co/eventdata-utd/conflibert-named-entity-recognition \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-conflibert_named_entity_recognition_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-conflibert_named_entity_recognition_pipeline_en.md new file mode 100644 index 00000000000000..2085bf04eec06a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-conflibert_named_entity_recognition_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English conflibert_named_entity_recognition_pipeline pipeline BertForTokenClassification from eventdata-utd +author: John Snow Labs +name: conflibert_named_entity_recognition_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`conflibert_named_entity_recognition_pipeline` is a English model originally trained by eventdata-utd. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/conflibert_named_entity_recognition_pipeline_en_5.5.0_3.0_1725563879650.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/conflibert_named_entity_recognition_pipeline_en_5.5.0_3.0_1725563879650.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("conflibert_named_entity_recognition_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("conflibert_named_entity_recognition_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
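+
+A minimal sketch for this NER pipeline (the input sentence is an illustrative assumption; the "ner" column name follows the companion model card):
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+df = spark.createDataFrame([["The peace talks were held in Geneva last week."]]).toDF("text")
+
+pipeline = PretrainedPipeline("conflibert_named_entity_recognition_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+# One NER tag per token.
+annotations.select("ner.result").show(truncate = False)
+```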
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|conflibert_named_entity_recognition_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.1 MB| + +## References + +https://huggingface.co/eventdata-utd/conflibert-named-entity-recognition + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-context_two_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-context_two_pipeline_en.md new file mode 100644 index 00000000000000..dca80d83dd402d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-context_two_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English context_two_pipeline pipeline DistilBertForSequenceClassification from SharonTudi +author: John Snow Labs +name: context_two_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`context_two_pipeline` is a English model originally trained by SharonTudi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/context_two_pipeline_en_5.5.0_3.0_1725507121353.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/context_two_pipeline_en_5.5.0_3.0_1725507121353.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("context_two_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("context_two_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|context_two_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|246.0 MB| + +## References + +https://huggingface.co/SharonTudi/CONTEXT_two + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-convbert_base_turkish_mc4_cased_pipeline_tr.md b/docs/_posts/ahmedlone127/2024-09-05-convbert_base_turkish_mc4_cased_pipeline_tr.md new file mode 100644 index 00000000000000..5ef7e9e7f8467c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-convbert_base_turkish_mc4_cased_pipeline_tr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Turkish convbert_base_turkish_mc4_cased_pipeline pipeline BertEmbeddings from dbmdz +author: John Snow Labs +name: convbert_base_turkish_mc4_cased_pipeline +date: 2024-09-05 +tags: [tr, open_source, pipeline, onnx] +task: Embeddings +language: tr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`convbert_base_turkish_mc4_cased_pipeline` is a Turkish model originally trained by dbmdz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/convbert_base_turkish_mc4_cased_pipeline_tr_5.5.0_3.0_1725552849037.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/convbert_base_turkish_mc4_cased_pipeline_tr_5.5.0_3.0_1725552849037.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("convbert_base_turkish_mc4_cased_pipeline", lang = "tr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("convbert_base_turkish_mc4_cased_pipeline", lang = "tr") +val annotations = pipeline.transform(df) + +``` +
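+
+Since this is a Turkish embeddings pipeline, `df` would normally hold Turkish text; a brief sketch (the sample sentence is only a placeholder):
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+df = spark.createDataFrame([["Spark NLP ile metin işlemek kolay."]]).toDF("text")
+
+pipeline = PretrainedPipeline("convbert_base_turkish_mc4_cased_pipeline", lang = "tr")
+annotations = pipeline.transform(df)
+annotations.printSchema()
+```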
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|convbert_base_turkish_mc4_cased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|tr| +|Size:|400.1 MB| + +## References + +https://huggingface.co/dbmdz/convbert-base-turkish-mc4-cased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-convbert_base_turkish_mc4_cased_tr.md b/docs/_posts/ahmedlone127/2024-09-05-convbert_base_turkish_mc4_cased_tr.md new file mode 100644 index 00000000000000..8abeaef7bb52bb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-convbert_base_turkish_mc4_cased_tr.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Turkish convbert_base_turkish_mc4_cased BertEmbeddings from dbmdz +author: John Snow Labs +name: convbert_base_turkish_mc4_cased +date: 2024-09-05 +tags: [tr, open_source, onnx, embeddings, bert] +task: Embeddings +language: tr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`convbert_base_turkish_mc4_cased` is a Turkish model originally trained by dbmdz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/convbert_base_turkish_mc4_cased_tr_5.5.0_3.0_1725552828261.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/convbert_base_turkish_mc4_cased_tr_5.5.0_3.0_1725552828261.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("convbert_base_turkish_mc4_cased","tr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("convbert_base_turkish_mc4_cased","tr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|convbert_base_turkish_mc4_cased| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|tr| +|Size:|400.0 MB| + +## References + +https://huggingface.co/dbmdz/convbert-base-turkish-mc4-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-convbert_base_turkish_mc4_uncased_tr.md b/docs/_posts/ahmedlone127/2024-09-05-convbert_base_turkish_mc4_uncased_tr.md new file mode 100644 index 00000000000000..42d63da7816926 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-convbert_base_turkish_mc4_uncased_tr.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Turkish convbert_base_turkish_mc4_uncased BertEmbeddings from dbmdz +author: John Snow Labs +name: convbert_base_turkish_mc4_uncased +date: 2024-09-05 +tags: [tr, open_source, onnx, embeddings, bert] +task: Embeddings +language: tr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`convbert_base_turkish_mc4_uncased` is a Turkish model originally trained by dbmdz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/convbert_base_turkish_mc4_uncased_tr_5.5.0_3.0_1725519892457.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/convbert_base_turkish_mc4_uncased_tr_5.5.0_3.0_1725519892457.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("convbert_base_turkish_mc4_uncased","tr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("convbert_base_turkish_mc4_uncased","tr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|convbert_base_turkish_mc4_uncased| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|tr| +|Size:|400.0 MB| + +## References + +https://huggingface.co/dbmdz/convbert-base-turkish-mc4-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-cpu_economywide_classifier_en.md b/docs/_posts/ahmedlone127/2024-09-05-cpu_economywide_classifier_en.md new file mode 100644 index 00000000000000..4cfee48baca4e7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-cpu_economywide_classifier_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English cpu_economywide_classifier MPNetEmbeddings from mtyrrell +author: John Snow Labs +name: cpu_economywide_classifier +date: 2024-09-05 +tags: [mpnet, en, open_source, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cpu_economywide_classifier` is a English model originally trained by mtyrrell. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cpu_economywide_classifier_en_5.5.0_3.0_1725575607146.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cpu_economywide_classifier_en_5.5.0_3.0_1725575607146.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+embeddings = MPNetEmbeddings.pretrained("cpu_economywide_classifier","en") \
+    .setInputCols(["documents"]) \
+    .setOutputCol("mpnet_embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, embeddings])
+
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val embeddings = MPNetEmbeddings
+    .pretrained("cpu_economywide_classifier", "en")
+    .setInputCols(Array("documents"))
+    .setOutputCol("mpnet_embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings))
+
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cpu_economywide_classifier| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.5 MB| + +## References + +References + +https://huggingface.co/mtyrrell/CPU_Economywide_Classifier \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-cpu_target_classifier_en.md b/docs/_posts/ahmedlone127/2024-09-05-cpu_target_classifier_en.md new file mode 100644 index 00000000000000..64f0d319ab391e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-cpu_target_classifier_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English cpu_target_classifier MPNetEmbeddings from mtyrrell +author: John Snow Labs +name: cpu_target_classifier +date: 2024-09-05 +tags: [mpnet, en, open_source, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cpu_target_classifier` is a English model originally trained by mtyrrell. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cpu_target_classifier_en_5.5.0_3.0_1725575420161.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cpu_target_classifier_en_5.5.0_3.0_1725575420161.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+embeddings = MPNetEmbeddings.pretrained("cpu_target_classifier","en") \
+    .setInputCols(["documents"]) \
+    .setOutputCol("mpnet_embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, embeddings])
+
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val embeddings = MPNetEmbeddings
+    .pretrained("cpu_target_classifier", "en")
+    .setInputCols(Array("documents"))
+    .setOutputCol("mpnet_embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings))
+
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cpu_target_classifier| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.5 MB| + +## References + +References + +https://huggingface.co/mtyrrell/CPU_Target_Classifier \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-cpu_target_classifier_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-cpu_target_classifier_pipeline_en.md new file mode 100644 index 00000000000000..70eb5a72b8dd85 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-cpu_target_classifier_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English cpu_target_classifier_pipeline pipeline MPNetForSequenceClassification from mtyrrell +author: John Snow Labs +name: cpu_target_classifier_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cpu_target_classifier_pipeline` is a English model originally trained by mtyrrell. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cpu_target_classifier_pipeline_en_5.5.0_3.0_1725575441162.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cpu_target_classifier_pipeline_en_5.5.0_3.0_1725575441162.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("cpu_target_classifier_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("cpu_target_classifier_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
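+
+The snippet above assumes `df` already exists. As a minimal sketch, any Spark DataFrame with a `text` column works (the column name is an assumption based on the DocumentAssembler stage listed under Included Models below):
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# Hypothetical input DataFrame with a single text column.
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("cpu_target_classifier_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+# Inspect the columns the pipeline stages add before selecting specific results.
+annotations.printSchema()
+```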
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cpu_target_classifier_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/mtyrrell/CPU_Target_Classifier + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-cross_encoder_v1_en.md b/docs/_posts/ahmedlone127/2024-09-05-cross_encoder_v1_en.md new file mode 100644 index 00000000000000..5be21a2436a9bf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-cross_encoder_v1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English cross_encoder_v1 XlmRoBertaForSequenceClassification from NeginShams +author: John Snow Labs +name: cross_encoder_v1 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cross_encoder_v1` is a English model originally trained by NeginShams. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cross_encoder_v1_en_5.5.0_3.0_1725536227111.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cross_encoder_v1_en_5.5.0_3.0_1725536227111.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("cross_encoder_v1","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("cross_encoder_v1", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
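+
+After the transform, the predicted label for each row sits in the `result` field of the `class` column configured above; a brief illustrative follow-up (output shape may vary by Spark version):
+
+```python
+# `result` holds the predicted label; per-class scores are typically in `metadata`.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```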
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cross_encoder_v1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|805.0 MB| + +## References + +https://huggingface.co/NeginShams/cross_encoder_v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-cross_encoder_v1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-cross_encoder_v1_pipeline_en.md new file mode 100644 index 00000000000000..fb1a60592019c1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-cross_encoder_v1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English cross_encoder_v1_pipeline pipeline XlmRoBertaForSequenceClassification from NeginShams +author: John Snow Labs +name: cross_encoder_v1_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cross_encoder_v1_pipeline` is a English model originally trained by NeginShams. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cross_encoder_v1_pipeline_en_5.5.0_3.0_1725536349456.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cross_encoder_v1_pipeline_en_5.5.0_3.0_1725536349456.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("cross_encoder_v1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("cross_encoder_v1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cross_encoder_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|805.0 MB| + +## References + +https://huggingface.co/NeginShams/cross_encoder_v1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-crossencoder_camembert_l10_mmarcofr_fr.md b/docs/_posts/ahmedlone127/2024-09-05-crossencoder_camembert_l10_mmarcofr_fr.md new file mode 100644 index 00000000000000..32a86634d908cb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-crossencoder_camembert_l10_mmarcofr_fr.md @@ -0,0 +1,94 @@ +--- +layout: model +title: French crossencoder_camembert_l10_mmarcofr CamemBertForSequenceClassification from antoinelouis +author: John Snow Labs +name: crossencoder_camembert_l10_mmarcofr +date: 2024-09-05 +tags: [fr, open_source, onnx, sequence_classification, camembert] +task: Text Classification +language: fr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`crossencoder_camembert_l10_mmarcofr` is a French model originally trained by antoinelouis. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/crossencoder_camembert_l10_mmarcofr_fr_5.5.0_3.0_1725543786600.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/crossencoder_camembert_l10_mmarcofr_fr_5.5.0_3.0_1725543786600.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = CamemBertForSequenceClassification.pretrained("crossencoder_camembert_l10_mmarcofr","fr") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = CamemBertForSequenceClassification.pretrained("crossencoder_camembert_l10_mmarcofr", "fr")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|crossencoder_camembert_l10_mmarcofr| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|fr| +|Size:|361.6 MB| + +## References + +https://huggingface.co/antoinelouis/crossencoder-camembert-L10-mmarcoFR \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-crossencoder_camembert_l10_mmarcofr_pipeline_fr.md b/docs/_posts/ahmedlone127/2024-09-05-crossencoder_camembert_l10_mmarcofr_pipeline_fr.md new file mode 100644 index 00000000000000..b58dea23656342 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-crossencoder_camembert_l10_mmarcofr_pipeline_fr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: French crossencoder_camembert_l10_mmarcofr_pipeline pipeline CamemBertForSequenceClassification from antoinelouis +author: John Snow Labs +name: crossencoder_camembert_l10_mmarcofr_pipeline +date: 2024-09-05 +tags: [fr, open_source, pipeline, onnx] +task: Text Classification +language: fr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`crossencoder_camembert_l10_mmarcofr_pipeline` is a French model originally trained by antoinelouis. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/crossencoder_camembert_l10_mmarcofr_pipeline_fr_5.5.0_3.0_1725543806832.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/crossencoder_camembert_l10_mmarcofr_pipeline_fr_5.5.0_3.0_1725543806832.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("crossencoder_camembert_l10_mmarcofr_pipeline", lang = "fr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("crossencoder_camembert_l10_mmarcofr_pipeline", lang = "fr") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|crossencoder_camembert_l10_mmarcofr_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fr| +|Size:|361.7 MB| + +## References + +https://huggingface.co/antoinelouis/crossencoder-camembert-L10-mmarcoFR + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-crossencoder_mminilmv2_l6_mmarcofr_fr.md b/docs/_posts/ahmedlone127/2024-09-05-crossencoder_mminilmv2_l6_mmarcofr_fr.md new file mode 100644 index 00000000000000..03cf081211f9f0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-crossencoder_mminilmv2_l6_mmarcofr_fr.md @@ -0,0 +1,94 @@ +--- +layout: model +title: French crossencoder_mminilmv2_l6_mmarcofr XlmRoBertaForSequenceClassification from antoinelouis +author: John Snow Labs +name: crossencoder_mminilmv2_l6_mmarcofr +date: 2024-09-05 +tags: [fr, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: fr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`crossencoder_mminilmv2_l6_mmarcofr` is a French model originally trained by antoinelouis. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/crossencoder_mminilmv2_l6_mmarcofr_fr_5.5.0_3.0_1725530200017.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/crossencoder_mminilmv2_l6_mmarcofr_fr_5.5.0_3.0_1725530200017.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("crossencoder_mminilmv2_l6_mmarcofr","fr") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("crossencoder_mminilmv2_l6_mmarcofr", "fr")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|crossencoder_mminilmv2_l6_mmarcofr| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|fr| +|Size:|368.3 MB| + +## References + +https://huggingface.co/antoinelouis/crossencoder-mMiniLMv2-L6-mmarcoFR \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-crossencoder_mminilmv2_l6_mmarcofr_pipeline_fr.md b/docs/_posts/ahmedlone127/2024-09-05-crossencoder_mminilmv2_l6_mmarcofr_pipeline_fr.md new file mode 100644 index 00000000000000..17ef9741aa68ac --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-crossencoder_mminilmv2_l6_mmarcofr_pipeline_fr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: French crossencoder_mminilmv2_l6_mmarcofr_pipeline pipeline XlmRoBertaForSequenceClassification from antoinelouis +author: John Snow Labs +name: crossencoder_mminilmv2_l6_mmarcofr_pipeline +date: 2024-09-05 +tags: [fr, open_source, pipeline, onnx] +task: Text Classification +language: fr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`crossencoder_mminilmv2_l6_mmarcofr_pipeline` is a French model originally trained by antoinelouis. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/crossencoder_mminilmv2_l6_mmarcofr_pipeline_fr_5.5.0_3.0_1725530219892.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/crossencoder_mminilmv2_l6_mmarcofr_pipeline_fr_5.5.0_3.0_1725530219892.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("crossencoder_mminilmv2_l6_mmarcofr_pipeline", lang = "fr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("crossencoder_mminilmv2_l6_mmarcofr_pipeline", lang = "fr") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|crossencoder_mminilmv2_l6_mmarcofr_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fr| +|Size:|368.3 MB| + +## References + +https://huggingface.co/antoinelouis/crossencoder-mMiniLMv2-L6-mmarcoFR + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-cryptocurrency_intent_search_detection_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-cryptocurrency_intent_search_detection_pipeline_en.md new file mode 100644 index 00000000000000..94fe58a65d8f91 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-cryptocurrency_intent_search_detection_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English cryptocurrency_intent_search_detection_pipeline pipeline XlmRoBertaForSequenceClassification from dadashzadeh +author: John Snow Labs +name: cryptocurrency_intent_search_detection_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cryptocurrency_intent_search_detection_pipeline` is a English model originally trained by dadashzadeh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cryptocurrency_intent_search_detection_pipeline_en_5.5.0_3.0_1725514451943.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cryptocurrency_intent_search_detection_pipeline_en_5.5.0_3.0_1725514451943.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("cryptocurrency_intent_search_detection_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("cryptocurrency_intent_search_detection_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cryptocurrency_intent_search_detection_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|813.8 MB| + +## References + +https://huggingface.co/dadashzadeh/cryptocurrency-intent-search-detection + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ct_cos_xlmr_20230814_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-ct_cos_xlmr_20230814_pipeline_en.md new file mode 100644 index 00000000000000..d9afee80375fed --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ct_cos_xlmr_20230814_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English ct_cos_xlmr_20230814_pipeline pipeline XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: ct_cos_xlmr_20230814_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ct_cos_xlmr_20230814_pipeline` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ct_cos_xlmr_20230814_pipeline_en_5.5.0_3.0_1725500086866.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ct_cos_xlmr_20230814_pipeline_en_5.5.0_3.0_1725500086866.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ct_cos_xlmr_20230814_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ct_cos_xlmr_20230814_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
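+
+Because this pipeline starts with a MultiDocumentAssembler (see Included Models below), the `df` passed to `transform` must carry the question/context columns that stage expects. A hedged sketch, with assumed column names that should be verified against the loaded pipeline:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("ct_cos_xlmr_20230814_pipeline", lang = "en")
+
+# The expected input column names can be read from the first stage.
+print(pipeline.model.stages[0].getInputCols())
+
+# Assumed here to be "question" and "context".
+df = spark.createDataFrame(
+    [["What framework do I use?", "I use spark-nlp."]]
+).toDF("question", "context")
+
+annotations = pipeline.transform(df)
+
+# "answer" is the span classifier's assumed output column.
+annotations.select("answer.result").show(truncate=False)
+```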
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ct_cos_xlmr_20230814_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|875.2 MB| + +## References + +https://huggingface.co/intanm/ct-cos-xlmr-20230814 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ct_cos_xlmr_20230923_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-ct_cos_xlmr_20230923_1_pipeline_en.md new file mode 100644 index 00000000000000..3bab9c1dc8415f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ct_cos_xlmr_20230923_1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English ct_cos_xlmr_20230923_1_pipeline pipeline XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: ct_cos_xlmr_20230923_1_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ct_cos_xlmr_20230923_1_pipeline` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ct_cos_xlmr_20230923_1_pipeline_en_5.5.0_3.0_1725500123960.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ct_cos_xlmr_20230923_1_pipeline_en_5.5.0_3.0_1725500123960.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ct_cos_xlmr_20230923_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ct_cos_xlmr_20230923_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ct_cos_xlmr_20230923_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|876.0 MB| + +## References + +https://huggingface.co/intanm/ct-cos-xlmr-20230923-1 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_20230908_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_20230908_pipeline_en.md new file mode 100644 index 00000000000000..a6944f19fc2630 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_20230908_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English ct_kld_xlmr_20230908_pipeline pipeline XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: ct_kld_xlmr_20230908_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ct_kld_xlmr_20230908_pipeline` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ct_kld_xlmr_20230908_pipeline_en_5.5.0_3.0_1725499163571.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ct_kld_xlmr_20230908_pipeline_en_5.5.0_3.0_1725499163571.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ct_kld_xlmr_20230908_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ct_kld_xlmr_20230908_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ct_kld_xlmr_20230908_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|875.2 MB| + +## References + +https://huggingface.co/intanm/ct-kld-xlmr-20230908 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_20230919_1_en.md b/docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_20230919_1_en.md new file mode 100644 index 00000000000000..d1a0913bf222d6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_20230919_1_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English ct_kld_xlmr_20230919_1 XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: ct_kld_xlmr_20230919_1 +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ct_kld_xlmr_20230919_1` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ct_kld_xlmr_20230919_1_en_5.5.0_3.0_1725571602392.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ct_kld_xlmr_20230919_1_en_5.5.0_3.0_1725571602392.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("ct_kld_xlmr_20230919_1","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("ct_kld_xlmr_20230919_1", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
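+
+The extracted span ends up in the `result` field of the `answer` column configured above; as a short illustrative follow-up:
+
+```python
+# Show the question, the context and the predicted answer span.
+pipelineDF.select("document_question.result", "document_context.result", "answer.result") \
+    .show(truncate=False)
+```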
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ct_kld_xlmr_20230919_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|875.8 MB| + +## References + +https://huggingface.co/intanm/ct-kld-xlmr-20230919-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_20230919_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_20230919_1_pipeline_en.md new file mode 100644 index 00000000000000..2172d7799f0bcc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_20230919_1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English ct_kld_xlmr_20230919_1_pipeline pipeline XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: ct_kld_xlmr_20230919_1_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ct_kld_xlmr_20230919_1_pipeline` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ct_kld_xlmr_20230919_1_pipeline_en_5.5.0_3.0_1725571670134.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ct_kld_xlmr_20230919_1_pipeline_en_5.5.0_3.0_1725571670134.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ct_kld_xlmr_20230919_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ct_kld_xlmr_20230919_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ct_kld_xlmr_20230919_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|875.8 MB| + +## References + +https://huggingface.co/intanm/ct-kld-xlmr-20230919-1 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_20230920_2_en.md b/docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_20230920_2_en.md new file mode 100644 index 00000000000000..60682900c9236b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_20230920_2_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English ct_kld_xlmr_20230920_2 XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: ct_kld_xlmr_20230920_2 +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ct_kld_xlmr_20230920_2` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ct_kld_xlmr_20230920_2_en_5.5.0_3.0_1725570835071.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ct_kld_xlmr_20230920_2_en_5.5.0_3.0_1725570835071.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("ct_kld_xlmr_20230920_2","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("ct_kld_xlmr_20230920_2", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ct_kld_xlmr_20230920_2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|876.0 MB| + +## References + +https://huggingface.co/intanm/ct-kld-xlmr-20230920-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_20230920_2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_20230920_2_pipeline_en.md new file mode 100644 index 00000000000000..996f100865db6f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_20230920_2_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English ct_kld_xlmr_20230920_2_pipeline pipeline XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: ct_kld_xlmr_20230920_2_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ct_kld_xlmr_20230920_2_pipeline` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ct_kld_xlmr_20230920_2_pipeline_en_5.5.0_3.0_1725570911237.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ct_kld_xlmr_20230920_2_pipeline_en_5.5.0_3.0_1725570911237.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ct_kld_xlmr_20230920_2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ct_kld_xlmr_20230920_2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ct_kld_xlmr_20230920_2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|876.0 MB| + +## References + +https://huggingface.co/intanm/ct-kld-xlmr-20230920-2 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_20230923_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_20230923_1_pipeline_en.md new file mode 100644 index 00000000000000..32bb6baefc234f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_20230923_1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English ct_kld_xlmr_20230923_1_pipeline pipeline XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: ct_kld_xlmr_20230923_1_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ct_kld_xlmr_20230923_1_pipeline` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ct_kld_xlmr_20230923_1_pipeline_en_5.5.0_3.0_1725498103529.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ct_kld_xlmr_20230923_1_pipeline_en_5.5.0_3.0_1725498103529.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ct_kld_xlmr_20230923_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ct_kld_xlmr_20230923_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ct_kld_xlmr_20230923_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|876.0 MB| + +## References + +https://huggingface.co/intanm/ct-kld-xlmr-20230923-1 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_idkmrc_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_idkmrc_pipeline_en.md new file mode 100644 index 00000000000000..fbeec3f9f7deb3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_idkmrc_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English ct_kld_xlmr_idkmrc_pipeline pipeline XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: ct_kld_xlmr_idkmrc_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ct_kld_xlmr_idkmrc_pipeline` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ct_kld_xlmr_idkmrc_pipeline_en_5.5.0_3.0_1725498784036.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ct_kld_xlmr_idkmrc_pipeline_en_5.5.0_3.0_1725498784036.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ct_kld_xlmr_idkmrc_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ct_kld_xlmr_idkmrc_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ct_kld_xlmr_idkmrc_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|834.0 MB| + +## References + +https://huggingface.co/intanm/ct-kld-xlmr-idkmrc + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_squadv2_en.md b/docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_squadv2_en.md new file mode 100644 index 00000000000000..735defc594cc86 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_squadv2_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English ct_kld_xlmr_squadv2 XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: ct_kld_xlmr_squadv2 +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ct_kld_xlmr_squadv2` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ct_kld_xlmr_squadv2_en_5.5.0_3.0_1725567375813.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ct_kld_xlmr_squadv2_en_5.5.0_3.0_1725567375813.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("ct_kld_xlmr_squadv2","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("ct_kld_xlmr_squadv2", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ct_kld_xlmr_squadv2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|876.0 MB| + +## References + +https://huggingface.co/intanm/ct-kld-xlmr-squadv2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_squadv2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_squadv2_pipeline_en.md new file mode 100644 index 00000000000000..d7fa8845c36810 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ct_kld_xlmr_squadv2_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English ct_kld_xlmr_squadv2_pipeline pipeline XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: ct_kld_xlmr_squadv2_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ct_kld_xlmr_squadv2_pipeline` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ct_kld_xlmr_squadv2_pipeline_en_5.5.0_3.0_1725567446166.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ct_kld_xlmr_squadv2_pipeline_en_5.5.0_3.0_1725567446166.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ct_kld_xlmr_squadv2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ct_kld_xlmr_squadv2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ct_kld_xlmr_squadv2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|876.0 MB| + +## References + +https://huggingface.co/intanm/ct-kld-xlmr-squadv2 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_10june23_1_en.md b/docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_10june23_1_en.md new file mode 100644 index 00000000000000..015ed772458a4e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_10june23_1_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English ct_qa_002_10june23_1 XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: ct_qa_002_10june23_1 +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ct_qa_002_10june23_1` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ct_qa_002_10june23_1_en_5.5.0_3.0_1725574487098.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ct_qa_002_10june23_1_en_5.5.0_3.0_1725574487098.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("ct_qa_002_10june23_1","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("ct_qa_002_10june23_1", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ct_qa_002_10june23_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|883.8 MB| + +## References + +https://huggingface.co/intanm/ct-qa-002-10june23-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_10june23_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_10june23_1_pipeline_en.md new file mode 100644 index 00000000000000..64f95d95b83423 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_10june23_1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English ct_qa_002_10june23_1_pipeline pipeline XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: ct_qa_002_10june23_1_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ct_qa_002_10june23_1_pipeline` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ct_qa_002_10june23_1_pipeline_en_5.5.0_3.0_1725574556556.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ct_qa_002_10june23_1_pipeline_en_5.5.0_3.0_1725574556556.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ct_qa_002_10june23_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ct_qa_002_10june23_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
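+
+For completeness, a minimal end-to-end sketch of calling this pretrained pipeline follows. It assumes the included MultiDocumentAssembler reads `question` and `context` columns and that the final answer lands in an `answer` column; adjust the column names if your schema differs:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# Hypothetical input DataFrame with the question/context columns the pipeline expects.
+df = spark.createDataFrame(
+    [["What framework do I use?", "I use spark-nlp."]]
+).toDF("question", "context")
+
+pipeline = PretrainedPipeline("ct_qa_002_10june23_1_pipeline", lang = "en")
+pipeline.transform(df).select("answer.result").show(truncate=False)
+```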
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ct_qa_002_10june23_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|883.8 MB| + +## References + +https://huggingface.co/intanm/ct-qa-002-10june23-1 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_11june23_1_en.md b/docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_11june23_1_en.md new file mode 100644 index 00000000000000..da26a6d2ef1395 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_11june23_1_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English ct_qa_002_11june23_1 XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: ct_qa_002_11june23_1 +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ct_qa_002_11june23_1` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ct_qa_002_11june23_1_en_5.5.0_3.0_1725574356430.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ct_qa_002_11june23_1_en_5.5.0_3.0_1725574356430.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("ct_qa_002_11june23_1","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("ct_qa_002_11june23_1", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ct_qa_002_11june23_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|883.7 MB| + +## References + +https://huggingface.co/intanm/ct-qa-002-11june23-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_11june23_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_11june23_1_pipeline_en.md new file mode 100644 index 00000000000000..b91f80acf8606c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_11june23_1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English ct_qa_002_11june23_1_pipeline pipeline XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: ct_qa_002_11june23_1_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ct_qa_002_11june23_1_pipeline` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ct_qa_002_11june23_1_pipeline_en_5.5.0_3.0_1725574426831.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ct_qa_002_11june23_1_pipeline_en_5.5.0_3.0_1725574426831.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ct_qa_002_11june23_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ct_qa_002_11june23_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ct_qa_002_11june23_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|883.7 MB| + +## References + +https://huggingface.co/intanm/ct-qa-002-11june23-1 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_13june23_1_en.md b/docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_13june23_1_en.md new file mode 100644 index 00000000000000..8fb326c357770b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_13june23_1_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English ct_qa_002_13june23_1 XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: ct_qa_002_13june23_1 +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ct_qa_002_13june23_1` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ct_qa_002_13june23_1_en_5.5.0_3.0_1725558647337.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ct_qa_002_13june23_1_en_5.5.0_3.0_1725558647337.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("ct_qa_002_13june23_1","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("ct_qa_002_13june23_1", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ct_qa_002_13june23_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|885.1 MB| + +## References + +https://huggingface.co/intanm/ct-qa-002-13june23-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_13june23_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_13june23_1_pipeline_en.md new file mode 100644 index 00000000000000..5838404214a661 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_13june23_1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English ct_qa_002_13june23_1_pipeline pipeline XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: ct_qa_002_13june23_1_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ct_qa_002_13june23_1_pipeline` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ct_qa_002_13june23_1_pipeline_en_5.5.0_3.0_1725558717540.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ct_qa_002_13june23_1_pipeline_en_5.5.0_3.0_1725558717540.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ct_qa_002_13june23_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ct_qa_002_13june23_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ct_qa_002_13june23_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|885.1 MB| + +## References + +https://huggingface.co/intanm/ct-qa-002-13june23-1 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_9june23_2_en.md b/docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_9june23_2_en.md new file mode 100644 index 00000000000000..55fa4e0ef457d0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_9june23_2_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English ct_qa_002_9june23_2 XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: ct_qa_002_9june23_2 +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ct_qa_002_9june23_2` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ct_qa_002_9june23_2_en_5.5.0_3.0_1725567650766.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ct_qa_002_9june23_2_en_5.5.0_3.0_1725567650766.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("ct_qa_002_9june23_2","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("ct_qa_002_9june23_2", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ct_qa_002_9june23_2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|874.5 MB| + +## References + +https://huggingface.co/intanm/ct-qa-002-9june23-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_9june23_2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_9june23_2_pipeline_en.md new file mode 100644 index 00000000000000..5cdbb744e40c8b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_9june23_2_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English ct_qa_002_9june23_2_pipeline pipeline XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: ct_qa_002_9june23_2_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ct_qa_002_9june23_2_pipeline` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ct_qa_002_9june23_2_pipeline_en_5.5.0_3.0_1725567725054.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ct_qa_002_9june23_2_pipeline_en_5.5.0_3.0_1725567725054.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ct_qa_002_9june23_2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ct_qa_002_9june23_2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ct_qa_002_9june23_2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|874.5 MB| + +## References + +https://huggingface.co/intanm/ct-qa-002-9june23-2 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_9june23_3_en.md b/docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_9june23_3_en.md new file mode 100644 index 00000000000000..68a367bc1bbe06 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_9june23_3_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English ct_qa_002_9june23_3 XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: ct_qa_002_9june23_3 +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ct_qa_002_9june23_3` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ct_qa_002_9june23_3_en_5.5.0_3.0_1725559001884.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ct_qa_002_9june23_3_en_5.5.0_3.0_1725559001884.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("ct_qa_002_9june23_3","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("ct_qa_002_9june23_3", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ct_qa_002_9june23_3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|874.5 MB| + +## References + +https://huggingface.co/intanm/ct-qa-002-9june23-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_9june23_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_9june23_3_pipeline_en.md new file mode 100644 index 00000000000000..b6d86dcf77876d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_9june23_3_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English ct_qa_002_9june23_3_pipeline pipeline XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: ct_qa_002_9june23_3_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ct_qa_002_9june23_3_pipeline` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ct_qa_002_9june23_3_pipeline_en_5.5.0_3.0_1725559080329.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ct_qa_002_9june23_3_pipeline_en_5.5.0_3.0_1725559080329.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ct_qa_002_9june23_3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ct_qa_002_9june23_3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ct_qa_002_9june23_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|874.5 MB| + +## References + +https://huggingface.co/intanm/ct-qa-002-9june23-3 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_9june23_en.md b/docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_9june23_en.md new file mode 100644 index 00000000000000..1fd0388ead1010 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ct_qa_002_9june23_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English ct_qa_002_9june23 XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: ct_qa_002_9june23 +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ct_qa_002_9june23` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ct_qa_002_9june23_en_5.5.0_3.0_1725497022727.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ct_qa_002_9june23_en_5.5.0_3.0_1725497022727.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("ct_qa_002_9june23","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("ct_qa_002_9june23", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ct_qa_002_9june23| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|874.5 MB| + +## References + +https://huggingface.co/intanm/ct-qa-002-9june23 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ct_trial_9june23_en.md b/docs/_posts/ahmedlone127/2024-09-05-ct_trial_9june23_en.md new file mode 100644 index 00000000000000..ea474cb0b7ef00 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ct_trial_9june23_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English ct_trial_9june23 XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: ct_trial_9june23 +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ct_trial_9june23` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ct_trial_9june23_en_5.5.0_3.0_1725556743411.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ct_trial_9june23_en_5.5.0_3.0_1725556743411.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("ct_trial_9june23","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("ct_trial_9june23", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ct_trial_9june23| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|874.5 MB| + +## References + +https://huggingface.co/intanm/ct-trial-9june23 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ct_trial_9june23_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-ct_trial_9june23_pipeline_en.md new file mode 100644 index 00000000000000..3918e0eed05243 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ct_trial_9june23_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English ct_trial_9june23_pipeline pipeline XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: ct_trial_9june23_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ct_trial_9june23_pipeline` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ct_trial_9june23_pipeline_en_5.5.0_3.0_1725556816233.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ct_trial_9june23_pipeline_en_5.5.0_3.0_1725556816233.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ct_trial_9june23_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ct_trial_9june23_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ct_trial_9june23_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|874.5 MB| + +## References + +https://huggingface.co/intanm/ct-trial-9june23 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-dataequity_kde4_english_spanish_qlora_en.md b/docs/_posts/ahmedlone127/2024-09-05-dataequity_kde4_english_spanish_qlora_en.md new file mode 100644 index 00000000000000..f81ef39e17b6be --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-dataequity_kde4_english_spanish_qlora_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dataequity_kde4_english_spanish_qlora MarianTransformer from dataequity +author: John Snow Labs +name: dataequity_kde4_english_spanish_qlora +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dataequity_kde4_english_spanish_qlora` is a English model originally trained by dataequity. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dataequity_kde4_english_spanish_qlora_en_5.5.0_3.0_1725546205330.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dataequity_kde4_english_spanish_qlora_en_5.5.0_3.0_1725546205330.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("dataequity_kde4_english_spanish_qlora","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("dataequity_kde4_english_spanish_qlora","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
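+
+To inspect the output of the pipeline above, the sentence-level translations can be exploded out of the `translation` annotation column. This is a sketch that assumes the default annotation schema, where `result` holds the translated text:
+
+```python
+# One translated string per detected sentence.
+pipelineDF.selectExpr("explode(translation.result) as translation").show(truncate=False)
+```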
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dataequity_kde4_english_spanish_qlora| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|342.0 MB| + +## References + +https://huggingface.co/dataequity/dataequity-kde4-en-es-qlora \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-dataequity_kde4_english_spanish_qlora_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-dataequity_kde4_english_spanish_qlora_pipeline_en.md new file mode 100644 index 00000000000000..7c2be8d125dd9f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-dataequity_kde4_english_spanish_qlora_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dataequity_kde4_english_spanish_qlora_pipeline pipeline MarianTransformer from dataequity +author: John Snow Labs +name: dataequity_kde4_english_spanish_qlora_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dataequity_kde4_english_spanish_qlora_pipeline` is a English model originally trained by dataequity. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dataequity_kde4_english_spanish_qlora_pipeline_en_5.5.0_3.0_1725546302764.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dataequity_kde4_english_spanish_qlora_pipeline_en_5.5.0_3.0_1725546302764.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dataequity_kde4_english_spanish_qlora_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dataequity_kde4_english_spanish_qlora_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
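+
+A minimal sketch of driving this pretrained pipeline end to end is shown below. It assumes the included DocumentAssembler reads a `text` column and that the MarianTransformer stage writes its output to a `translation` column; verify the column names against the pipeline's stages before relying on them:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# Hypothetical input DataFrame with the "text" column the pipeline expects.
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("dataequity_kde4_english_spanish_qlora_pipeline", lang = "en")
+pipeline.transform(df).select("translation.result").show(truncate=False)
+```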
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dataequity_kde4_english_spanish_qlora_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|342.6 MB| + +## References + +https://huggingface.co/dataequity/dataequity-kde4-en-es-qlora + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-dataequity_opus_maltese_spanish_arabic_en.md b/docs/_posts/ahmedlone127/2024-09-05-dataequity_opus_maltese_spanish_arabic_en.md new file mode 100644 index 00000000000000..c367b6985275ed --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-dataequity_opus_maltese_spanish_arabic_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dataequity_opus_maltese_spanish_arabic MarianTransformer from dataequity +author: John Snow Labs +name: dataequity_opus_maltese_spanish_arabic +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dataequity_opus_maltese_spanish_arabic` is a English model originally trained by dataequity. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dataequity_opus_maltese_spanish_arabic_en_5.5.0_3.0_1725544917393.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dataequity_opus_maltese_spanish_arabic_en_5.5.0_3.0_1725544917393.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("dataequity_opus_maltese_spanish_arabic","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("dataequity_opus_maltese_spanish_arabic","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dataequity_opus_maltese_spanish_arabic| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|527.3 MB| + +## References + +https://huggingface.co/dataequity/dataequity-opus-mt-es-ar \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-dataequity_opus_maltese_spanish_arabic_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-dataequity_opus_maltese_spanish_arabic_pipeline_en.md new file mode 100644 index 00000000000000..356ebe57f7d83e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-dataequity_opus_maltese_spanish_arabic_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dataequity_opus_maltese_spanish_arabic_pipeline pipeline MarianTransformer from dataequity +author: John Snow Labs +name: dataequity_opus_maltese_spanish_arabic_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dataequity_opus_maltese_spanish_arabic_pipeline` is a English model originally trained by dataequity. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dataequity_opus_maltese_spanish_arabic_pipeline_en_5.5.0_3.0_1725544944612.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dataequity_opus_maltese_spanish_arabic_pipeline_en_5.5.0_3.0_1725544944612.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dataequity_opus_maltese_spanish_arabic_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dataequity_opus_maltese_spanish_arabic_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dataequity_opus_maltese_spanish_arabic_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|527.9 MB| + +## References + +https://huggingface.co/dataequity/dataequity-opus-mt-es-ar + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-dataequity_opus_maltese_tagalog_english_en.md b/docs/_posts/ahmedlone127/2024-09-05-dataequity_opus_maltese_tagalog_english_en.md new file mode 100644 index 00000000000000..523d1091fbbad2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-dataequity_opus_maltese_tagalog_english_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dataequity_opus_maltese_tagalog_english MarianTransformer from dataequity +author: John Snow Labs +name: dataequity_opus_maltese_tagalog_english +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dataequity_opus_maltese_tagalog_english` is a English model originally trained by dataequity. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dataequity_opus_maltese_tagalog_english_en_5.5.0_3.0_1725494542024.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dataequity_opus_maltese_tagalog_english_en_5.5.0_3.0_1725494542024.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("dataequity_opus_maltese_tagalog_english","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("dataequity_opus_maltese_tagalog_english","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dataequity_opus_maltese_tagalog_english| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|496.6 MB| + +## References + +https://huggingface.co/dataequity/dataequity-opus-mt-tl-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-dbbert_pipeline_el.md b/docs/_posts/ahmedlone127/2024-09-05-dbbert_pipeline_el.md new file mode 100644 index 00000000000000..e5b3cc1827f3e5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-dbbert_pipeline_el.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Modern Greek (1453-) dbbert_pipeline pipeline BertForTokenClassification from colinswaelens +author: John Snow Labs +name: dbbert_pipeline +date: 2024-09-05 +tags: [el, open_source, pipeline, onnx] +task: Named Entity Recognition +language: el +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dbbert_pipeline` is a Modern Greek (1453-) model originally trained by colinswaelens. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dbbert_pipeline_el_5.5.0_3.0_1725511735184.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dbbert_pipeline_el_5.5.0_3.0_1725511735184.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dbbert_pipeline", lang = "el") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dbbert_pipeline", lang = "el") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dbbert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|el| +|Size:|408.3 MB| + +## References + +https://huggingface.co/colinswaelens/DBBErt + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-dbert_ai4p_en.md b/docs/_posts/ahmedlone127/2024-09-05-dbert_ai4p_en.md new file mode 100644 index 00000000000000..75998130236ad3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-dbert_ai4p_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dbert_ai4p DistilBertForTokenClassification from fktime +author: John Snow Labs +name: dbert_ai4p +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dbert_ai4p` is a English model originally trained by fktime. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dbert_ai4p_en_5.5.0_3.0_1725505843459.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dbert_ai4p_en_5.5.0_3.0_1725505843459.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("dbert_ai4p","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("dbert_ai4p", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
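+
+To sanity-check the fitted pipeline above, the tokens can be shown next to their predicted entity tags. This assumes the `token` and `ner` columns produced by the stages defined above:
+
+```python
+# Tokens and their predicted NER labels, aligned by position.
+pipelineDF.select("token.result", "ner.result").show(truncate=False)
+```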
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dbert_ai4p| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|505.7 MB| + +## References + +https://huggingface.co/fktime/dbert_ai4p \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-deberta_amazon_reviews_v1_krishankantsinghal_en.md b/docs/_posts/ahmedlone127/2024-09-05-deberta_amazon_reviews_v1_krishankantsinghal_en.md new file mode 100644 index 00000000000000..e3f24fbaa61ae9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-deberta_amazon_reviews_v1_krishankantsinghal_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_amazon_reviews_v1_krishankantsinghal DeBertaForSequenceClassification from krishankantsinghal +author: John Snow Labs +name: deberta_amazon_reviews_v1_krishankantsinghal +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_amazon_reviews_v1_krishankantsinghal` is a English model originally trained by krishankantsinghal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_amazon_reviews_v1_krishankantsinghal_en_5.5.0_3.0_1725562454748.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_amazon_reviews_v1_krishankantsinghal_en_5.5.0_3.0_1725562454748.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_amazon_reviews_v1_krishankantsinghal","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_amazon_reviews_v1_krishankantsinghal", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
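+
+The predicted label for each input row can be read back from the `class` annotation column produced by the classifier above; this assumes the default annotation schema, where `result` holds the label string:
+
+```python
+# Input text alongside its predicted class label.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```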
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_amazon_reviews_v1_krishankantsinghal| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|620.8 MB| + +## References + +https://huggingface.co/krishankantsinghal/deberta_amazon_reviews_v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-deberta_amazon_reviews_v1_krishankantsinghal_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-deberta_amazon_reviews_v1_krishankantsinghal_pipeline_en.md new file mode 100644 index 00000000000000..7c698db72895fa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-deberta_amazon_reviews_v1_krishankantsinghal_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_amazon_reviews_v1_krishankantsinghal_pipeline pipeline DeBertaForSequenceClassification from krishankantsinghal +author: John Snow Labs +name: deberta_amazon_reviews_v1_krishankantsinghal_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_amazon_reviews_v1_krishankantsinghal_pipeline` is a English model originally trained by krishankantsinghal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_amazon_reviews_v1_krishankantsinghal_pipeline_en_5.5.0_3.0_1725562516648.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_amazon_reviews_v1_krishankantsinghal_pipeline_en_5.5.0_3.0_1725562516648.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("deberta_amazon_reviews_v1_krishankantsinghal_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("deberta_amazon_reviews_v1_krishankantsinghal_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_amazon_reviews_v1_krishankantsinghal_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|620.8 MB| + +## References + +https://huggingface.co/krishankantsinghal/deberta_amazon_reviews_v1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-deberta_attr_score_90fr_final_en.md b/docs/_posts/ahmedlone127/2024-09-05-deberta_attr_score_90fr_final_en.md new file mode 100644 index 00000000000000..e0f15f63deccfb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-deberta_attr_score_90fr_final_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_attr_score_90fr_final DeBertaForSequenceClassification from Josef0801 +author: John Snow Labs +name: deberta_attr_score_90fr_final +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_attr_score_90fr_final` is a English model originally trained by Josef0801. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_attr_score_90fr_final_en_5.5.0_3.0_1725561230118.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_attr_score_90fr_final_en_5.5.0_3.0_1725561230118.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# input columns must match the outputs of the stages above: "document" and "token"
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_attr_score_90fr_final","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+// input columns must match the outputs of the stages above: "document" and "token"
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_attr_score_90fr_final", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
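+
+The `class` column produced above holds Spark NLP annotations. A minimal sketch, assuming the `pipelineDF` from the Python tab above, for pulling out just the predicted labels:
+
+```python
+# Each annotation's `result` field carries the predicted class label.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```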
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_attr_score_90fr_final| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|669.6 MB| + +## References + +https://huggingface.co/Josef0801/deberta_attr_score_90fr_final \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-deberta_attr_score_90fr_final_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-deberta_attr_score_90fr_final_pipeline_en.md new file mode 100644 index 00000000000000..4db55e74f06e96 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-deberta_attr_score_90fr_final_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_attr_score_90fr_final_pipeline pipeline DeBertaForSequenceClassification from Josef0801 +author: John Snow Labs +name: deberta_attr_score_90fr_final_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_attr_score_90fr_final_pipeline` is a English model originally trained by Josef0801. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_attr_score_90fr_final_pipeline_en_5.5.0_3.0_1725561267277.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_attr_score_90fr_final_pipeline_en_5.5.0_3.0_1725561267277.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is any Spark DataFrame with the input text in a column named "text"
+pipeline = PretrainedPipeline("deberta_attr_score_90fr_final_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is any Spark DataFrame with the input text in a column named "text"
+val pipeline = new PretrainedPipeline("deberta_attr_score_90fr_final_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_attr_score_90fr_final_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|669.6 MB| + +## References + +https://huggingface.co/Josef0801/deberta_attr_score_90fr_final + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-deberta_base_zero_shot_classifier_mnli_anli_v3_en.md b/docs/_posts/ahmedlone127/2024-09-05-deberta_base_zero_shot_classifier_mnli_anli_v3_en.md new file mode 100644 index 00000000000000..c1328626572303 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-deberta_base_zero_shot_classifier_mnli_anli_v3_en.md @@ -0,0 +1,109 @@ +--- +layout: model +title: DeBerta Zero-Shot Classification Base - MNLI ANLI (deberta_base_zero_shot_classifier_mnli_anli_v3 +author: John Snow Labs +name: deberta_base_zero_shot_classifier_mnli_anli_v3 +date: 2024-09-05 +tags: [zero_shot, deberta, en, open_source, openvino] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: openvino +annotator: DeBertaForZeroShotClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForZeroShotClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.deberta_base_zero_shot_classifier_mnli_anli_v3 is a English model originally trained by MoritzLaurer. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_base_zero_shot_classifier_mnli_anli_v3_en_5.5.0_3.0_1725567250369.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_base_zero_shot_classifier_mnli_anli_v3_en_5.5.0_3.0_1725567250369.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForZeroShotClassification
+from pyspark.ml import Pipeline
+
+document_assembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# the candidate labels define the classes scored at inference time
+zeroShotClassifier = DeBertaForZeroShotClassification \
+    .pretrained('deberta_base_zero_shot_classifier_mnli_anli_v3', 'en') \
+    .setInputCols(['token', 'document']) \
+    .setOutputCol('class') \
+    .setCaseSensitive(True) \
+    .setMaxSentenceLength(512) \
+    .setCandidateLabels(["urgent", "mobile", "travel", "movie", "music", "sport", "weather", "technology"])
+
+pipeline = Pipeline(stages=[
+    document_assembler,
+    tokenizer,
+    zeroShotClassifier
+])
+
+example = spark.createDataFrame([['I have a problem with my iphone that needs to be resolved asap!!']]).toDF("text")
+result = pipeline.fit(example).transform(example)
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+// the candidate labels define the classes scored at inference time
+val zeroShotClassifier = DeBertaForZeroShotClassification.pretrained("deberta_base_zero_shot_classifier_mnli_anli_v3", "en")
+    .setInputCols("document", "token")
+    .setOutputCol("class")
+    .setCaseSensitive(true)
+    .setMaxSentenceLength(512)
+    .setCandidateLabels(Array("urgent", "mobile", "travel", "movie", "music", "sport", "weather", "technology"))
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, zeroShotClassifier))
+
+val example = Seq("I have a problem with my iphone that needs to be resolved asap!!").toDS.toDF("text")
+
+val result = pipeline.fit(example).transform(example)
+```
+</div>
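+
+The zero-shot predictions also land in the `class` column as annotations. A minimal sketch, assuming the `result` DataFrame from the Python tab above, for checking which candidate label was assigned to each row:
+
+```python
+# `class.result` contains the winning candidate label(s) per input row.
+result.select("text", "class.result").show(truncate=False)
+```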
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_base_zero_shot_classifier_mnli_anli_v3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[token, document]| +|Output Labels:|[label]| +|Language:|en| +|Size:|473.3 MB| +|Case sensitive:|true| + +## References + +References + +https://huggingface.co/MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-deberta_sentencelevel_nofeatures_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-deberta_sentencelevel_nofeatures_pipeline_en.md new file mode 100644 index 00000000000000..672d2c15d95f7f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-deberta_sentencelevel_nofeatures_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_sentencelevel_nofeatures_pipeline pipeline DeBertaForSequenceClassification from jeffyelson +author: John Snow Labs +name: deberta_sentencelevel_nofeatures_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_sentencelevel_nofeatures_pipeline` is a English model originally trained by jeffyelson. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_sentencelevel_nofeatures_pipeline_en_5.5.0_3.0_1725562075716.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_sentencelevel_nofeatures_pipeline_en_5.5.0_3.0_1725562075716.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is any Spark DataFrame with the input text in a column named "text"
+pipeline = PretrainedPipeline("deberta_sentencelevel_nofeatures_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is any Spark DataFrame with the input text in a column named "text"
+val pipeline = new PretrainedPipeline("deberta_sentencelevel_nofeatures_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_sentencelevel_nofeatures_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|591.6 MB| + +## References + +https://huggingface.co/jeffyelson/deberta_sentencelevel_nofeatures + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-deberta_senti_over_en.md b/docs/_posts/ahmedlone127/2024-09-05-deberta_senti_over_en.md new file mode 100644 index 00000000000000..f232458502f6f4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-deberta_senti_over_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_senti_over DeBertaForSequenceClassification from keonju +author: John Snow Labs +name: deberta_senti_over +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_senti_over` is a English model originally trained by keonju. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_senti_over_en_5.5.0_3.0_1725562304544.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_senti_over_en_5.5.0_3.0_1725562304544.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# input columns must match the outputs of the stages above: "document" and "token"
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_senti_over","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+// input columns must match the outputs of the stages above: "document" and "token"
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_senti_over", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_senti_over| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|854.4 MB| + +## References + +https://huggingface.co/keonju/deberta_senti_over \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-deberta_senti_over_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-deberta_senti_over_pipeline_en.md new file mode 100644 index 00000000000000..c9f1787b449673 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-deberta_senti_over_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_senti_over_pipeline pipeline DeBertaForSequenceClassification from keonju +author: John Snow Labs +name: deberta_senti_over_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_senti_over_pipeline` is a English model originally trained by keonju. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_senti_over_pipeline_en_5.5.0_3.0_1725562365735.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_senti_over_pipeline_en_5.5.0_3.0_1725562365735.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is any Spark DataFrame with the input text in a column named "text"
+pipeline = PretrainedPipeline("deberta_senti_over_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is any Spark DataFrame with the input text in a column named "text"
+val pipeline = new PretrainedPipeline("deberta_senti_over_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_senti_over_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|854.4 MB| + +## References + +https://huggingface.co/keonju/deberta_senti_over + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base__sst2__all_train_en.md b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base__sst2__all_train_en.md new file mode 100644 index 00000000000000..bde85e105542a9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base__sst2__all_train_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_base__sst2__all_train DeBertaForSequenceClassification from SetFit +author: John Snow Labs +name: deberta_v3_base__sst2__all_train +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base__sst2__all_train` is a English model originally trained by SetFit. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base__sst2__all_train_en_5.5.0_3.0_1725560812224.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base__sst2__all_train_en_5.5.0_3.0_1725560812224.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# input columns must match the outputs of the stages above: "document" and "token"
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_base__sst2__all_train","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+// input columns must match the outputs of the stages above: "document" and "token"
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_base__sst2__all_train", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base__sst2__all_train| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|564.7 MB| + +## References + +https://huggingface.co/SetFit/deberta-v3-base__sst2__all-train \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base__sst2__all_train_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base__sst2__all_train_pipeline_en.md new file mode 100644 index 00000000000000..24951b2e07798b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base__sst2__all_train_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_base__sst2__all_train_pipeline pipeline DeBertaForSequenceClassification from SetFit +author: John Snow Labs +name: deberta_v3_base__sst2__all_train_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base__sst2__all_train_pipeline` is a English model originally trained by SetFit. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base__sst2__all_train_pipeline_en_5.5.0_3.0_1725560885945.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base__sst2__all_train_pipeline_en_5.5.0_3.0_1725560885945.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is any Spark DataFrame with the input text in a column named "text"
+pipeline = PretrainedPipeline("deberta_v3_base__sst2__all_train_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is any Spark DataFrame with the input text in a column named "text"
+val pipeline = new PretrainedPipeline("deberta_v3_base__sst2__all_train_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base__sst2__all_train_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|564.7 MB| + +## References + +https://huggingface.co/SetFit/deberta-v3-base__sst2__all-train + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base_finetuned_cola_midterm_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base_finetuned_cola_midterm_pipeline_en.md new file mode 100644 index 00000000000000..5af1521c737716 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base_finetuned_cola_midterm_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_base_finetuned_cola_midterm_pipeline pipeline DeBertaForSequenceClassification from kbberendsen +author: John Snow Labs +name: deberta_v3_base_finetuned_cola_midterm_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_finetuned_cola_midterm_pipeline` is a English model originally trained by kbberendsen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_finetuned_cola_midterm_pipeline_en_5.5.0_3.0_1725560913341.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_finetuned_cola_midterm_pipeline_en_5.5.0_3.0_1725560913341.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is any Spark DataFrame with the input text in a column named "text"
+pipeline = PretrainedPipeline("deberta_v3_base_finetuned_cola_midterm_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is any Spark DataFrame with the input text in a column named "text"
+val pipeline = new PretrainedPipeline("deberta_v3_base_finetuned_cola_midterm_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_finetuned_cola_midterm_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|555.8 MB| + +## References + +https://huggingface.co/kbberendsen/deberta-v3-base-finetuned-cola-midterm + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base_nli_2x_v0_en.md b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base_nli_2x_v0_en.md new file mode 100644 index 00000000000000..a07e34ff89272c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base_nli_2x_v0_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_base_nli_2x_v0 DeBertaForSequenceClassification from mariolinml +author: John Snow Labs +name: deberta_v3_base_nli_2x_v0 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_nli_2x_v0` is a English model originally trained by mariolinml. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_nli_2x_v0_en_5.5.0_3.0_1725562054205.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_nli_2x_v0_en_5.5.0_3.0_1725562054205.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# input columns must match the outputs of the stages above: "document" and "token"
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_base_nli_2x_v0","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+// input columns must match the outputs of the stages above: "document" and "token"
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_base_nli_2x_v0", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_nli_2x_v0| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|576.4 MB| + +## References + +https://huggingface.co/mariolinml/deberta-v3-base_nli_2x_v0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base_nli_2x_v0_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base_nli_2x_v0_pipeline_en.md new file mode 100644 index 00000000000000..c5db995129b548 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base_nli_2x_v0_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_base_nli_2x_v0_pipeline pipeline DeBertaForSequenceClassification from mariolinml +author: John Snow Labs +name: deberta_v3_base_nli_2x_v0_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_nli_2x_v0_pipeline` is a English model originally trained by mariolinml. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_nli_2x_v0_pipeline_en_5.5.0_3.0_1725562134734.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_nli_2x_v0_pipeline_en_5.5.0_3.0_1725562134734.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is any Spark DataFrame with the input text in a column named "text"
+pipeline = PretrainedPipeline("deberta_v3_base_nli_2x_v0_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is any Spark DataFrame with the input text in a column named "text"
+val pipeline = new PretrainedPipeline("deberta_v3_base_nli_2x_v0_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_nli_2x_v0_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|576.5 MB| + +## References + +https://huggingface.co/mariolinml/deberta-v3-base_nli_2x_v0 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base_prompt_injection_v2_en.md b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base_prompt_injection_v2_en.md new file mode 100644 index 00000000000000..93753534f32d78 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base_prompt_injection_v2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_base_prompt_injection_v2 DeBertaForSequenceClassification from protectai +author: John Snow Labs +name: deberta_v3_base_prompt_injection_v2 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_prompt_injection_v2` is a English model originally trained by protectai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_prompt_injection_v2_en_5.5.0_3.0_1725561587990.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_prompt_injection_v2_en_5.5.0_3.0_1725561587990.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# input columns must match the outputs of the stages above: "document" and "token"
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_base_prompt_injection_v2","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+// input columns must match the outputs of the stages above: "document" and "token"
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_base_prompt_injection_v2", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_prompt_injection_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|662.6 MB| + +## References + +https://huggingface.co/protectai/deberta-v3-base-prompt-injection-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base_survey_nepal_bhasa_fact_main_passage_rater_half_human_en.md b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base_survey_nepal_bhasa_fact_main_passage_rater_half_human_en.md new file mode 100644 index 00000000000000..53edddf5db04d1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base_survey_nepal_bhasa_fact_main_passage_rater_half_human_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_base_survey_nepal_bhasa_fact_main_passage_rater_half_human DeBertaForSequenceClassification from domenicrosati +author: John Snow Labs +name: deberta_v3_base_survey_nepal_bhasa_fact_main_passage_rater_half_human +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_survey_nepal_bhasa_fact_main_passage_rater_half_human` is a English model originally trained by domenicrosati. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_survey_nepal_bhasa_fact_main_passage_rater_half_human_en_5.5.0_3.0_1725561402815.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_survey_nepal_bhasa_fact_main_passage_rater_half_human_en_5.5.0_3.0_1725561402815.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# input columns must match the outputs of the stages above: "document" and "token"
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_base_survey_nepal_bhasa_fact_main_passage_rater_half_human","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+// input columns must match the outputs of the stages above: "document" and "token"
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_base_survey_nepal_bhasa_fact_main_passage_rater_half_human", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_survey_nepal_bhasa_fact_main_passage_rater_half_human| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|610.5 MB| + +## References + +https://huggingface.co/domenicrosati/deberta-v3-base-survey-new_fact_main_passage-rater-half-human \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base_survey_nepal_bhasa_fact_main_passage_rater_half_human_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base_survey_nepal_bhasa_fact_main_passage_rater_half_human_pipeline_en.md new file mode 100644 index 00000000000000..f18860e4ea2bc6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base_survey_nepal_bhasa_fact_main_passage_rater_half_human_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_base_survey_nepal_bhasa_fact_main_passage_rater_half_human_pipeline pipeline DeBertaForSequenceClassification from domenicrosati +author: John Snow Labs +name: deberta_v3_base_survey_nepal_bhasa_fact_main_passage_rater_half_human_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_survey_nepal_bhasa_fact_main_passage_rater_half_human_pipeline` is a English model originally trained by domenicrosati. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_survey_nepal_bhasa_fact_main_passage_rater_half_human_pipeline_en_5.5.0_3.0_1725561450795.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_survey_nepal_bhasa_fact_main_passage_rater_half_human_pipeline_en_5.5.0_3.0_1725561450795.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is any Spark DataFrame with the input text in a column named "text"
+pipeline = PretrainedPipeline("deberta_v3_base_survey_nepal_bhasa_fact_main_passage_rater_half_human_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is any Spark DataFrame with the input text in a column named "text"
+val pipeline = new PretrainedPipeline("deberta_v3_base_survey_nepal_bhasa_fact_main_passage_rater_half_human_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_survey_nepal_bhasa_fact_main_passage_rater_half_human_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|610.5 MB| + +## References + +https://huggingface.co/domenicrosati/deberta-v3-base-survey-new_fact_main_passage-rater-half-human + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base_tasksource_toxicity_en.md b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base_tasksource_toxicity_en.md new file mode 100644 index 00000000000000..76703352f6c44f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base_tasksource_toxicity_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_base_tasksource_toxicity DeBertaForSequenceClassification from sileod +author: John Snow Labs +name: deberta_v3_base_tasksource_toxicity +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_tasksource_toxicity` is a English model originally trained by sileod. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_tasksource_toxicity_en_5.5.0_3.0_1725562826031.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_tasksource_toxicity_en_5.5.0_3.0_1725562826031.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# input columns must match the outputs of the stages above: "document" and "token"
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_base_tasksource_toxicity","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+// input columns must match the outputs of the stages above: "document" and "token"
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_base_tasksource_toxicity", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_tasksource_toxicity| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|689.1 MB| + +## References + +https://huggingface.co/sileod/deberta-v3-base-tasksource-toxicity \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base_tasksource_toxicity_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base_tasksource_toxicity_pipeline_en.md new file mode 100644 index 00000000000000..1e0edee56f8d93 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_base_tasksource_toxicity_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_base_tasksource_toxicity_pipeline pipeline DeBertaForSequenceClassification from sileod +author: John Snow Labs +name: deberta_v3_base_tasksource_toxicity_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_tasksource_toxicity_pipeline` is a English model originally trained by sileod. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_tasksource_toxicity_pipeline_en_5.5.0_3.0_1725562863837.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_tasksource_toxicity_pipeline_en_5.5.0_3.0_1725562863837.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is any Spark DataFrame with the input text in a column named "text"
+pipeline = PretrainedPipeline("deberta_v3_base_tasksource_toxicity_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is any Spark DataFrame with the input text in a column named "text"
+val pipeline = new PretrainedPipeline("deberta_v3_base_tasksource_toxicity_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_tasksource_toxicity_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|689.1 MB| + +## References + +https://huggingface.co/sileod/deberta-v3-base-tasksource-toxicity + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_large_fever_garcialnk_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_large_fever_garcialnk_pipeline_en.md new file mode 100644 index 00000000000000..10f8be1c4972eb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_large_fever_garcialnk_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_large_fever_garcialnk_pipeline pipeline DeBertaForSequenceClassification from GarciaLnk +author: John Snow Labs +name: deberta_v3_large_fever_garcialnk_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large_fever_garcialnk_pipeline` is a English model originally trained by GarciaLnk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_fever_garcialnk_pipeline_en_5.5.0_3.0_1725561279680.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_fever_garcialnk_pipeline_en_5.5.0_3.0_1725561279680.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is any Spark DataFrame with the input text in a column named "text"
+pipeline = PretrainedPipeline("deberta_v3_large_fever_garcialnk_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is any Spark DataFrame with the input text in a column named "text"
+val pipeline = new PretrainedPipeline("deberta_v3_large_fever_garcialnk_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_fever_garcialnk_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/GarciaLnk/deberta-v3-large-fever + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_large_finetuned_cola_midterm_en.md b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_large_finetuned_cola_midterm_en.md new file mode 100644 index 00000000000000..f9fb2d4b80a31f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_large_finetuned_cola_midterm_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_large_finetuned_cola_midterm DeBertaForSequenceClassification from kbberendsen +author: John Snow Labs +name: deberta_v3_large_finetuned_cola_midterm +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large_finetuned_cola_midterm` is a English model originally trained by kbberendsen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_finetuned_cola_midterm_en_5.5.0_3.0_1725562635671.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_finetuned_cola_midterm_en_5.5.0_3.0_1725562635671.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# input columns must match the outputs of the stages above: "document" and "token"
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large_finetuned_cola_midterm","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+// input columns must match the outputs of the stages above: "document" and "token"
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large_finetuned_cola_midterm", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_finetuned_cola_midterm| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|554.6 MB| + +## References + +https://huggingface.co/kbberendsen/deberta-v3-large-finetuned-cola-midterm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_large_finetuned_cola_midterm_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_large_finetuned_cola_midterm_pipeline_en.md new file mode 100644 index 00000000000000..cc61f71e6b4528 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_large_finetuned_cola_midterm_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_large_finetuned_cola_midterm_pipeline pipeline DeBertaForSequenceClassification from kbberendsen +author: John Snow Labs +name: deberta_v3_large_finetuned_cola_midterm_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large_finetuned_cola_midterm_pipeline` is a English model originally trained by kbberendsen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_finetuned_cola_midterm_pipeline_en_5.5.0_3.0_1725562724527.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_finetuned_cola_midterm_pipeline_en_5.5.0_3.0_1725562724527.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is any Spark DataFrame with the input text in a column named "text"
+pipeline = PretrainedPipeline("deberta_v3_large_finetuned_cola_midterm_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is any Spark DataFrame with the input text in a column named "text"
+val pipeline = new PretrainedPipeline("deberta_v3_large_finetuned_cola_midterm_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_finetuned_cola_midterm_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|554.6 MB| + +## References + +https://huggingface.co/kbberendsen/deberta-v3-large-finetuned-cola-midterm + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_large_survey_related_passage_consistency_rater_all_en.md b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_large_survey_related_passage_consistency_rater_all_en.md new file mode 100644 index 00000000000000..9b89779a0e106b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_large_survey_related_passage_consistency_rater_all_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_large_survey_related_passage_consistency_rater_all DeBertaForSequenceClassification from domenicrosati +author: John Snow Labs +name: deberta_v3_large_survey_related_passage_consistency_rater_all +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large_survey_related_passage_consistency_rater_all` is a English model originally trained by domenicrosati. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_survey_related_passage_consistency_rater_all_en_5.5.0_3.0_1725561981565.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_survey_related_passage_consistency_rater_all_en_5.5.0_3.0_1725561981565.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# input columns must match the outputs of the stages above: "document" and "token"
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large_survey_related_passage_consistency_rater_all","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+// input columns must match the outputs of the stages above: "document" and "token"
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large_survey_related_passage_consistency_rater_all", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_survey_related_passage_consistency_rater_all| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/domenicrosati/deberta-v3-large-survey-related_passage_consistency-rater-all \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_large_survey_related_passage_consistency_rater_all_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_large_survey_related_passage_consistency_rater_all_pipeline_en.md new file mode 100644 index 00000000000000..94f8294a9c87d8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_large_survey_related_passage_consistency_rater_all_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_large_survey_related_passage_consistency_rater_all_pipeline pipeline DeBertaForSequenceClassification from domenicrosati +author: John Snow Labs +name: deberta_v3_large_survey_related_passage_consistency_rater_all_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large_survey_related_passage_consistency_rater_all_pipeline` is a English model originally trained by domenicrosati. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_survey_related_passage_consistency_rater_all_pipeline_en_5.5.0_3.0_1725562113378.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_survey_related_passage_consistency_rater_all_pipeline_en_5.5.0_3.0_1725562113378.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("deberta_v3_large_survey_related_passage_consistency_rater_all_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("deberta_v3_large_survey_related_passage_consistency_rater_all_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
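+The snippet above assumes an existing DataFrame `df` with a `text` column; a minimal way to build one, mirroring the other examples in this post, is:
+
+```python
+# Illustrative input DataFrame; the pretrained pipeline reads the "text" column.
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+```
+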
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_survey_related_passage_consistency_rater_all_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/domenicrosati/deberta-v3-large-survey-related_passage_consistency-rater-all + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_large_survey_topicality_rater_half_en.md b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_large_survey_topicality_rater_half_en.md new file mode 100644 index 00000000000000..39ebe0df0576e2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_large_survey_topicality_rater_half_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_large_survey_topicality_rater_half DeBertaForSequenceClassification from domenicrosati +author: John Snow Labs +name: deberta_v3_large_survey_topicality_rater_half +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large_survey_topicality_rater_half` is a English model originally trained by domenicrosati. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_survey_topicality_rater_half_en_5.5.0_3.0_1725561085089.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_survey_topicality_rater_half_en_5.5.0_3.0_1725561085089.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large_survey_topicality_rater_half","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large_survey_topicality_rater_half", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_survey_topicality_rater_half| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/domenicrosati/deberta-v3-large-survey-topicality-rater-half \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_large_survey_topicality_rater_half_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_large_survey_topicality_rater_half_pipeline_en.md new file mode 100644 index 00000000000000..acc767bc8f4738 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_large_survey_topicality_rater_half_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_large_survey_topicality_rater_half_pipeline pipeline DeBertaForSequenceClassification from domenicrosati +author: John Snow Labs +name: deberta_v3_large_survey_topicality_rater_half_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large_survey_topicality_rater_half_pipeline` is a English model originally trained by domenicrosati. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_survey_topicality_rater_half_pipeline_en_5.5.0_3.0_1725561202749.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_survey_topicality_rater_half_pipeline_en_5.5.0_3.0_1725561202749.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("deberta_v3_large_survey_topicality_rater_half_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("deberta_v3_large_survey_topicality_rater_half_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
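+For ad-hoc strings, `PretrainedPipeline` also offers an `annotate` helper that avoids building a DataFrame; a small sketch (the keys of the returned dictionary depend on the stages listed under "Included Models"):
+
+```python
+# annotate() returns a dict keyed by the output columns of the included stages.
+light_result = pipeline.annotate("I love spark-nlp")
+print(light_result)
+```
+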
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_survey_topicality_rater_half_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/domenicrosati/deberta-v3-large-survey-topicality-rater-half + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_small_multilabel_mixed_en.md b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_small_multilabel_mixed_en.md new file mode 100644 index 00000000000000..74b0e2fafb4e83 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_small_multilabel_mixed_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_small_multilabel_mixed DeBertaForSequenceClassification from ep9io +author: John Snow Labs +name: deberta_v3_small_multilabel_mixed +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_small_multilabel_mixed` is a English model originally trained by ep9io. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_small_multilabel_mixed_en_5.5.0_3.0_1725561133889.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_small_multilabel_mixed_en_5.5.0_3.0_1725561133889.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_small_multilabel_mixed","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_small_multilabel_mixed", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_small_multilabel_mixed| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|426.5 MB| + +## References + +https://huggingface.co/ep9io/deberta-v3-small-multilabel-mixed \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_small_multilabel_mixed_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_small_multilabel_mixed_pipeline_en.md new file mode 100644 index 00000000000000..7630ae92108a31 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_small_multilabel_mixed_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_small_multilabel_mixed_pipeline pipeline DeBertaForSequenceClassification from ep9io +author: John Snow Labs +name: deberta_v3_small_multilabel_mixed_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_small_multilabel_mixed_pipeline` is a English model originally trained by ep9io. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_small_multilabel_mixed_pipeline_en_5.5.0_3.0_1725561174213.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_small_multilabel_mixed_pipeline_en_5.5.0_3.0_1725561174213.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("deberta_v3_small_multilabel_mixed_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("deberta_v3_small_multilabel_mixed_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_small_multilabel_mixed_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|426.5 MB| + +## References + +https://huggingface.co/ep9io/deberta-v3-small-multilabel-mixed + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_sta_rel_en.md b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_sta_rel_en.md new file mode 100644 index 00000000000000..cd063cf3223d3f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_sta_rel_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_sta_rel DeBertaForSequenceClassification from orbis-marianne +author: John Snow Labs +name: deberta_v3_sta_rel +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_sta_rel` is a English model originally trained by orbis-marianne. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_sta_rel_en_5.5.0_3.0_1725562141590.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_sta_rel_en_5.5.0_3.0_1725562141590.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_sta_rel","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_sta_rel", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
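+The same fitted pipeline can score several texts in one pass; a short sketch with illustrative example sentences:
+
+```python
+# Score a small batch of texts with the already fitted pipelineModel.
+batch_df = spark.createDataFrame(
+    [["The statement is relevant."], ["The statement is unrelated."]]  # example inputs
+).toDF("text")
+pipelineModel.transform(batch_df).select("text", "class.result").show(truncate=False)
+```
+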
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_sta_rel| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|573.0 MB| + +## References + +https://huggingface.co/orbis-marianne/deberta-v3-sta-rel \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_sta_rel_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_sta_rel_pipeline_en.md new file mode 100644 index 00000000000000..35a5738c33f530 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-deberta_v3_sta_rel_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_sta_rel_pipeline pipeline DeBertaForSequenceClassification from orbis-marianne +author: John Snow Labs +name: deberta_v3_sta_rel_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_sta_rel_pipeline` is a English model originally trained by orbis-marianne. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_sta_rel_pipeline_en_5.5.0_3.0_1725562222693.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_sta_rel_pipeline_en_5.5.0_3.0_1725562222693.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("deberta_v3_sta_rel_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("deberta_v3_sta_rel_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_sta_rel_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|573.0 MB| + +## References + +https://huggingface.co/orbis-marianne/deberta-v3-sta-rel + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-delip_vit_large_512_v0_1_en.md b/docs/_posts/ahmedlone127/2024-09-05-delip_vit_large_512_v0_1_en.md new file mode 100644 index 00000000000000..99d272b9ac7d83 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-delip_vit_large_512_v0_1_en.md @@ -0,0 +1,120 @@ +--- +layout: model +title: English delip_vit_large_512_v0_1 CLIPForZeroShotClassification from Zhiyuan-Fan +author: John Snow Labs +name: delip_vit_large_512_v0_1 +date: 2024-09-05 +tags: [en, open_source, onnx, zero_shot, clip, image] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CLIPForZeroShotClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`delip_vit_large_512_v0_1` is a English model originally trained by Zhiyuan-Fan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/delip_vit_large_512_v0_1_en_5.5.0_3.0_1725540976394.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/delip_vit_large_512_v0_1_en_5.5.0_3.0_1725540976394.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +imageDF = spark.read \ + .format("image") \ + .option("dropInvalid", value = True) \ + .load("src/test/resources/image/") + +candidateLabels = [ + "a photo of a bird", + "a photo of a cat", + "a photo of a dog", + "a photo of a hen", + "a photo of a hippo", + "a photo of a room", + "a photo of a tractor", + "a photo of an ostrich", + "a photo of an ox"] + +imageAssembler = ImageAssembler() \ + .setInputCol("image") \ + .setOutputCol("image_assembler") + +imageClassifier = CLIPForZeroShotClassification.pretrained("delip_vit_large_512_v0_1","en") \ + .setInputCols(["image_assembler"]) \ + .setOutputCol("label") \ + .setCandidateLabels(candidateLabels) + +pipeline = Pipeline().setStages([imageAssembler, imageClassifier]) +pipelineModel = pipeline.fit(imageDF) +pipelineDF = pipelineModel.transform(imageDF) + + +``` +```scala + + +val imageDF = ResourceHelper.spark.read + .format("image") + .option("dropInvalid", value = true) + .load("src/test/resources/image/") + +val candidateLabels = Array( + "a photo of a bird", + "a photo of a cat", + "a photo of a dog", + "a photo of a hen", + "a photo of a hippo", + "a photo of a room", + "a photo of a tractor", + "a photo of an ostrich", + "a photo of an ox") + +val imageAssembler = new ImageAssembler() + .setInputCol("image") + .setOutputCol("image_assembler") + +val imageClassifier = CLIPForZeroShotClassification.pretrained("delip_vit_large_512_v0_1","en") + .setInputCols(Array("image_assembler")) + .setOutputCol("label") + .setCandidateLabels(candidateLabels) + +val pipeline = new Pipeline().setStages(Array(imageAssembler, imageClassifier)) +val pipelineModel = pipeline.fit(imageDF) +val pipelineDF = pipelineModel.transform(imageDF) + +``` +
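+To read the zero-shot prediction for each image, the `label` column set above can be selected together with the image origin; a minimal sketch assuming the `pipelineDF` from the example:
+
+```python
+# Each row receives the candidate label with the highest image-text similarity.
+pipelineDF.select("image.origin", "label.result").show(truncate=False)
+```
+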
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|delip_vit_large_512_v0_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[label]| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/Zhiyuan-Fan/delip-vit-large-512-v0.1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-delip_vit_large_512_v0_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-delip_vit_large_512_v0_1_pipeline_en.md new file mode 100644 index 00000000000000..9a908b796b2df0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-delip_vit_large_512_v0_1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English delip_vit_large_512_v0_1_pipeline pipeline CLIPForZeroShotClassification from Zhiyuan-Fan +author: John Snow Labs +name: delip_vit_large_512_v0_1_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`delip_vit_large_512_v0_1_pipeline` is a English model originally trained by Zhiyuan-Fan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/delip_vit_large_512_v0_1_pipeline_en_5.5.0_3.0_1725541051333.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/delip_vit_large_512_v0_1_pipeline_en_5.5.0_3.0_1725541051333.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("delip_vit_large_512_v0_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("delip_vit_large_512_v0_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|delip_vit_large_512_v0_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/Zhiyuan-Fan/delip-vit-large-512-v0.1 + +## Included Models + +- ImageAssembler +- CLIPForZeroShotClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-dictabert_ner_he.md b/docs/_posts/ahmedlone127/2024-09-05-dictabert_ner_he.md new file mode 100644 index 00000000000000..baced045412a45 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-dictabert_ner_he.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Hebrew dictabert_ner BertForTokenClassification from dicta-il +author: John Snow Labs +name: dictabert_ner +date: 2024-09-05 +tags: [he, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: he +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dictabert_ner` is a Hebrew model originally trained by dicta-il. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dictabert_ner_he_5.5.0_3.0_1725511630160.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dictabert_ner_he_5.5.0_3.0_1725511630160.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("dictabert_ner","he") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("dictabert_ner", "he") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
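+If entity chunks are preferred over token-level tags, the output can optionally be post-processed with `NerConverter`; a sketch assuming the column names used above and IOB-style tags from the model:
+
+```python
+from sparknlp.annotator import NerConverter
+
+# Group token-level NER tags into entity chunks.
+converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("entities")
+
+converter.transform(pipelineDF).select("entities.result").show(truncate=False)
+```
+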
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dictabert_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|he| +|Size:|668.3 MB| + +## References + +https://huggingface.co/dicta-il/dictabert-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-dictabert_ner_pipeline_he.md b/docs/_posts/ahmedlone127/2024-09-05-dictabert_ner_pipeline_he.md new file mode 100644 index 00000000000000..8b5483a357bdda --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-dictabert_ner_pipeline_he.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Hebrew dictabert_ner_pipeline pipeline BertForTokenClassification from dicta-il +author: John Snow Labs +name: dictabert_ner_pipeline +date: 2024-09-05 +tags: [he, open_source, pipeline, onnx] +task: Named Entity Recognition +language: he +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dictabert_ner_pipeline` is a Hebrew model originally trained by dicta-il. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dictabert_ner_pipeline_he_5.5.0_3.0_1725511669303.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dictabert_ner_pipeline_he_5.5.0_3.0_1725511669303.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dictabert_ner_pipeline", lang = "he") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dictabert_ner_pipeline", lang = "he") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dictabert_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|he| +|Size:|668.3 MB| + +## References + +https://huggingface.co/dicta-il/dictabert-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-digital_physical_classifier_v2_en.md b/docs/_posts/ahmedlone127/2024-09-05-digital_physical_classifier_v2_en.md new file mode 100644 index 00000000000000..a3c4ab3e17e60d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-digital_physical_classifier_v2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English digital_physical_classifier_v2 RoBertaForSequenceClassification from davis-etsy +author: John Snow Labs +name: digital_physical_classifier_v2 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`digital_physical_classifier_v2` is a English model originally trained by davis-etsy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/digital_physical_classifier_v2_en_5.5.0_3.0_1725542299597.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/digital_physical_classifier_v2_en_5.5.0_3.0_1725542299597.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = RoBertaForSequenceClassification.pretrained("digital_physical_classifier_v2","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = RoBertaForSequenceClassification.pretrained("digital_physical_classifier_v2", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
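+A quick look at the distribution of predicted classes over a scored DataFrame can help sanity-check the model; a small sketch assuming `pipelineDF` from the example above:
+
+```python
+from pyspark.sql import functions as F
+
+# Count how often each predicted label occurs.
+pipelineDF.select(F.explode("class.result").alias("prediction")) \
+    .groupBy("prediction").count().show()
+```
+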
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|digital_physical_classifier_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|429.6 MB| + +## References + +https://huggingface.co/davis-etsy/digital_physical_classifier_v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-digital_physical_classifier_v2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-digital_physical_classifier_v2_pipeline_en.md new file mode 100644 index 00000000000000..52803bae1bfed8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-digital_physical_classifier_v2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English digital_physical_classifier_v2_pipeline pipeline RoBertaForSequenceClassification from davis-etsy +author: John Snow Labs +name: digital_physical_classifier_v2_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`digital_physical_classifier_v2_pipeline` is a English model originally trained by davis-etsy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/digital_physical_classifier_v2_pipeline_en_5.5.0_3.0_1725542329959.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/digital_physical_classifier_v2_pipeline_en_5.5.0_3.0_1725542329959.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("digital_physical_classifier_v2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("digital_physical_classifier_v2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|digital_physical_classifier_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|429.7 MB| + +## References + +https://huggingface.co/davis-etsy/digital_physical_classifier_v2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-discharge_albert_en.md b/docs/_posts/ahmedlone127/2024-09-05-discharge_albert_en.md new file mode 100644 index 00000000000000..d6d134312a1485 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-discharge_albert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English discharge_albert AlbertEmbeddings from Vasudev +author: John Snow Labs +name: discharge_albert +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, albert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`discharge_albert` is a English model originally trained by Vasudev. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/discharge_albert_en_5.5.0_3.0_1725528229195.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/discharge_albert_en_5.5.0_3.0_1725528229195.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = AlbertEmbeddings.pretrained("discharge_albert","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = AlbertEmbeddings.pretrained("discharge_albert","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
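+To hand the token embeddings to downstream Spark ML stages, they can be converted from annotations into plain vectors, for example with `EmbeddingsFinisher`; a sketch assuming the `embeddings` column produced above:
+
+```python
+from sparknlp.base import EmbeddingsFinisher
+
+# Turn Spark NLP annotations into Spark ML vectors.
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["finished_embeddings"]) \
+    .setOutputAsVector(True)
+
+finisher.transform(pipelineDF).select("finished_embeddings").show(truncate=False)
+```
+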
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|discharge_albert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[albert]| +|Language:|en| +|Size:|41.9 MB| + +## References + +https://huggingface.co/Vasudev/discharge_albert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-discharge_albert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-discharge_albert_pipeline_en.md new file mode 100644 index 00000000000000..c721a76b5c08b6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-discharge_albert_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English discharge_albert_pipeline pipeline AlbertEmbeddings from Vasudev +author: John Snow Labs +name: discharge_albert_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`discharge_albert_pipeline` is a English model originally trained by Vasudev. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/discharge_albert_pipeline_en_5.5.0_3.0_1725528231643.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/discharge_albert_pipeline_en_5.5.0_3.0_1725528231643.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("discharge_albert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("discharge_albert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|discharge_albert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|41.9 MB| + +## References + +https://huggingface.co/Vasudev/discharge_albert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-disorbert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-disorbert_pipeline_en.md new file mode 100644 index 00000000000000..b004517c764375 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-disorbert_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English disorbert_pipeline pipeline BertEmbeddings from citiusLTL +author: John Snow Labs +name: disorbert_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`disorbert_pipeline` is a English model originally trained by citiusLTL. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/disorbert_pipeline_en_5.5.0_3.0_1725519886727.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/disorbert_pipeline_en_5.5.0_3.0_1725519886727.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("disorbert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("disorbert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|disorbert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/citiusLTL/DisorBERT + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_amazon_shoe_review_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_amazon_shoe_review_en.md new file mode 100644 index 00000000000000..395f6054e9f94c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_amazon_shoe_review_en.md @@ -0,0 +1,98 @@ +--- +layout: model +title: English distilbert_amazon_shoe_review DistilBertForSequenceClassification from rajanchaturvedi +author: John Snow Labs +name: distilbert_amazon_shoe_review +date: 2024-09-05 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_amazon_shoe_review` is a English model originally trained by rajanchaturvedi. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_amazon_shoe_review_en_5.5.0_3.0_1725580493218.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_amazon_shoe_review_en_5.5.0_3.0_1725580493218.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler()\ + .setInputCol("text")\ + .setOutputCol("document") + +tokenizer = Tokenizer()\ + .setInputCols("document")\ + .setOutputCol("token") + +sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_amazon_shoe_review","en")\ + .setInputCols(["document","token"])\ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_amazon_shoe_review","en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
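+Once fitted, the pipeline can be persisted and reloaded with the standard Spark ML APIs; this is a general Spark pattern rather than anything specific to this model, and the path below is only illustrative:
+
+```python
+from pyspark.ml import PipelineModel
+
+# Persist the fitted pipeline and load it back later.
+model = pipeline.fit(data)
+model.write().overwrite().save("/tmp/distilbert_amazon_shoe_review_model")
+restored = PipelineModel.load("/tmp/distilbert_amazon_shoe_review_model")
+restored.transform(data).select("class.result").show(truncate=False)
+```
+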
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_amazon_shoe_review| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +References + +https://huggingface.co/rajanchaturvedi/distilbert-amazon-shoe-review \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_amazon_shoe_review_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_amazon_shoe_review_pipeline_en.md new file mode 100644 index 00000000000000..1f452f49da7869 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_amazon_shoe_review_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_amazon_shoe_review_pipeline pipeline DistilBertForSequenceClassification from mazed +author: John Snow Labs +name: distilbert_amazon_shoe_review_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_amazon_shoe_review_pipeline` is a English model originally trained by mazed. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_amazon_shoe_review_pipeline_en_5.5.0_3.0_1725580505514.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_amazon_shoe_review_pipeline_en_5.5.0_3.0_1725580505514.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_amazon_shoe_review_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_amazon_shoe_review_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_amazon_shoe_review_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/mazed/distilbert-amazon-shoe-review + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_cased_ner_dumiiii_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_cased_ner_dumiiii_en.md new file mode 100644 index 00000000000000..a5b08f95cd0498 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_cased_ner_dumiiii_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_cased_ner_dumiiii DistilBertForTokenClassification from Dumiiii +author: John Snow Labs +name: distilbert_base_cased_ner_dumiiii +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_cased_ner_dumiiii` is a English model originally trained by Dumiiii. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_ner_dumiiii_en_5.5.0_3.0_1725505815758.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_ner_dumiiii_en_5.5.0_3.0_1725505815758.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_cased_ner_dumiiii","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_cased_ner_dumiiii", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_cased_ner_dumiiii| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/Dumiiii/distilbert-base-cased-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_cased_ner_tunahangokcimen_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_cased_ner_tunahangokcimen_pipeline_en.md new file mode 100644 index 00000000000000..1b68b1b6eb2c25 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_cased_ner_tunahangokcimen_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_cased_ner_tunahangokcimen_pipeline pipeline DistilBertForTokenClassification from TunahanGokcimen +author: John Snow Labs +name: distilbert_base_cased_ner_tunahangokcimen_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_cased_ner_tunahangokcimen_pipeline` is a English model originally trained by TunahanGokcimen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_ner_tunahangokcimen_pipeline_en_5.5.0_3.0_1725495673570.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_ner_tunahangokcimen_pipeline_en_5.5.0_3.0_1725495673570.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_cased_ner_tunahangokcimen_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_cased_ner_tunahangokcimen_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_cased_ner_tunahangokcimen_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/TunahanGokcimen/distilbert-base-cased-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_indonesian_finetuned_prdect_indonesian_id.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_indonesian_finetuned_prdect_indonesian_id.md new file mode 100644 index 00000000000000..44df30e78251ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_indonesian_finetuned_prdect_indonesian_id.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Indonesian distilbert_base_indonesian_finetuned_prdect_indonesian DistilBertEmbeddings from albarpambagio +author: John Snow Labs +name: distilbert_base_indonesian_finetuned_prdect_indonesian +date: 2024-09-05 +tags: [id, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: id +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_indonesian_finetuned_prdect_indonesian` is a Indonesian model originally trained by albarpambagio. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_indonesian_finetuned_prdect_indonesian_id_5.5.0_3.0_1725524596597.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_indonesian_finetuned_prdect_indonesian_id_5.5.0_3.0_1725524596597.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_indonesian_finetuned_prdect_indonesian","id") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_indonesian_finetuned_prdect_indonesian","id") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
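+For quick, single-document checks without launching full Spark jobs, the fitted pipeline can also be wrapped in a `LightPipeline`; a sketch with an illustrative input sentence:
+
+```python
+from sparknlp.base import LightPipeline
+
+# LightPipeline runs the fitted stages locally on small inputs.
+light = LightPipeline(pipelineModel)
+annotations = light.fullAnnotate("Saya suka spark-nlp")
+```
+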
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_indonesian_finetuned_prdect_indonesian| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|id| +|Size:|253.0 MB| + +## References + +https://huggingface.co/albarpambagio/distilbert-base-indonesian-finetuned-PRDECT-ID \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_indonesian_finetuned_prdect_indonesian_pipeline_id.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_indonesian_finetuned_prdect_indonesian_pipeline_id.md new file mode 100644 index 00000000000000..fa02ba09a5c45b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_indonesian_finetuned_prdect_indonesian_pipeline_id.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Indonesian distilbert_base_indonesian_finetuned_prdect_indonesian_pipeline pipeline DistilBertEmbeddings from albarpambagio +author: John Snow Labs +name: distilbert_base_indonesian_finetuned_prdect_indonesian_pipeline +date: 2024-09-05 +tags: [id, open_source, pipeline, onnx] +task: Embeddings +language: id +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_indonesian_finetuned_prdect_indonesian_pipeline` is a Indonesian model originally trained by albarpambagio. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_indonesian_finetuned_prdect_indonesian_pipeline_id_5.5.0_3.0_1725524608699.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_indonesian_finetuned_prdect_indonesian_pipeline_id_5.5.0_3.0_1725524608699.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_indonesian_finetuned_prdect_indonesian_pipeline", lang = "id") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_indonesian_finetuned_prdect_indonesian_pipeline", lang = "id") +val annotations = pipeline.transform(df) + +``` +
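+
+The snippet above assumes `df` is a Spark DataFrame with a `text` column. A minimal sketch of preparing such input (the sample sentence is only an illustration), plus the single-string `annotate` shortcut offered by `PretrainedPipeline`:
+
+```python
+# Build a one-row DataFrame with the expected "text" column.
+df = spark.createDataFrame([["Saya suka Spark NLP"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+# For quick checks on a single string, annotate() returns a plain dict.
+result = pipeline.annotate("Saya suka Spark NLP")
+```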
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_indonesian_finetuned_prdect_indonesian_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|id| +|Size:|253.0 MB| + +## References + +https://huggingface.co/albarpambagio/distilbert-base-indonesian-finetuned-PRDECT-ID + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_multilingual_cased_finetuned_german_portuguese_spanish_xx.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_multilingual_cased_finetuned_german_portuguese_spanish_xx.md new file mode 100644 index 00000000000000..b5300d42f677e3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_multilingual_cased_finetuned_german_portuguese_spanish_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual distilbert_base_multilingual_cased_finetuned_german_portuguese_spanish DistilBertEmbeddings from lusxvr +author: John Snow Labs +name: distilbert_base_multilingual_cased_finetuned_german_portuguese_spanish +date: 2024-09-05 +tags: [xx, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_multilingual_cased_finetuned_german_portuguese_spanish` is a Multilingual model originally trained by lusxvr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_multilingual_cased_finetuned_german_portuguese_spanish_xx_5.5.0_3.0_1725524804760.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_multilingual_cased_finetuned_german_portuguese_spanish_xx_5.5.0_3.0_1725524804760.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_multilingual_cased_finetuned_german_portuguese_spanish","xx") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_multilingual_cased_finetuned_german_portuguese_spanish","xx") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_multilingual_cased_finetuned_german_portuguese_spanish| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|xx| +|Size:|505.4 MB| + +## References + +https://huggingface.co/lusxvr/distilbert-base-multilingual-cased-finetuned-de_pt_es \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_squad_tfm_1_question_answering_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_squad_tfm_1_question_answering_en.md new file mode 100644 index 00000000000000..ec56f815d3c841 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_squad_tfm_1_question_answering_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_base_squad_tfm_1_question_answering BertForQuestionAnswering from JoelVIU +author: John Snow Labs +name: distilbert_base_squad_tfm_1_question_answering +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_squad_tfm_1_question_answering` is a English model originally trained by JoelVIU. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_squad_tfm_1_question_answering_en_5.5.0_3.0_1725554479713.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_squad_tfm_1_question_answering_en_5.5.0_3.0_1725554479713.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = BertForQuestionAnswering.pretrained("distilbert_base_squad_tfm_1_question_answering","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = BertForQuestionAnswering.pretrained("distilbert_base_squad_tfm_1_question_answering", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
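+
+Once the pipeline has run, the predicted answer span lives in the `answer` column. A minimal sketch of reading it back (assuming the standard Spark NLP annotation schema, where `result` holds the extracted text):
+
+```python
+# Show the extracted answer text for each question/context pair.
+pipelineDF.select("answer.result").show(truncate=False)
+```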
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_squad_tfm_1_question_answering| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/JoelVIU/distilbert-base-squad-TFM_1-Question-Answering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_squad_tfm_1_question_answering_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_squad_tfm_1_question_answering_pipeline_en.md new file mode 100644 index 00000000000000..c354a37ab8a395 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_squad_tfm_1_question_answering_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_squad_tfm_1_question_answering_pipeline pipeline BertForQuestionAnswering from JoelVIU +author: John Snow Labs +name: distilbert_base_squad_tfm_1_question_answering_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_squad_tfm_1_question_answering_pipeline` is a English model originally trained by JoelVIU. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_squad_tfm_1_question_answering_pipeline_en_5.5.0_3.0_1725554501426.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_squad_tfm_1_question_answering_pipeline_en_5.5.0_3.0_1725554501426.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_squad_tfm_1_question_answering_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_squad_tfm_1_question_answering_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_squad_tfm_1_question_answering_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/JoelVIU/distilbert-base-squad-TFM_1-Question-Answering + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_emotion_ft_0703_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_emotion_ft_0703_pipeline_en.md new file mode 100644 index 00000000000000..7f1ea4abef3497 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_emotion_ft_0703_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_emotion_ft_0703_pipeline pipeline DistilBertForSequenceClassification from liuguojing +author: John Snow Labs +name: distilbert_base_uncased_emotion_ft_0703_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_emotion_ft_0703_pipeline` is a English model originally trained by liuguojing. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_emotion_ft_0703_pipeline_en_5.5.0_3.0_1725580546198.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_emotion_ft_0703_pipeline_en_5.5.0_3.0_1725580546198.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_emotion_ft_0703_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_emotion_ft_0703_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_emotion_ft_0703_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/liuguojing/distilbert-base-uncased_emotion_ft_0703 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_5to9_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_5to9_en.md new file mode 100644 index 00000000000000..46f96284980627 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_5to9_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_emotion_5to9 DistilBertForSequenceClassification from 5to9 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_emotion_5to9 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_emotion_5to9` is a English model originally trained by 5to9. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_5to9_en_5.5.0_3.0_1725579959060.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_5to9_en_5.5.0_3.0_1725579959060.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_emotion_5to9","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_emotion_5to9", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
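+
+The predicted emotion label is written to the `class` column. As a minimal sketch (assuming the standard Spark NLP annotation schema), the label can be displayed next to the input text:
+
+```python
+# Show each input sentence alongside its predicted class label.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```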
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_emotion_5to9| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/5to9/distilbert-base-uncased-finetuned-emotion \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_5to9_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_5to9_pipeline_en.md new file mode 100644 index 00000000000000..287a167cc5e506 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_5to9_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_emotion_5to9_pipeline pipeline DistilBertForSequenceClassification from 5to9 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_emotion_5to9_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_emotion_5to9_pipeline` is a English model originally trained by 5to9. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_5to9_pipeline_en_5.5.0_3.0_1725579973188.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_5to9_pipeline_en_5.5.0_3.0_1725579973188.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_emotion_5to9_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_emotion_5to9_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_emotion_5to9_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/5to9/distilbert-base-uncased-finetuned-emotion + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_aikozvezda_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_aikozvezda_en.md new file mode 100644 index 00000000000000..b0d1eb7adc0c9d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_aikozvezda_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_emotion_aikozvezda DistilBertForSequenceClassification from Aikozvezda +author: John Snow Labs +name: distilbert_base_uncased_finetuned_emotion_aikozvezda +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_emotion_aikozvezda` is a English model originally trained by Aikozvezda. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_aikozvezda_en_5.5.0_3.0_1725580283506.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_aikozvezda_en_5.5.0_3.0_1725580283506.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_emotion_aikozvezda","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_emotion_aikozvezda", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_emotion_aikozvezda| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Aikozvezda/distilbert-base-uncased-finetuned-emotion \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_aikozvezda_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_aikozvezda_pipeline_en.md new file mode 100644 index 00000000000000..7e59505f7b999e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_aikozvezda_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_emotion_aikozvezda_pipeline pipeline DistilBertForSequenceClassification from Aikozvezda +author: John Snow Labs +name: distilbert_base_uncased_finetuned_emotion_aikozvezda_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_emotion_aikozvezda_pipeline` is a English model originally trained by Aikozvezda. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_aikozvezda_pipeline_en_5.5.0_3.0_1725580296487.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_aikozvezda_pipeline_en_5.5.0_3.0_1725580296487.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_emotion_aikozvezda_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_emotion_aikozvezda_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_emotion_aikozvezda_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Aikozvezda/distilbert-base-uncased-finetuned-emotion + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_elshehawy_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_elshehawy_en.md new file mode 100644 index 00000000000000..ba8f563e561a7b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_elshehawy_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_emotion_elshehawy DistilBertForSequenceClassification from elshehawy +author: John Snow Labs +name: distilbert_base_uncased_finetuned_emotion_elshehawy +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_emotion_elshehawy` is a English model originally trained by elshehawy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_elshehawy_en_5.5.0_3.0_1725507350137.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_elshehawy_en_5.5.0_3.0_1725507350137.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_emotion_elshehawy","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_emotion_elshehawy", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_emotion_elshehawy| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/elshehawy/distilbert-base-uncased-finetuned-emotion \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_parthiv99_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_parthiv99_en.md new file mode 100644 index 00000000000000..9f7e99648047a7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_parthiv99_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_emotion_parthiv99 DistilBertForSequenceClassification from parthiv99 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_emotion_parthiv99 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_emotion_parthiv99` is a English model originally trained by parthiv99. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_parthiv99_en_5.5.0_3.0_1725507068797.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_parthiv99_en_5.5.0_3.0_1725507068797.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_emotion_parthiv99","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_emotion_parthiv99", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_emotion_parthiv99| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/parthiv99/distilbert-base-uncased-finetuned-emotion \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_parthiv99_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_parthiv99_pipeline_en.md new file mode 100644 index 00000000000000..16b3e7f95c6ae2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_parthiv99_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_emotion_parthiv99_pipeline pipeline DistilBertForSequenceClassification from parthiv99 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_emotion_parthiv99_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_emotion_parthiv99_pipeline` is a English model originally trained by parthiv99. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_parthiv99_pipeline_en_5.5.0_3.0_1725507080833.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_parthiv99_pipeline_en_5.5.0_3.0_1725507080833.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_emotion_parthiv99_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_emotion_parthiv99_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_emotion_parthiv99_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/parthiv99/distilbert-base-uncased-finetuned-emotion + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_wlrnfyd0329_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_wlrnfyd0329_en.md new file mode 100644 index 00000000000000..0e327e8e59ba5a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_wlrnfyd0329_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_emotion_wlrnfyd0329 DistilBertForSequenceClassification from wlrnfyd0329 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_emotion_wlrnfyd0329 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_emotion_wlrnfyd0329` is a English model originally trained by wlrnfyd0329. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_wlrnfyd0329_en_5.5.0_3.0_1725507578050.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_wlrnfyd0329_en_5.5.0_3.0_1725507578050.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_emotion_wlrnfyd0329","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_emotion_wlrnfyd0329", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_emotion_wlrnfyd0329| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/wlrnfyd0329/distilbert-base-uncased-finetuned-emotion \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_wlrnfyd0329_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_wlrnfyd0329_pipeline_en.md new file mode 100644 index 00000000000000..d297e491803c8d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotion_wlrnfyd0329_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_emotion_wlrnfyd0329_pipeline pipeline DistilBertForSequenceClassification from wlrnfyd0329 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_emotion_wlrnfyd0329_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_emotion_wlrnfyd0329_pipeline` is a English model originally trained by wlrnfyd0329. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_wlrnfyd0329_pipeline_en_5.5.0_3.0_1725507589799.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_wlrnfyd0329_pipeline_en_5.5.0_3.0_1725507589799.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_emotion_wlrnfyd0329_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_emotion_wlrnfyd0329_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_emotion_wlrnfyd0329_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/wlrnfyd0329/distilbert-base-uncased-finetuned-emotion + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotions_klenam_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotions_klenam_en.md new file mode 100644 index 00000000000000..1aee9baa4b9370 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotions_klenam_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_emotions_klenam DistilBertForSequenceClassification from Klenam +author: John Snow Labs +name: distilbert_base_uncased_finetuned_emotions_klenam +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_emotions_klenam` is a English model originally trained by Klenam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotions_klenam_en_5.5.0_3.0_1725507704367.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotions_klenam_en_5.5.0_3.0_1725507704367.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_emotions_klenam","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_emotions_klenam", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_emotions_klenam| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Klenam/distilbert-base-uncased-finetuned-emotions \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotions_klenam_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotions_klenam_pipeline_en.md new file mode 100644 index 00000000000000..f4bd1e58cf7c6b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_emotions_klenam_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_emotions_klenam_pipeline pipeline DistilBertForSequenceClassification from Klenam +author: John Snow Labs +name: distilbert_base_uncased_finetuned_emotions_klenam_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_emotions_klenam_pipeline` is a English model originally trained by Klenam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotions_klenam_pipeline_en_5.5.0_3.0_1725507717294.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotions_klenam_pipeline_en_5.5.0_3.0_1725507717294.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_emotions_klenam_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_emotions_klenam_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_emotions_klenam_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Klenam/distilbert-base-uncased-finetuned-emotions + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_abh1na5_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_abh1na5_pipeline_en.md new file mode 100644 index 00000000000000..74f8c8952360c5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_abh1na5_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_abh1na5_pipeline pipeline DistilBertEmbeddings from abh1na5 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_abh1na5_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_abh1na5_pipeline` is a English model originally trained by abh1na5. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_abh1na5_pipeline_en_5.5.0_3.0_1725524105289.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_abh1na5_pipeline_en_5.5.0_3.0_1725524105289.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_abh1na5_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_abh1na5_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_abh1na5_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/abh1na5/distilbert-base-uncased-finetuned-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_accelerate_pragash_mohanarajah_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_accelerate_pragash_mohanarajah_pipeline_en.md new file mode 100644 index 00000000000000..a391fe31fe9e76 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_accelerate_pragash_mohanarajah_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_pragash_mohanarajah_pipeline pipeline DistilBertEmbeddings from Pragash-Mohanarajah +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_pragash_mohanarajah_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_accelerate_pragash_mohanarajah_pipeline` is a English model originally trained by Pragash-Mohanarajah. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_pragash_mohanarajah_pipeline_en_5.5.0_3.0_1725524314693.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_pragash_mohanarajah_pipeline_en_5.5.0_3.0_1725524314693.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_accelerate_pragash_mohanarajah_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_accelerate_pragash_mohanarajah_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_pragash_mohanarajah_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Pragash-Mohanarajah/distilbert-base-uncased-finetuned-imdb-accelerate + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_accelerate_rajabilalnazir_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_accelerate_rajabilalnazir_en.md new file mode 100644 index 00000000000000..b3b5b928adc3cd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_accelerate_rajabilalnazir_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_rajabilalnazir DistilBertEmbeddings from rajabilalnazir +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_rajabilalnazir +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_accelerate_rajabilalnazir` is a English model originally trained by rajabilalnazir. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_rajabilalnazir_en_5.5.0_3.0_1725524421478.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_rajabilalnazir_en_5.5.0_3.0_1725524421478.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_rajabilalnazir","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_rajabilalnazir","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_rajabilalnazir| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/rajabilalnazir/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_accelerate_rajabilalnazir_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_accelerate_rajabilalnazir_pipeline_en.md new file mode 100644 index 00000000000000..bef901f632c237 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_accelerate_rajabilalnazir_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_rajabilalnazir_pipeline pipeline DistilBertEmbeddings from rajabilalnazir +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_rajabilalnazir_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_accelerate_rajabilalnazir_pipeline` is a English model originally trained by rajabilalnazir. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_rajabilalnazir_pipeline_en_5.5.0_3.0_1725524434204.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_rajabilalnazir_pipeline_en_5.5.0_3.0_1725524434204.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_accelerate_rajabilalnazir_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_accelerate_rajabilalnazir_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
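+
+The snippet above assumes an existing Spark DataFrame `df` with a `text` column. A minimal sketch of preparing one, plus the single-string `annotate` helper exposed by `PretrainedPipeline` (not part of the original card):
+
+```python
+# Hypothetical input: the pretrained pipeline reads from a "text" column.
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+# For quick, single-document checks the same object exposes annotate():
+print(pipeline.annotate("I love spark-nlp"))
+```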
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_rajabilalnazir_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/rajabilalnazir/distilbert-base-uncased-finetuned-imdb-accelerate + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_alex_atelo_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_alex_atelo_en.md new file mode 100644 index 00000000000000..9a8af26b647a40 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_alex_atelo_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_alex_atelo DistilBertEmbeddings from alex-atelo +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_alex_atelo +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_alex_atelo` is a English model originally trained by alex-atelo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_alex_atelo_en_5.5.0_3.0_1725524685265.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_alex_atelo_en_5.5.0_3.0_1725524685265.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_alex_atelo","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_alex_atelo","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
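+
+For low-latency use outside a DataFrame job, the fitted `pipelineModel` above can be wrapped in a `LightPipeline`. A sketch, assuming a running Spark NLP session (not part of the original card):
+
+```python
+from sparknlp.base import LightPipeline
+
+# LightPipeline runs the same stages on plain Python strings, driver-side.
+light = LightPipeline(pipelineModel)
+result = light.annotate("I love spark-nlp")
+print(result["token"])  # tokens produced by the Tokenizer stage
+```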
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_alex_atelo| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/alex-atelo/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_alex_atelo_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_alex_atelo_pipeline_en.md new file mode 100644 index 00000000000000..0bab6f28c40072 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_alex_atelo_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_alex_atelo_pipeline pipeline DistilBertEmbeddings from alex-atelo +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_alex_atelo_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_alex_atelo_pipeline` is a English model originally trained by alex-atelo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_alex_atelo_pipeline_en_5.5.0_3.0_1725524698396.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_alex_atelo_pipeline_en_5.5.0_3.0_1725524698396.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_alex_atelo_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_alex_atelo_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
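+
+The downloaded pipeline bundles the stages listed under Included Models below. A sketch of inspecting them individually (attribute names as in recent Spark NLP releases; not part of the original card):
+
+```python
+# PretrainedPipeline wraps a regular Spark ML PipelineModel.
+for stage in pipeline.model.stages:
+    print(type(stage).__name__)
+# Expected: DocumentAssembler, TokenizerModel, DistilBertEmbeddings
+```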
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_alex_atelo_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/alex-atelo/distilbert-base-uncased-finetuned-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_chrisantha_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_chrisantha_en.md new file mode 100644 index 00000000000000..d2d0108ae8398e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_chrisantha_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_chrisantha DistilBertEmbeddings from Chrisantha +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_chrisantha +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_chrisantha` is a English model originally trained by Chrisantha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_chrisantha_en_5.5.0_3.0_1725524528626.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_chrisantha_en_5.5.0_3.0_1725524528626.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_chrisantha","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_chrisantha","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_chrisantha| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Chrisantha/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_chrisantha_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_chrisantha_pipeline_en.md new file mode 100644 index 00000000000000..25f41f8c2b87c2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_chrisantha_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_chrisantha_pipeline pipeline DistilBertEmbeddings from Chrisantha +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_chrisantha_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_chrisantha_pipeline` is a English model originally trained by Chrisantha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_chrisantha_pipeline_en_5.5.0_3.0_1725524540791.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_chrisantha_pipeline_en_5.5.0_3.0_1725524540791.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_chrisantha_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_chrisantha_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_chrisantha_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Chrisantha/distilbert-base-uncased-finetuned-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_jfcruz13_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_jfcruz13_en.md new file mode 100644 index 00000000000000..d20fc9dc10009a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_jfcruz13_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_jfcruz13 DistilBertEmbeddings from jfcruz13 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_jfcruz13 +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_jfcruz13` is a English model originally trained by jfcruz13. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_jfcruz13_en_5.5.0_3.0_1725524317749.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_jfcruz13_en_5.5.0_3.0_1725524317749.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_jfcruz13","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_jfcruz13","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_jfcruz13| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/jfcruz13/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_kennytheo_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_kennytheo_en.md new file mode 100644 index 00000000000000..c1fd207bf5fc91 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_kennytheo_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_kennytheo DistilBertEmbeddings from kennyTheo +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_kennytheo +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_kennytheo` is a English model originally trained by kennyTheo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_kennytheo_en_5.5.0_3.0_1725524208123.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_kennytheo_en_5.5.0_3.0_1725524208123.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_kennytheo","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_kennytheo","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_kennytheo| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/kennyTheo/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_kennytheo_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_kennytheo_pipeline_en.md new file mode 100644 index 00000000000000..939bc54be40fe9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_kennytheo_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_kennytheo_pipeline pipeline DistilBertEmbeddings from kennyTheo +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_kennytheo_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_kennytheo_pipeline` is a English model originally trained by kennyTheo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_kennytheo_pipeline_en_5.5.0_3.0_1725524219963.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_kennytheo_pipeline_en_5.5.0_3.0_1725524219963.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_kennytheo_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_kennytheo_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_kennytheo_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/kennyTheo/distilbert-base-uncased-finetuned-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_miktf_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_miktf_en.md new file mode 100644 index 00000000000000..57621713f0793f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_miktf_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_miktf DistilBertEmbeddings from miktf +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_miktf +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_miktf` is a English model originally trained by miktf. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_miktf_en_5.5.0_3.0_1725524429411.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_miktf_en_5.5.0_3.0_1725524429411.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_miktf","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_miktf","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_miktf| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/miktf/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_miktf_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_miktf_pipeline_en.md new file mode 100644 index 00000000000000..edb2343bb82fe7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_miktf_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_miktf_pipeline pipeline DistilBertEmbeddings from miktf +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_miktf_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_miktf_pipeline` is a English model originally trained by miktf. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_miktf_pipeline_en_5.5.0_3.0_1725524441465.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_miktf_pipeline_en_5.5.0_3.0_1725524441465.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_miktf_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_miktf_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_miktf_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/miktf/distilbert-base-uncased-finetuned-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_muffato_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_muffato_en.md new file mode 100644 index 00000000000000..8e2d2fe613a409 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_muffato_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_muffato DistilBertEmbeddings from muffato +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_muffato +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_muffato` is a English model originally trained by muffato. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_muffato_en_5.5.0_3.0_1725524504254.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_muffato_en_5.5.0_3.0_1725524504254.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_muffato","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_muffato","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_muffato| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/muffato/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_muffato_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_muffato_pipeline_en.md new file mode 100644 index 00000000000000..f82871af13204b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_muffato_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_muffato_pipeline pipeline DistilBertEmbeddings from muffato +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_muffato_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_muffato_pipeline` is a English model originally trained by muffato. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_muffato_pipeline_en_5.5.0_3.0_1725524517455.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_muffato_pipeline_en_5.5.0_3.0_1725524517455.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_muffato_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_muffato_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_muffato_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/muffato/distilbert-base-uncased-finetuned-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_nlp_course_chapter7_section2_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_nlp_course_chapter7_section2_en.md new file mode 100644 index 00000000000000..889164075dfe3e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_nlp_course_chapter7_section2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_nlp_course_chapter7_section2 DistilBertEmbeddings from BanUrsus +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_nlp_course_chapter7_section2 +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_nlp_course_chapter7_section2` is a English model originally trained by BanUrsus. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_nlp_course_chapter7_section2_en_5.5.0_3.0_1725524409937.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_nlp_course_chapter7_section2_en_5.5.0_3.0_1725524409937.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_nlp_course_chapter7_section2","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_nlp_course_chapter7_section2","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
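+
+Once fitted, the pipeline is a standard Spark ML `PipelineModel`, so it can be saved and reloaded for offline or air-gapped use. A sketch with a hypothetical local path (not part of the original card):
+
+```python
+from pyspark.ml import PipelineModel
+
+# Hypothetical path; any HDFS/S3/DBFS URI reachable by Spark also works.
+pipelineModel.write().overwrite().save("/tmp/distilbert_imdb_embeddings")
+reloaded = PipelineModel.load("/tmp/distilbert_imdb_embeddings")
+reloaded.transform(data).select("embeddings").show(1)
+```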
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_nlp_course_chapter7_section2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/BanUrsus/distilbert-base-uncased-finetuned-imdb_nlp-course-chapter7-section2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_nlp_course_chapter7_section2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_nlp_course_chapter7_section2_pipeline_en.md new file mode 100644 index 00000000000000..441a68c15a9d34 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_nlp_course_chapter7_section2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_nlp_course_chapter7_section2_pipeline pipeline DistilBertEmbeddings from BanUrsus +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_nlp_course_chapter7_section2_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_nlp_course_chapter7_section2_pipeline` is a English model originally trained by BanUrsus. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_nlp_course_chapter7_section2_pipeline_en_5.5.0_3.0_1725524423010.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_nlp_course_chapter7_section2_pipeline_en_5.5.0_3.0_1725524423010.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_nlp_course_chapter7_section2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_nlp_course_chapter7_section2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_nlp_course_chapter7_section2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/BanUrsus/distilbert-base-uncased-finetuned-imdb_nlp-course-chapter7-section2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_zhenchuan_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_zhenchuan_pipeline_en.md new file mode 100644 index 00000000000000..57973f23b245b6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_imdb_zhenchuan_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_zhenchuan_pipeline pipeline DistilBertEmbeddings from zhenchuan +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_zhenchuan_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_zhenchuan_pipeline` is a English model originally trained by zhenchuan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_zhenchuan_pipeline_en_5.5.0_3.0_1725524109941.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_zhenchuan_pipeline_en_5.5.0_3.0_1725524109941.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_zhenchuan_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_zhenchuan_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_zhenchuan_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/zhenchuan/distilbert-base-uncased-finetuned-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_lm_attck_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_lm_attck_en.md new file mode 100644 index 00000000000000..759daa9c55ad02 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_lm_attck_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_lm_attck DistilBertEmbeddings from fbi0826 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_lm_attck +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_lm_attck` is a English model originally trained by fbi0826. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_lm_attck_en_5.5.0_3.0_1725524308095.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_lm_attck_en_5.5.0_3.0_1725524308095.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_lm_attck","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_lm_attck","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
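+
+Token vectors from this model can be pooled into one vector per document with the `SentenceEmbeddings` annotator, which averages the word embeddings. A sketch extending the pipeline above (annotator and parameter names per the public Spark NLP API; not part of the original card):
+
+```python
+from sparknlp.annotator import SentenceEmbeddings
+
+sentence_embeddings = SentenceEmbeddings() \
+    .setInputCols(["document", "embeddings"]) \
+    .setOutputCol("sentence_embeddings") \
+    .setPoolingStrategy("AVERAGE")  # or "SUM"
+
+pooled = Pipeline().setStages(
+    [documentAssembler, tokenizer, embeddings, sentence_embeddings]
+).fit(data).transform(data)
+```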
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_lm_attck| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/fbi0826/distilbert-base-uncased-finetuned-LM-ATTCK \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_lm_attck_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_lm_attck_pipeline_en.md new file mode 100644 index 00000000000000..3901cb3056c07f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_lm_attck_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_lm_attck_pipeline pipeline DistilBertEmbeddings from fbi0826 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_lm_attck_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_lm_attck_pipeline` is a English model originally trained by fbi0826. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_lm_attck_pipeline_en_5.5.0_3.0_1725524320795.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_lm_attck_pipeline_en_5.5.0_3.0_1725524320795.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_lm_attck_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_lm_attck_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_lm_attck_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/fbi0826/distilbert-base-uncased-finetuned-LM-ATTCK + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_neg_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_neg_pipeline_en.md new file mode 100644 index 00000000000000..98a347e1d4e122 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_neg_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_neg_pipeline pipeline DistilBertForTokenClassification from tqoyiwcvwkephzdgsp +author: John Snow Labs +name: distilbert_base_uncased_finetuned_neg_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_neg_pipeline` is a English model originally trained by tqoyiwcvwkephzdgsp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_neg_pipeline_en_5.5.0_3.0_1725518102267.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_neg_pipeline_en_5.5.0_3.0_1725518102267.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_neg_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_neg_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_neg_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/tqoyiwcvwkephzdgsp/distilbert-base-uncased-finetuned-neg + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_negation_scope_classification_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_negation_scope_classification_en.md new file mode 100644 index 00000000000000..a26d8042fd61ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_negation_scope_classification_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_negation_scope_classification DistilBertForTokenClassification from liatoutou +author: John Snow Labs +name: distilbert_base_uncased_finetuned_negation_scope_classification +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_negation_scope_classification` is a English model originally trained by liatoutou. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_negation_scope_classification_en_5.5.0_3.0_1725500562697.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_negation_scope_classification_en_5.5.0_3.0_1725500562697.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_negation_scope_classification","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_negation_scope_classification", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
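+
+The `ner` column then holds one IOB-style label per token. A minimal sketch of flattening it for inspection, assuming the `pipelineDF` from the example above (not part of the original card):
+
+```python
+# result = predicted tag; begin/end are character offsets into the input text.
+pipelineDF.selectExpr("explode(ner) AS e") \
+    .selectExpr("e.begin", "e.end", "e.result AS label") \
+    .show(truncate=False)
+```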
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_negation_scope_classification| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/liatoutou/distilbert-base-uncased-finetuned-negation-scope-classification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_negation_scope_classification_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_negation_scope_classification_pipeline_en.md new file mode 100644 index 00000000000000..653d1d3090e3ed --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_negation_scope_classification_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_negation_scope_classification_pipeline pipeline DistilBertForTokenClassification from liatoutou +author: John Snow Labs +name: distilbert_base_uncased_finetuned_negation_scope_classification_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_negation_scope_classification_pipeline` is a English model originally trained by liatoutou. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_negation_scope_classification_pipeline_en_5.5.0_3.0_1725500574528.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_negation_scope_classification_pipeline_en_5.5.0_3.0_1725500574528.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_negation_scope_classification_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_negation_scope_classification_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
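+
+For ad-hoc checks, the pretrained pipeline can also be driven with plain strings instead of a DataFrame. A sketch using `fullAnnotate`, which returns annotation objects with character offsets; the output column name `ner` is an assumption about this pipeline's token classifier (not part of the original card):
+
+```python
+# fullAnnotate returns one dict of annotation lists per input string.
+result = pipeline.fullAnnotate("Patient denies chest pain.")[0]
+for ann in result["ner"]:  # assumed output column name
+    print(ann.begin, ann.end, ann.result)
+```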
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_negation_scope_classification_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/liatoutou/distilbert-base-uncased-finetuned-negation-scope-classification + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_ceciliafu_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_ceciliafu_pipeline_en.md new file mode 100644 index 00000000000000..531096a4425003 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_ceciliafu_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_ceciliafu_pipeline pipeline DistilBertForTokenClassification from CeciliaFu +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_ceciliafu_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_ceciliafu_pipeline` is a English model originally trained by CeciliaFu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_ceciliafu_pipeline_en_5.5.0_3.0_1725500537445.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_ceciliafu_pipeline_en_5.5.0_3.0_1725500537445.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_ner_ceciliafu_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_ner_ceciliafu_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_ceciliafu_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.4 MB| + +## References + +https://huggingface.co/CeciliaFu/distilbert-base-uncased-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_digidix28_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_digidix28_pipeline_en.md new file mode 100644 index 00000000000000..021b0a0c814af3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_digidix28_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_digidix28_pipeline pipeline DistilBertForTokenClassification from Digidix28 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_digidix28_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_digidix28_pipeline` is a English model originally trained by Digidix28. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_digidix28_pipeline_en_5.5.0_3.0_1725500695254.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_digidix28_pipeline_en_5.5.0_3.0_1725500695254.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_ner_digidix28_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_ner_digidix28_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_digidix28_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Digidix28/distilbert-base-uncased-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_douglasadams11_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_douglasadams11_en.md new file mode 100644 index 00000000000000..7016996bed7126 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_douglasadams11_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_douglasadams11 DistilBertForTokenClassification from douglasadams11 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_douglasadams11 +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_douglasadams11` is a English model originally trained by douglasadams11. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_douglasadams11_en_5.5.0_3.0_1725500401175.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_douglasadams11_en_5.5.0_3.0_1725500401175.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_douglasadams11","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_douglasadams11", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
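+
+The example above stops at `pipelineDF`; a short sketch of one way to read the predictions back out, assuming the output columns keep the names configured above (`token` and `ner`):
+
+```python
+# Each annotation column is an array of structs; "result" holds the token text
+# for the tokenizer and the predicted tag for the token classifier.
+pipelineDF.select("token.result", "ner.result").show(truncate = False)
+```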
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_douglasadams11| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/douglasadams11/distilbert-base-uncased-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_fatimetou_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_fatimetou_en.md new file mode 100644 index 00000000000000..c228e405872e88 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_fatimetou_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_fatimetou DistilBertForTokenClassification from Fatimetou +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_fatimetou +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_fatimetou` is a English model originally trained by Fatimetou. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_fatimetou_en_5.5.0_3.0_1725518867084.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_fatimetou_en_5.5.0_3.0_1725518867084.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_fatimetou","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_fatimetou", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_fatimetou| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Fatimetou/distilbert-base-uncased-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_fatimetou_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_fatimetou_pipeline_en.md new file mode 100644 index 00000000000000..cbcf4d0aa10a46 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_fatimetou_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_fatimetou_pipeline pipeline DistilBertForTokenClassification from Fatimetou +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_fatimetou_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_fatimetou_pipeline` is a English model originally trained by Fatimetou. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_fatimetou_pipeline_en_5.5.0_3.0_1725518879603.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_fatimetou_pipeline_en_5.5.0_3.0_1725518879603.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_ner_fatimetou_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_ner_fatimetou_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_fatimetou_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.4 MB| + +## References + +https://huggingface.co/Fatimetou/distilbert-base-uncased-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_furongzou_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_furongzou_pipeline_en.md new file mode 100644 index 00000000000000..4b3f6f2320c9fd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_furongzou_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_furongzou_pipeline pipeline DistilBertForTokenClassification from FurongZou +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_furongzou_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_furongzou_pipeline` is a English model originally trained by FurongZou. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_furongzou_pipeline_en_5.5.0_3.0_1725518488746.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_furongzou_pipeline_en_5.5.0_3.0_1725518488746.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_ner_furongzou_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_ner_furongzou_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_furongzou_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/FurongZou/distilbert-base-uncased-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_ggital_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_ggital_pipeline_en.md new file mode 100644 index 00000000000000..faf1b4ec3234dc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_ggital_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_ggital_pipeline pipeline DistilBertForTokenClassification from GGital +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_ggital_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_ggital_pipeline` is a English model originally trained by GGital. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_ggital_pipeline_en_5.5.0_3.0_1725495920416.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_ggital_pipeline_en_5.5.0_3.0_1725495920416.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_ner_ggital_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_ner_ggital_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_ggital_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/GGital/distilbert-base-uncased-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_hcy5561_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_hcy5561_en.md new file mode 100644 index 00000000000000..de62917d9b9872 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_hcy5561_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_hcy5561 DistilBertForTokenClassification from hcy5561 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_hcy5561 +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_hcy5561` is a English model originally trained by hcy5561. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_hcy5561_en_5.5.0_3.0_1725518377772.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_hcy5561_en_5.5.0_3.0_1725518377772.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_hcy5561","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_hcy5561", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_hcy5561| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/hcy5561/distilbert-base-uncased-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_hcy5561_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_hcy5561_pipeline_en.md new file mode 100644 index 00000000000000..d48686670aa5e5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_hcy5561_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_hcy5561_pipeline pipeline DistilBertForTokenClassification from hcy5561 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_hcy5561_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_hcy5561_pipeline` is a English model originally trained by hcy5561. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_hcy5561_pipeline_en_5.5.0_3.0_1725518389816.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_hcy5561_pipeline_en_5.5.0_3.0_1725518389816.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_ner_hcy5561_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_ner_hcy5561_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_hcy5561_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/hcy5561/distilbert-base-uncased-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_karunac_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_karunac_en.md new file mode 100644 index 00000000000000..61c7b9a96cfd1d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_karunac_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_karunac DistilBertForTokenClassification from karunac +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_karunac +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_karunac` is a English model originally trained by karunac. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_karunac_en_5.5.0_3.0_1725506127237.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_karunac_en_5.5.0_3.0_1725506127237.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_karunac","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_karunac", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_karunac| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/karunac/distilbert-base-uncased-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_karunac_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_karunac_pipeline_en.md new file mode 100644 index 00000000000000..2326d8d79b324c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_karunac_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_karunac_pipeline pipeline DistilBertForTokenClassification from karunac +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_karunac_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_karunac_pipeline` is a English model originally trained by karunac. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_karunac_pipeline_en_5.5.0_3.0_1725506140393.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_karunac_pipeline_en_5.5.0_3.0_1725506140393.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_ner_karunac_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_ner_karunac_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_karunac_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/karunac/distilbert-base-uncased-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_lum4yx_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_lum4yx_en.md new file mode 100644 index 00000000000000..855ee883d3c8a6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_lum4yx_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_lum4yx DistilBertForTokenClassification from Lum4yx +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_lum4yx +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_lum4yx` is a English model originally trained by Lum4yx. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_lum4yx_en_5.5.0_3.0_1725500996782.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_lum4yx_en_5.5.0_3.0_1725500996782.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_lum4yx","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_lum4yx", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_lum4yx| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Lum4yx/distilbert-base-uncased-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_mldscz_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_mldscz_en.md new file mode 100644 index 00000000000000..a089e410c5e54e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_mldscz_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_mldscz DistilBertForTokenClassification from mldscz +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_mldscz +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_mldscz` is a English model originally trained by mldscz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_mldscz_en_5.5.0_3.0_1725518663102.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_mldscz_en_5.5.0_3.0_1725518663102.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_mldscz","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_mldscz", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_mldscz| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/mldscz/distilbert-base-uncased-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_mldscz_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_mldscz_pipeline_en.md new file mode 100644 index 00000000000000..389bf61d709600 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_mldscz_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_mldscz_pipeline pipeline DistilBertForTokenClassification from mldscz +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_mldscz_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_mldscz_pipeline` is a English model originally trained by mldscz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_mldscz_pipeline_en_5.5.0_3.0_1725518675237.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_mldscz_pipeline_en_5.5.0_3.0_1725518675237.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_ner_mldscz_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_ner_mldscz_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_mldscz_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/mldscz/distilbert-base-uncased-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_nsboan_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_nsboan_en.md new file mode 100644 index 00000000000000..39b4d56baa80ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_nsboan_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_nsboan DistilBertForTokenClassification from nsboan +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_nsboan +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_nsboan` is a English model originally trained by nsboan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_nsboan_en_5.5.0_3.0_1725518359129.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_nsboan_en_5.5.0_3.0_1725518359129.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_nsboan","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_nsboan", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_nsboan| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/nsboan/distilbert-base-uncased-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_shuvayanti_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_shuvayanti_pipeline_en.md new file mode 100644 index 00000000000000..035eb66655a405 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_shuvayanti_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_shuvayanti_pipeline pipeline DistilBertForTokenClassification from shuvayanti +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_shuvayanti_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_shuvayanti_pipeline` is a English model originally trained by shuvayanti. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_shuvayanti_pipeline_en_5.5.0_3.0_1725506273699.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_shuvayanti_pipeline_en_5.5.0_3.0_1725506273699.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_ner_shuvayanti_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_ner_shuvayanti_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_shuvayanti_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/shuvayanti/distilbert-base-uncased-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_ugrozkr_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_ugrozkr_pipeline_en.md new file mode 100644 index 00000000000000..403b6b5483079b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_ner_ugrozkr_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_ugrozkr_pipeline pipeline DistilBertForTokenClassification from ugrozkr +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_ugrozkr_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_ugrozkr_pipeline` is a English model originally trained by ugrozkr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_ugrozkr_pipeline_en_5.5.0_3.0_1725495661847.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_ugrozkr_pipeline_en_5.5.0_3.0_1725495661847.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_ner_ugrozkr_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_ner_ugrozkr_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_ugrozkr_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/ugrozkr/distilbert-base-uncased-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_reactjs_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_reactjs_en.md new file mode 100644 index 00000000000000..83569b506bfa0a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_reactjs_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_reactjs DistilBertEmbeddings from mjalg +author: John Snow Labs +name: distilbert_base_uncased_finetuned_reactjs +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_reactjs` is a English model originally trained by mjalg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_reactjs_en_5.5.0_3.0_1725524394782.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_reactjs_en_5.5.0_3.0_1725524394782.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_reactjs","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_reactjs","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
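+
+For the embeddings models, the vectors live in the `embeddings` field of the output annotations; a sketch of pulling them out of `pipelineDF` (the field access follows the standard Spark NLP annotation schema and is an assumption, not part of the generated card):
+
+```python
+# "embeddings.embeddings" is an array (one entry per token) of float vectors.
+row = pipelineDF.select("embeddings.embeddings").first()
+vectors = row[0]
+print(len(vectors), len(vectors[0]))  # token count and vector size (768 for DistilBERT base)
+```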
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_reactjs| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/mjalg/distilbert-base-uncased-finetuned-reactjs \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_reactjs_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_reactjs_pipeline_en.md new file mode 100644 index 00000000000000..ecef3641d688fc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_reactjs_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_reactjs_pipeline pipeline DistilBertEmbeddings from mjalg +author: John Snow Labs +name: distilbert_base_uncased_finetuned_reactjs_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_reactjs_pipeline` is a English model originally trained by mjalg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_reactjs_pipeline_en_5.5.0_3.0_1725524408085.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_reactjs_pipeline_en_5.5.0_3.0_1725524408085.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_reactjs_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_reactjs_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_reactjs_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/mjalg/distilbert-base-uncased-finetuned-reactjs + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_recipes_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_recipes_en.md new file mode 100644 index 00000000000000..c51a09c6de8f35 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_recipes_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_recipes DistilBertEmbeddings from CharlyR +author: John Snow Labs +name: distilbert_base_uncased_finetuned_recipes +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_recipes` is a English model originally trained by CharlyR. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_recipes_en_5.5.0_3.0_1725524639919.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_recipes_en_5.5.0_3.0_1725524639919.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_recipes","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_recipes","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_recipes| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/CharlyR/distilbert-base-uncased-finetuned-recipes \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_sayula_popoluca_kazakh_3080_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_sayula_popoluca_kazakh_3080_en.md new file mode 100644 index 00000000000000..cc196ad11e5aa5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_sayula_popoluca_kazakh_3080_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_sayula_popoluca_kazakh_3080 DistilBertForTokenClassification from Justice0893 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_sayula_popoluca_kazakh_3080 +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_sayula_popoluca_kazakh_3080` is a English model originally trained by Justice0893. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_sayula_popoluca_kazakh_3080_en_5.5.0_3.0_1725518168565.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_sayula_popoluca_kazakh_3080_en_5.5.0_3.0_1725518168565.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_sayula_popoluca_kazakh_3080","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_sayula_popoluca_kazakh_3080", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
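+
+To inspect the predictions, the short sketch below (assuming the `pipelineDF` from the Python example above) prints the tokens next to the predicted tags; the two arrays are aligned position by position:
+
+```python
+# Tokens and their predicted tags are parallel arrays
+pipelineDF.select("token.result", "ner.result").show(truncate=False)
+```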
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_sayula_popoluca_kazakh_3080| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|181.4 MB| + +## References + +https://huggingface.co/Justice0893/distilbert-base-uncased-finetuned-pos-kk-3080 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_sentiment_luluw_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_sentiment_luluw_en.md new file mode 100644 index 00000000000000..88e796ef3889e0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_sentiment_luluw_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_sentiment_luluw DistilBertForSequenceClassification from luluw +author: John Snow Labs +name: distilbert_base_uncased_finetuned_sentiment_luluw +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_sentiment_luluw` is a English model originally trained by luluw. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_sentiment_luluw_en_5.5.0_3.0_1725580507231.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_sentiment_luluw_en_5.5.0_3.0_1725580507231.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_sentiment_luluw","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_sentiment_luluw", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
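+
+For a quick check of the predicted label, the following sketch (assuming the `pipelineDF` from the example above) selects the input text together with the classifier output:
+
+```python
+# `class.result` holds the predicted label for each input row
+pipelineDF.select("text", "class.result").show(truncate=False)
+```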
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_sentiment_luluw| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/luluw/distilbert-base-uncased-finetuned-sentiment \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_srl_jing1113_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_srl_jing1113_pipeline_en.md new file mode 100644 index 00000000000000..03be35015f5399 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_srl_jing1113_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_srl_jing1113_pipeline pipeline DistilBertForTokenClassification from Jing1113 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_srl_jing1113_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_srl_jing1113_pipeline` is a English model originally trained by Jing1113. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_srl_jing1113_pipeline_en_5.5.0_3.0_1725506042452.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_srl_jing1113_pipeline_en_5.5.0_3.0_1725506042452.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_srl_jing1113_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_srl_jing1113_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
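+
+The `df` above stands for any DataFrame with a `text` column. A self-contained Python sketch might look like the following; the `ner` output column name is an assumption based on the DistilBertForTokenClassification stage listed under "Included Models":
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# Build a tiny input DataFrame with the expected `text` column
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_srl_jing1113_pipeline", lang="en")
+annotations = pipeline.transform(df)
+annotations.select("ner.result").show(truncate=False)
+```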
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_srl_jing1113_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.4 MB| + +## References + +https://huggingface.co/Jing1113/distilbert-base-uncased-finetuned-srl + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_sst_2_english_distilbert_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_sst_2_english_distilbert_en.md new file mode 100644 index 00000000000000..0b690d084a45bf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_sst_2_english_distilbert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_sst_2_english_distilbert DistilBertForSequenceClassification from distilbert +author: John Snow Labs +name: distilbert_base_uncased_finetuned_sst_2_english_distilbert +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_sst_2_english_distilbert` is a English model originally trained by distilbert. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_sst_2_english_distilbert_en_5.5.0_3.0_1725580099889.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_sst_2_english_distilbert_en_5.5.0_3.0_1725580099889.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_sst_2_english_distilbert","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_sst_2_english_distilbert", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_sst_2_english_distilbert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_sst_2_english_distilbert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_sst_2_english_distilbert_pipeline_en.md new file mode 100644 index 00000000000000..4c4e0632ff39e2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_sst_2_english_distilbert_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_sst_2_english_distilbert_pipeline pipeline DistilBertForSequenceClassification from distilbert +author: John Snow Labs +name: distilbert_base_uncased_finetuned_sst_2_english_distilbert_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_sst_2_english_distilbert_pipeline` is a English model originally trained by distilbert. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_sst_2_english_distilbert_pipeline_en_5.5.0_3.0_1725580112785.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_sst_2_english_distilbert_pipeline_en_5.5.0_3.0_1725580112785.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_sst_2_english_distilbert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_sst_2_english_distilbert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_sst_2_english_distilbert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_yelp_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_yelp_en.md new file mode 100644 index 00000000000000..ea0f7dfc4a9613 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_finetuned_yelp_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_yelp DistilBertForSequenceClassification from vinhanguyen +author: John Snow Labs +name: distilbert_base_uncased_finetuned_yelp +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_yelp` is a English model originally trained by vinhanguyen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_yelp_en_5.5.0_3.0_1725579959577.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_yelp_en_5.5.0_3.0_1725579959577.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_yelp","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_yelp", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
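+
+For single sentences, wrapping the fitted pipeline in a `LightPipeline` avoids the overhead of a DataFrame round trip. A minimal sketch, reusing the `pipelineModel` from the example above:
+
+```python
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+# Returns a plain Python dict keyed by output column, e.g. {"document": [...], "token": [...], "class": [...]}
+print(light.annotate("I love spark-nlp"))
+```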
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_yelp| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.4 MB| + +## References + +https://huggingface.co/vinhanguyen/distilbert-base-uncased-finetuned-yelp \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_german_chamorro_cree_entry_classification_de.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_german_chamorro_cree_entry_classification_de.md new file mode 100644 index 00000000000000..ee0ffd2b7d650c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_german_chamorro_cree_entry_classification_de.md @@ -0,0 +1,94 @@ +--- +layout: model +title: German distilbert_base_uncased_german_chamorro_cree_entry_classification DistilBertForSequenceClassification from reflex-project +author: John Snow Labs +name: distilbert_base_uncased_german_chamorro_cree_entry_classification +date: 2024-09-05 +tags: [de, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_german_chamorro_cree_entry_classification` is a German model originally trained by reflex-project. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_german_chamorro_cree_entry_classification_de_5.5.0_3.0_1725580125963.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_german_chamorro_cree_entry_classification_de_5.5.0_3.0_1725580125963.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_german_chamorro_cree_entry_classification","de") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_german_chamorro_cree_entry_classification", "de")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_german_chamorro_cree_entry_classification| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|de| +|Size:|249.5 MB| + +## References + +https://huggingface.co/reflex-project/distilbert-base-uncased-german-ch-cr-entry-classification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_german_chamorro_cree_entry_classification_pipeline_de.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_german_chamorro_cree_entry_classification_pipeline_de.md new file mode 100644 index 00000000000000..0a2a5d5bc6c9ae --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_german_chamorro_cree_entry_classification_pipeline_de.md @@ -0,0 +1,70 @@ +--- +layout: model +title: German distilbert_base_uncased_german_chamorro_cree_entry_classification_pipeline pipeline DistilBertForSequenceClassification from reflex-project +author: John Snow Labs +name: distilbert_base_uncased_german_chamorro_cree_entry_classification_pipeline +date: 2024-09-05 +tags: [de, open_source, pipeline, onnx] +task: Text Classification +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_german_chamorro_cree_entry_classification_pipeline` is a German model originally trained by reflex-project. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_german_chamorro_cree_entry_classification_pipeline_de_5.5.0_3.0_1725580141724.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_german_chamorro_cree_entry_classification_pipeline_de_5.5.0_3.0_1725580141724.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_german_chamorro_cree_entry_classification_pipeline", lang = "de") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_german_chamorro_cree_entry_classification_pipeline", lang = "de") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_german_chamorro_cree_entry_classification_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|de| +|Size:|249.5 MB| + +## References + +https://huggingface.co/reflex-project/distilbert-base-uncased-german-ch-cr-entry-classification + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_odm_zphr_0st17sd_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_odm_zphr_0st17sd_en.md new file mode 100644 index 00000000000000..1e12bd781f7b45 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_odm_zphr_0st17sd_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_odm_zphr_0st17sd DistilBertForSequenceClassification from tom192180 +author: John Snow Labs +name: distilbert_base_uncased_odm_zphr_0st17sd +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_odm_zphr_0st17sd` is a English model originally trained by tom192180. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_odm_zphr_0st17sd_en_5.5.0_3.0_1725507531519.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_odm_zphr_0st17sd_en_5.5.0_3.0_1725507531519.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_odm_zphr_0st17sd","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_odm_zphr_0st17sd", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
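+
+Once fitted, the pipeline can be persisted and reloaded like any Spark ML `PipelineModel`. A sketch, reusing `pipelineModel` and `data` from above (the path is only illustrative):
+
+```python
+from pyspark.ml import PipelineModel
+
+# Save the fitted pipeline to disk, then load it back for reuse
+pipelineModel.write().overwrite().save("/tmp/distilbert_odm_zphr_0st17sd_pipeline")  # illustrative path
+restored = PipelineModel.load("/tmp/distilbert_odm_zphr_0st17sd_pipeline")
+restored.transform(data).select("class.result").show(truncate=False)
+```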
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_odm_zphr_0st17sd| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.6 MB| + +## References + +https://huggingface.co/tom192180/distilbert-base-uncased_odm_zphr_0st17sd \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_odm_zphr_0st17sd_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_odm_zphr_0st17sd_pipeline_en.md new file mode 100644 index 00000000000000..cf8de7e3d10eb7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_odm_zphr_0st17sd_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_odm_zphr_0st17sd_pipeline pipeline DistilBertForSequenceClassification from tom192180 +author: John Snow Labs +name: distilbert_base_uncased_odm_zphr_0st17sd_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_odm_zphr_0st17sd_pipeline` is a English model originally trained by tom192180. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_odm_zphr_0st17sd_pipeline_en_5.5.0_3.0_1725507543989.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_odm_zphr_0st17sd_pipeline_en_5.5.0_3.0_1725507543989.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_odm_zphr_0st17sd_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_odm_zphr_0st17sd_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_odm_zphr_0st17sd_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.7 MB| + +## References + +https://huggingface.co/tom192180/distilbert-base-uncased_odm_zphr_0st17sd + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_pii_finance_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_pii_finance_pipeline_en.md new file mode 100644 index 00000000000000..dab70b778105cf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_pii_finance_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_pii_finance_pipeline pipeline DistilBertForTokenClassification from devtibo +author: John Snow Labs +name: distilbert_base_uncased_pii_finance_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_pii_finance_pipeline` is a English model originally trained by devtibo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_pii_finance_pipeline_en_5.5.0_3.0_1725518813986.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_pii_finance_pipeline_en_5.5.0_3.0_1725518813986.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_pii_finance_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_pii_finance_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_pii_finance_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.6 MB| + +## References + +https://huggingface.co/devtibo/distilbert-base-uncased-pii-finance + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_qqp_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_qqp_pipeline_en.md new file mode 100644 index 00000000000000..dfc533f375be9c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_qqp_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_qqp_pipeline pipeline DistilBertForSequenceClassification from textattack +author: John Snow Labs +name: distilbert_base_uncased_qqp_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_qqp_pipeline` is a English model originally trained by textattack. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_qqp_pipeline_en_5.5.0_3.0_1725507436515.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_qqp_pipeline_en_5.5.0_3.0_1725507436515.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_qqp_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_qqp_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_qqp_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/textattack/distilbert-base-uncased-QQP + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_tokenclassification_yeji_seong_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_tokenclassification_yeji_seong_en.md new file mode 100644 index 00000000000000..68ee18dcff1e7b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_base_uncased_tokenclassification_yeji_seong_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_tokenclassification_yeji_seong DistilBertForTokenClassification from Yeji-Seong +author: John Snow Labs +name: distilbert_base_uncased_tokenclassification_yeji_seong +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_tokenclassification_yeji_seong` is a English model originally trained by Yeji-Seong. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_tokenclassification_yeji_seong_en_5.5.0_3.0_1725500274540.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_tokenclassification_yeji_seong_en_5.5.0_3.0_1725500274540.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_tokenclassification_yeji_seong","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_tokenclassification_yeji_seong", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
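+
+If only the tagged tokens are of interest, the sketch below (assuming the `pipelineDF` from the example above and the usual IOB-style tag set where `O` marks "no entity") keeps just the non-`O` predictions together with their character offsets:
+
+```python
+from pyspark.sql import functions as F
+
+# Keep only tokens that received a non-"O" tag
+pipelineDF.select(F.explode("ner").alias("entity")) \
+    .filter(F.col("entity.result") != "O") \
+    .select("entity.begin", "entity.end", "entity.result") \
+    .show(truncate=False)
+```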
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_tokenclassification_yeji_seong| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Yeji-Seong/distilbert-base-uncased-tokenclassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_drugscom_depression_reviews_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_drugscom_depression_reviews_pipeline_en.md new file mode 100644 index 00000000000000..0cd62d49182d01 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_drugscom_depression_reviews_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_drugscom_depression_reviews_pipeline pipeline DistilBertForSequenceClassification from Zakia +author: John Snow Labs +name: distilbert_drugscom_depression_reviews_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_drugscom_depression_reviews_pipeline` is a English model originally trained by Zakia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_drugscom_depression_reviews_pipeline_en_5.5.0_3.0_1725580366519.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_drugscom_depression_reviews_pipeline_en_5.5.0_3.0_1725580366519.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_drugscom_depression_reviews_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_drugscom_depression_reviews_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
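+
+For ad-hoc checks on single strings, `annotate` can be used instead of `transform`. A minimal sketch (the example sentence is illustrative):
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("distilbert_drugscom_depression_reviews_pipeline", lang="en")
+# Returns a dict keyed by the pipeline's output columns
+print(pipeline.annotate("This medication made a real difference for my symptoms"))
+```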
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_drugscom_depression_reviews_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Zakia/distilbert-drugscom_depression_reviews + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_enron_hf_format_ft_v2_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_enron_hf_format_ft_v2_en.md new file mode 100644 index 00000000000000..6e47f8ff9b46b2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_enron_hf_format_ft_v2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_enron_hf_format_ft_v2 DistilBertForTokenClassification from DymiumSpencer +author: John Snow Labs +name: distilbert_enron_hf_format_ft_v2 +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_enron_hf_format_ft_v2` is a English model originally trained by DymiumSpencer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_enron_hf_format_ft_v2_en_5.5.0_3.0_1725496122842.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_enron_hf_format_ft_v2_en_5.5.0_3.0_1725496122842.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_enron_hf_format_ft_v2","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_enron_hf_format_ft_v2", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_enron_hf_format_ft_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/DymiumSpencer/distilbert_enron_hf_format_ft_v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_enron_hf_format_ft_v2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_enron_hf_format_ft_v2_pipeline_en.md new file mode 100644 index 00000000000000..f1e8e1c8f0572c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_enron_hf_format_ft_v2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_enron_hf_format_ft_v2_pipeline pipeline DistilBertForTokenClassification from DymiumSpencer +author: John Snow Labs +name: distilbert_enron_hf_format_ft_v2_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_enron_hf_format_ft_v2_pipeline` is a English model originally trained by DymiumSpencer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_enron_hf_format_ft_v2_pipeline_en_5.5.0_3.0_1725496135037.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_enron_hf_format_ft_v2_pipeline_en_5.5.0_3.0_1725496135037.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_enron_hf_format_ft_v2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_enron_hf_format_ft_v2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_enron_hf_format_ft_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/DymiumSpencer/distilbert_enron_hf_format_ft_v2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_exp_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_exp_en.md new file mode 100644 index 00000000000000..41069436f4f2ae --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_exp_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_exp DistilBertForSequenceClassification from BruceT02 +author: John Snow Labs +name: distilbert_exp +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_exp` is a English model originally trained by BruceT02. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_exp_en_5.5.0_3.0_1725580620145.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_exp_en_5.5.0_3.0_1725580620145.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_exp","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_exp", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
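+
+Besides the label in `class.result`, each classification annotation typically carries per-label scores in its metadata. A sketch that surfaces them, assuming the `pipelineDF` from the example above:
+
+```python
+from pyspark.sql import functions as F
+
+pipelineDF.select(F.explode("class").alias("prediction")) \
+    .select(F.col("prediction.result").alias("label"), F.col("prediction.metadata").alias("scores")) \
+    .show(truncate=False)
+```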
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_exp| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/BruceT02/DistilBert_Exp \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_finetuned_ner_rasyosef_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_finetuned_ner_rasyosef_en.md new file mode 100644 index 00000000000000..58e438e0882486 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_finetuned_ner_rasyosef_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_finetuned_ner_rasyosef DistilBertForTokenClassification from rasyosef +author: John Snow Labs +name: distilbert_finetuned_ner_rasyosef +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_ner_rasyosef` is a English model originally trained by rasyosef. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_ner_rasyosef_en_5.5.0_3.0_1725500274819.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_ner_rasyosef_en_5.5.0_3.0_1725500274819.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_finetuned_ner_rasyosef","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_finetuned_ner_rasyosef", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_ner_rasyosef| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/rasyosef/distilbert-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_finetuned_ner_rasyosef_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_finetuned_ner_rasyosef_pipeline_en.md new file mode 100644 index 00000000000000..01912eecf18845 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_finetuned_ner_rasyosef_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_finetuned_ner_rasyosef_pipeline pipeline DistilBertForTokenClassification from rasyosef +author: John Snow Labs +name: distilbert_finetuned_ner_rasyosef_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_ner_rasyosef_pipeline` is a English model originally trained by rasyosef. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_ner_rasyosef_pipeline_en_5.5.0_3.0_1725500288607.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_ner_rasyosef_pipeline_en_5.5.0_3.0_1725500288607.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_finetuned_ner_rasyosef_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_finetuned_ner_rasyosef_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_ner_rasyosef_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/rasyosef/distilbert-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_finetuned_pii_mjalg_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_finetuned_pii_mjalg_en.md new file mode 100644 index 00000000000000..909667651f8f2e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_finetuned_pii_mjalg_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_finetuned_pii_mjalg DistilBertForTokenClassification from mjalg +author: John Snow Labs +name: distilbert_finetuned_pii_mjalg +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_pii_mjalg` is a English model originally trained by mjalg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_pii_mjalg_en_5.5.0_3.0_1725496015405.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_pii_mjalg_en_5.5.0_3.0_1725496015405.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_finetuned_pii_mjalg","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_finetuned_pii_mjalg", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_pii_mjalg| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.6 MB| + +## References + +https://huggingface.co/mjalg/distilbert_finetuned_pii_mjalg \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_finetuned_pii_mjalg_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_finetuned_pii_mjalg_pipeline_en.md new file mode 100644 index 00000000000000..8572fcfa9431a0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_finetuned_pii_mjalg_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_finetuned_pii_mjalg_pipeline pipeline DistilBertForTokenClassification from mjalg +author: John Snow Labs +name: distilbert_finetuned_pii_mjalg_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_pii_mjalg_pipeline` is a English model originally trained by mjalg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_pii_mjalg_pipeline_en_5.5.0_3.0_1725496028991.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_pii_mjalg_pipeline_en_5.5.0_3.0_1725496028991.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_finetuned_pii_mjalg_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_finetuned_pii_mjalg_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_pii_mjalg_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.6 MB| + +## References + +https://huggingface.co/mjalg/distilbert_finetuned_pii_mjalg + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_finetuned_token_classification_ner_trip_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_finetuned_token_classification_ner_trip_en.md new file mode 100644 index 00000000000000..55b20c7de5cd0b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_finetuned_token_classification_ner_trip_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_finetuned_token_classification_ner_trip DistilBertForTokenClassification from EliottClavier +author: John Snow Labs +name: distilbert_finetuned_token_classification_ner_trip +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_token_classification_ner_trip` is a English model originally trained by EliottClavier. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_token_classification_ner_trip_en_5.5.0_3.0_1725506048258.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_token_classification_ner_trip_en_5.5.0_3.0_1725506048258.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_finetuned_token_classification_ner_trip","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_finetuned_token_classification_ner_trip", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_token_classification_ner_trip| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/EliottClavier/distilbert-finetuned-token-classification-ner-trip \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_finetuned_token_classification_ner_trip_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_finetuned_token_classification_ner_trip_pipeline_en.md new file mode 100644 index 00000000000000..59afb525ed7545 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_finetuned_token_classification_ner_trip_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_finetuned_token_classification_ner_trip_pipeline pipeline DistilBertForTokenClassification from EliottClavier +author: John Snow Labs +name: distilbert_finetuned_token_classification_ner_trip_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_token_classification_ner_trip_pipeline` is a English model originally trained by EliottClavier. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_token_classification_ner_trip_pipeline_en_5.5.0_3.0_1725506060116.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_token_classification_ner_trip_pipeline_en_5.5.0_3.0_1725506060116.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_finetuned_token_classification_ner_trip_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_finetuned_token_classification_ner_trip_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_token_classification_ner_trip_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/EliottClavier/distilbert-finetuned-token-classification-ner-trip + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_finetuned_vietnamese_question_type_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_finetuned_vietnamese_question_type_en.md new file mode 100644 index 00000000000000..e76390e340f3c7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_finetuned_vietnamese_question_type_en.md @@ -0,0 +1,98 @@ +--- +layout: model +title: English distilbert_finetuned_vietnamese_question_type DistilBertForSequenceClassification from EddieChen372 +author: John Snow Labs +name: distilbert_finetuned_vietnamese_question_type +date: 2024-09-05 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_vietnamese_question_type` is a English model originally trained by EddieChen372. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_vietnamese_question_type_en_5.5.0_3.0_1725580677857.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_vietnamese_question_type_en_5.5.0_3.0_1725580677857.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+document_assembler = DocumentAssembler()\
+    .setInputCol("text")\
+    .setOutputCol("document")
+
+tokenizer = Tokenizer()\
+    .setInputCols("document")\
+    .setOutputCol("token")
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_finetuned_vietnamese_question_type","en")\
+    .setInputCols(["document","token"])\
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier])
+
+data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+val document_assembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_finetuned_vietnamese_question_type","en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier))
+
+val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text")
+
+val result = pipeline.fit(data).transform(data)
+```
+</div>
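+
+To read the prediction back out of `result`, the `class` output column configured above can be selected directly; this is a small sketch using standard Spark column selection, not an addition to the model itself.
+
+```python
+# One predicted label per input row (the "class" column holds an array of annotations).
+result.select("text", "class.result").show(truncate=False)
+```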
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_vietnamese_question_type| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|507.6 MB| + +## References + +References + +https://huggingface.co/EddieChen372/distilbert-finetuned-vi-question_type \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_finetuned_vietnamese_question_type_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_finetuned_vietnamese_question_type_pipeline_en.md new file mode 100644 index 00000000000000..9ff147c9fb8679 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_finetuned_vietnamese_question_type_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_finetuned_vietnamese_question_type_pipeline pipeline DistilBertForSequenceClassification from hchautran +author: John Snow Labs +name: distilbert_finetuned_vietnamese_question_type_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_vietnamese_question_type_pipeline` is a English model originally trained by hchautran. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_vietnamese_question_type_pipeline_en_5.5.0_3.0_1725580702790.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_vietnamese_question_type_pipeline_en_5.5.0_3.0_1725580702790.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_finetuned_vietnamese_question_type_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_finetuned_vietnamese_question_type_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_vietnamese_question_type_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|507.6 MB| + +## References + +https://huggingface.co/hchautran/distilbert-finetuned-vi-question_type + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_git_commits_bugfix_classification_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_git_commits_bugfix_classification_en.md new file mode 100644 index 00000000000000..78e2893aa8376e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_git_commits_bugfix_classification_en.md @@ -0,0 +1,98 @@ +--- +layout: model +title: English distilbert_git_commits_bugfix_classification DistilBertForSequenceClassification from nos1de +author: John Snow Labs +name: distilbert_git_commits_bugfix_classification +date: 2024-09-05 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_git_commits_bugfix_classification` is a English model originally trained by nos1de. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_git_commits_bugfix_classification_en_5.5.0_3.0_1725580521004.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_git_commits_bugfix_classification_en_5.5.0_3.0_1725580521004.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+document_assembler = DocumentAssembler()\
+    .setInputCol("text")\
+    .setOutputCol("document")
+
+tokenizer = Tokenizer()\
+    .setInputCols("document")\
+    .setOutputCol("token")
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_git_commits_bugfix_classification","en")\
+    .setInputCols(["document","token"])\
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier])
+
+data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+val document_assembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_git_commits_bugfix_classification","en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier))
+
+val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text")
+
+val result = pipeline.fit(data).transform(data)
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_git_commits_bugfix_classification| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +References + +https://huggingface.co/nos1de/distilbert-git-commits-bugfix-classification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_imdb_padding20model_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_imdb_padding20model_en.md new file mode 100644 index 00000000000000..395b7d461ef4be --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_imdb_padding20model_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_imdb_padding20model DistilBertForSequenceClassification from Realgon +author: John Snow Labs +name: distilbert_imdb_padding20model +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_imdb_padding20model` is a English model originally trained by Realgon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_imdb_padding20model_en_5.5.0_3.0_1725580258332.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_imdb_padding20model_en_5.5.0_3.0_1725580258332.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_imdb_padding20model","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_imdb_padding20model", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_imdb_padding20model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Realgon/distilbert_imdb_padding20model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_imdb_padding20model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_imdb_padding20model_pipeline_en.md new file mode 100644 index 00000000000000..8305ed3ab49d82 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_imdb_padding20model_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_imdb_padding20model_pipeline pipeline DistilBertForSequenceClassification from Realgon +author: John Snow Labs +name: distilbert_imdb_padding20model_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_imdb_padding20model_pipeline` is a English model originally trained by Realgon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_imdb_padding20model_pipeline_en_5.5.0_3.0_1725580271122.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_imdb_padding20model_pipeline_en_5.5.0_3.0_1725580271122.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_imdb_padding20model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_imdb_padding20model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
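+
+Besides `transform`, the `PretrainedPipeline` helper can also score a single string with `annotate`, which is handy for quick checks. The example review text below is made up, and the `"class"` key assumes the classifier stage keeps the output column name used in the corresponding model card.
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("distilbert_imdb_padding20model_pipeline", lang="en")
+
+# annotate() runs the whole pipeline on one string and returns a dict keyed by output column.
+prediction = pipeline.annotate("This movie was a complete waste of time.")
+print(prediction["class"])
+```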
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_imdb_padding20model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Realgon/distilbert_imdb_padding20model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_imdb_padding40model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_imdb_padding40model_pipeline_en.md new file mode 100644 index 00000000000000..2e6e0f1c7a71d7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_imdb_padding40model_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_imdb_padding40model_pipeline pipeline DistilBertForSequenceClassification from Realgon +author: John Snow Labs +name: distilbert_imdb_padding40model_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_imdb_padding40model_pipeline` is a English model originally trained by Realgon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_imdb_padding40model_pipeline_en_5.5.0_3.0_1725507783107.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_imdb_padding40model_pipeline_en_5.5.0_3.0_1725507783107.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_imdb_padding40model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_imdb_padding40model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_imdb_padding40model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Realgon/distilbert_imdb_padding40model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_lolchamps_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_lolchamps_pipeline_en.md new file mode 100644 index 00000000000000..81a353d2f2cf7a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_lolchamps_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_lolchamps_pipeline pipeline DistilBertEmbeddings from avinot +author: John Snow Labs +name: distilbert_lolchamps_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_lolchamps_pipeline` is a English model originally trained by avinot. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_lolchamps_pipeline_en_5.5.0_3.0_1725524055232.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_lolchamps_pipeline_en_5.5.0_3.0_1725524055232.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_lolchamps_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_lolchamps_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_lolchamps_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/avinot/distilbert-lolchamps + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_ner_augmented_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_ner_augmented_pipeline_en.md new file mode 100644 index 00000000000000..5e0625ecea3aee --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_ner_augmented_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_ner_augmented_pipeline pipeline DistilBertForTokenClassification from Azure-Heights +author: John Snow Labs +name: distilbert_ner_augmented_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_ner_augmented_pipeline` is a English model originally trained by Azure-Heights. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_ner_augmented_pipeline_en_5.5.0_3.0_1725518453682.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_ner_augmented_pipeline_en_5.5.0_3.0_1725518453682.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_ner_augmented_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_ner_augmented_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_ner_augmented_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Azure-Heights/distilbert-ner-augmented + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_ner_japanese_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_ner_japanese_pipeline_en.md new file mode 100644 index 00000000000000..74b5ce25b182d3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_ner_japanese_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_ner_japanese_pipeline pipeline DistilBertForTokenClassification from rizkyfoxcale +author: John Snow Labs +name: distilbert_ner_japanese_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_ner_japanese_pipeline` is a English model originally trained by rizkyfoxcale. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_ner_japanese_pipeline_en_5.5.0_3.0_1725506504663.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_ner_japanese_pipeline_en_5.5.0_3.0_1725506504663.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_ner_japanese_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_ner_japanese_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_ner_japanese_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|505.4 MB| + +## References + +https://huggingface.co/rizkyfoxcale/distilbert-ner-ja + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_sentiment_analysis_multiclass_dataset_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_sentiment_analysis_multiclass_dataset_en.md new file mode 100644 index 00000000000000..da401e704ba160 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_sentiment_analysis_multiclass_dataset_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_sentiment_analysis_multiclass_dataset DistilBertForSequenceClassification from xsending +author: John Snow Labs +name: distilbert_sentiment_analysis_multiclass_dataset +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_sentiment_analysis_multiclass_dataset` is a English model originally trained by xsending. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_sentiment_analysis_multiclass_dataset_en_5.5.0_3.0_1725507737532.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_sentiment_analysis_multiclass_dataset_en_5.5.0_3.0_1725507737532.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_sentiment_analysis_multiclass_dataset","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_sentiment_analysis_multiclass_dataset", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_sentiment_analysis_multiclass_dataset| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/xsending/distilBERT_sentiment_analysis_multiclass_dataset \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_sentiment_analysis_multiclass_dataset_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_sentiment_analysis_multiclass_dataset_pipeline_en.md new file mode 100644 index 00000000000000..1d72e24565304c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_sentiment_analysis_multiclass_dataset_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_sentiment_analysis_multiclass_dataset_pipeline pipeline DistilBertForSequenceClassification from xsending +author: John Snow Labs +name: distilbert_sentiment_analysis_multiclass_dataset_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_sentiment_analysis_multiclass_dataset_pipeline` is a English model originally trained by xsending. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_sentiment_analysis_multiclass_dataset_pipeline_en_5.5.0_3.0_1725507749160.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_sentiment_analysis_multiclass_dataset_pipeline_en_5.5.0_3.0_1725507749160.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_sentiment_analysis_multiclass_dataset_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_sentiment_analysis_multiclass_dataset_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_sentiment_analysis_multiclass_dataset_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/xsending/distilBERT_sentiment_analysis_multiclass_dataset + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_sentiment_classifier_kiel1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_sentiment_classifier_kiel1_pipeline_en.md new file mode 100644 index 00000000000000..87caced9a8fc37 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_sentiment_classifier_kiel1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_sentiment_classifier_kiel1_pipeline pipeline DistilBertForSequenceClassification from kieltraining +author: John Snow Labs +name: distilbert_sentiment_classifier_kiel1_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_sentiment_classifier_kiel1_pipeline` is a English model originally trained by kieltraining. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_sentiment_classifier_kiel1_pipeline_en_5.5.0_3.0_1725507432710.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_sentiment_classifier_kiel1_pipeline_en_5.5.0_3.0_1725507432710.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_sentiment_classifier_kiel1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_sentiment_classifier_kiel1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_sentiment_classifier_kiel1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/kieltraining/distilbert-sentiment-classifier_kiel1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_tokenizer_256k_mlm_500k_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_tokenizer_256k_mlm_500k_en.md new file mode 100644 index 00000000000000..66789be5f52742 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_tokenizer_256k_mlm_500k_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_tokenizer_256k_mlm_500k DistilBertEmbeddings from vocab-transformers +author: John Snow Labs +name: distilbert_tokenizer_256k_mlm_500k +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_tokenizer_256k_mlm_500k` is a English model originally trained by vocab-transformers. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_tokenizer_256k_mlm_500k_en_5.5.0_3.0_1725524112483.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_tokenizer_256k_mlm_500k_en_5.5.0_3.0_1725524112483.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_tokenizer_256k_mlm_500k","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_tokenizer_256k_mlm_500k","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
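+
+The per-token vectors live in the `embeddings` output column of `pipelineDF`; each entry is a Spark NLP annotation whose `embeddings` field holds the vector. The selection below is a sketch based on the standard annotation schema.
+
+```python
+# One row per token, with the token text and the dimensionality of its vector.
+pipelineDF.selectExpr("explode(embeddings) as ann") \
+    .selectExpr("ann.result as token", "size(ann.embeddings) as dim") \
+    .show(truncate=False)
+```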
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_tokenizer_256k_mlm_500k| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|899.6 MB| + +## References + +https://huggingface.co/vocab-transformers/distilbert-tokenizer_256k-MLM_500k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_tokenizer_256k_mlm_500k_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_tokenizer_256k_mlm_500k_pipeline_en.md new file mode 100644 index 00000000000000..c41db597cb3178 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_tokenizer_256k_mlm_500k_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_tokenizer_256k_mlm_500k_pipeline pipeline DistilBertEmbeddings from vocab-transformers +author: John Snow Labs +name: distilbert_tokenizer_256k_mlm_500k_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_tokenizer_256k_mlm_500k_pipeline` is a English model originally trained by vocab-transformers. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_tokenizer_256k_mlm_500k_pipeline_en_5.5.0_3.0_1725524155337.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_tokenizer_256k_mlm_500k_pipeline_en_5.5.0_3.0_1725524155337.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_tokenizer_256k_mlm_500k_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_tokenizer_256k_mlm_500k_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_tokenizer_256k_mlm_500k_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|899.6 MB| + +## References + +https://huggingface.co/vocab-transformers/distilbert-tokenizer_256k-MLM_500k + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_toxic_detector_multi_label_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_toxic_detector_multi_label_en.md new file mode 100644 index 00000000000000..7e56e77dbc8848 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_toxic_detector_multi_label_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_toxic_detector_multi_label DistilBertForSequenceClassification from borodache +author: John Snow Labs +name: distilbert_toxic_detector_multi_label +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_toxic_detector_multi_label` is a English model originally trained by borodache. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_toxic_detector_multi_label_en_5.5.0_3.0_1725507624337.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_toxic_detector_multi_label_en_5.5.0_3.0_1725507624337.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_toxic_detector_multi_label","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_toxic_detector_multi_label", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_toxic_detector_multi_label| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/borodache/distilBERT_toxic_detector_multi_label \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_toxic_detector_multi_label_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_toxic_detector_multi_label_pipeline_en.md new file mode 100644 index 00000000000000..ea27393804a633 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_toxic_detector_multi_label_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_toxic_detector_multi_label_pipeline pipeline DistilBertForSequenceClassification from borodache +author: John Snow Labs +name: distilbert_toxic_detector_multi_label_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_toxic_detector_multi_label_pipeline` is a English model originally trained by borodache. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_toxic_detector_multi_label_pipeline_en_5.5.0_3.0_1725507637383.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_toxic_detector_multi_label_pipeline_en_5.5.0_3.0_1725507637383.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_toxic_detector_multi_label_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_toxic_detector_multi_label_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_toxic_detector_multi_label_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/borodache/distilBERT_toxic_detector_multi_label + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_turkish_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_turkish_ner_pipeline_en.md new file mode 100644 index 00000000000000..0eaec22d3ed149 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_turkish_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_turkish_ner_pipeline pipeline DistilBertForTokenClassification from pnr-svc +author: John Snow Labs +name: distilbert_turkish_ner_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_turkish_ner_pipeline` is a English model originally trained by pnr-svc. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_turkish_ner_pipeline_en_5.5.0_3.0_1725506014380.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_turkish_ner_pipeline_en_5.5.0_3.0_1725506014380.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_turkish_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_turkish_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_turkish_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|251.9 MB| + +## References + +https://huggingface.co/pnr-svc/distilbert-turkish-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilbert_v0_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilbert_v0_en.md new file mode 100644 index 00000000000000..9cc6f67107d229 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilbert_v0_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_v0 DistilBertForTokenClassification from labicquette +author: John Snow Labs +name: distilbert_v0 +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_v0` is a English model originally trained by labicquette. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_v0_en_5.5.0_3.0_1725518689222.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_v0_en_5.5.0_3.0_1725518689222.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_v0","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_v0", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_v0| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/labicquette/distilbert-v0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distill_sarcasm_english_en.md b/docs/_posts/ahmedlone127/2024-09-05-distill_sarcasm_english_en.md new file mode 100644 index 00000000000000..3a6a36617ebc84 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distill_sarcasm_english_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distill_sarcasm_english DistilBertForSequenceClassification from arthd24 +author: John Snow Labs +name: distill_sarcasm_english +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distill_sarcasm_english` is a English model originally trained by arthd24. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distill_sarcasm_english_en_5.5.0_3.0_1725580229597.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distill_sarcasm_english_en_5.5.0_3.0_1725580229597.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distill_sarcasm_english","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distill_sarcasm_english", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
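+
+After `transform`, the predicted label travels in the `class` column as an annotation. Continuing the Python example above, one way to surface it next to the input text (the per-label confidence scores in `metadata` are typical for sequence classifiers, but not guaranteed for every export):
+
+```python
+# "class.result" resolves to the array of predicted labels for each row.
+pipelineDF.select("text", "class.result").show(truncate=False)
+
+# The annotation metadata usually carries the per-class confidence scores.
+pipelineDF.selectExpr("explode(`class`) as pred") \
+    .selectExpr("pred.result", "pred.metadata") \
+    .show(truncate=False)
+```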
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distill_sarcasm_english| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/arthd24/distill_sarcasm_en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distill_sarcasm_english_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distill_sarcasm_english_pipeline_en.md new file mode 100644 index 00000000000000..522afe9c699e07 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distill_sarcasm_english_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distill_sarcasm_english_pipeline pipeline DistilBertForSequenceClassification from arthd24 +author: John Snow Labs +name: distill_sarcasm_english_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distill_sarcasm_english_pipeline` is a English model originally trained by arthd24. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distill_sarcasm_english_pipeline_en_5.5.0_3.0_1725580242230.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distill_sarcasm_english_pipeline_en_5.5.0_3.0_1725580242230.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distill_sarcasm_english_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distill_sarcasm_english_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
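+
+For quick checks on single strings, the same pretrained pipeline can be driven through its `annotate` helper instead of building a DataFrame first (sketch; the example sentence is arbitrary):
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+pipeline = PretrainedPipeline("distill_sarcasm_english_pipeline", lang="en")
+
+# annotate() runs the whole pipeline on one string and returns a dict of output columns.
+result = pipeline.annotate("Oh great, another Monday morning meeting.")
+print(result)
+```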
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distill_sarcasm_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/arthd24/distill_sarcasm_en + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distillbert_fine_tune_ner_task_en.md b/docs/_posts/ahmedlone127/2024-09-05-distillbert_fine_tune_ner_task_en.md new file mode 100644 index 00000000000000..3cd44460612cf2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distillbert_fine_tune_ner_task_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distillbert_fine_tune_ner_task DistilBertForTokenClassification from mokarakaya +author: John Snow Labs +name: distillbert_fine_tune_ner_task +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distillbert_fine_tune_ner_task` is a English model originally trained by mokarakaya. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distillbert_fine_tune_ner_task_en_5.5.0_3.0_1725495730972.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distillbert_fine_tune_ner_task_en_5.5.0_3.0_1725495730972.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distillbert_fine_tune_ner_task","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distillbert_fine_tune_ner_task", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
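+
+A convenient way to line each token up with its predicted tag is to zip the two annotation arrays, as in the sketch below (continuing the Python example above):
+
+```python
+from pyspark.sql import functions as F
+
+# token.result and ner.result are parallel arrays: one token string and one tag per position.
+pipelineDF.select(
+    F.arrays_zip(F.col("token.result"), F.col("ner.result")).alias("token_tag_pairs")
+).show(truncate=False)
+```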
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distillbert_fine_tune_ner_task| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/mokarakaya/distillbert-fine-tune-ner-task \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distillbert_finetuned_ner_btc_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distillbert_finetuned_ner_btc_pipeline_en.md new file mode 100644 index 00000000000000..4c6d0ed08dd166 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distillbert_finetuned_ner_btc_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distillbert_finetuned_ner_btc_pipeline pipeline DistilBertForTokenClassification from farrukhrasool112 +author: John Snow Labs +name: distillbert_finetuned_ner_btc_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distillbert_finetuned_ner_btc_pipeline` is a English model originally trained by farrukhrasool112. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distillbert_finetuned_ner_btc_pipeline_en_5.5.0_3.0_1725518267877.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distillbert_finetuned_ner_btc_pipeline_en_5.5.0_3.0_1725518267877.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distillbert_finetuned_ner_btc_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distillbert_finetuned_ner_btc_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
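+
+To run the pipeline over a whole file of texts rather than a hand-built DataFrame, one option is to read the file line by line and rename the column to the name the pipeline expects (assumed here to be `text`; the file path is a placeholder):
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+# spark.read.text() yields a single "value" column, one row per line of the input file.
+df = spark.read.text("tweets.txt").withColumnRenamed("value", "text")
+
+pipeline = PretrainedPipeline("distillbert_finetuned_ner_btc_pipeline", lang="en")
+pipeline.transform(df).show(truncate=False)
+```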
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distillbert_finetuned_ner_btc_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/farrukhrasool112/distillbert-finetuned-ner-btc + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilled_roberta_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilled_roberta_en.md new file mode 100644 index 00000000000000..bcdb04c2fe1b5b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilled_roberta_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilled_roberta RoBertaEmbeddings from tanyildizderya +author: John Snow Labs +name: distilled_roberta +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilled_roberta` is a English model originally trained by tanyildizderya. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilled_roberta_en_5.5.0_3.0_1725572820400.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilled_roberta_en_5.5.0_3.0_1725572820400.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("distilled_roberta","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("distilled_roberta","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
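+
+Continuing the Python example above, each token annotation in the `embeddings` column carries its vector in the `embeddings` field; a quick way to inspect them:
+
+```python
+# One row per token: the token text plus its embedding vector.
+pipelineDF.selectExpr("explode(embeddings) as emb") \
+    .selectExpr("emb.result as token", "emb.embeddings as vector") \
+    .show(truncate=False)
+```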
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilled_roberta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|482.0 MB| + +## References + +https://huggingface.co/tanyildizderya/distilled-roberta \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilled_roberta_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilled_roberta_pipeline_en.md new file mode 100644 index 00000000000000..3f58ab78e99e5a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilled_roberta_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilled_roberta_pipeline pipeline RoBertaEmbeddings from tanyildizderya +author: John Snow Labs +name: distilled_roberta_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilled_roberta_pipeline` is a English model originally trained by tanyildizderya. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilled_roberta_pipeline_en_5.5.0_3.0_1725572962615.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilled_roberta_pipeline_en_5.5.0_3.0_1725572962615.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilled_roberta_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilled_roberta_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilled_roberta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|482.0 MB| + +## References + +https://huggingface.co/tanyildizderya/distilled-roberta + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilroberta_base_finetuned_energy_tweets_fullsample_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilroberta_base_finetuned_energy_tweets_fullsample_en.md new file mode 100644 index 00000000000000..3922f15a9be90c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilroberta_base_finetuned_energy_tweets_fullsample_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilroberta_base_finetuned_energy_tweets_fullsample RoBertaEmbeddings from Saphbn +author: John Snow Labs +name: distilroberta_base_finetuned_energy_tweets_fullsample +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilroberta_base_finetuned_energy_tweets_fullsample` is a English model originally trained by Saphbn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilroberta_base_finetuned_energy_tweets_fullsample_en_5.5.0_3.0_1725577793593.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilroberta_base_finetuned_energy_tweets_fullsample_en_5.5.0_3.0_1725577793593.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("distilroberta_base_finetuned_energy_tweets_fullsample","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("distilroberta_base_finetuned_energy_tweets_fullsample","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
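+
+If downstream Spark ML stages need plain vector columns rather than annotation structs, the output of the example above can be converted with an `EmbeddingsFinisher` (sketch; the stage and output column names are illustrative):
+
+```python
+from sparknlp.base import EmbeddingsFinisher
+
+# Converts Spark NLP annotation embeddings into Spark ML vectors.
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["finished_embeddings"]) \
+    .setOutputAsVector(True)
+
+finisher.transform(pipelineDF).select("finished_embeddings").show(truncate=False)
+```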
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilroberta_base_finetuned_energy_tweets_fullsample| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|306.4 MB| + +## References + +https://huggingface.co/Saphbn/distilroberta-base-finetuned-energy-tweets-fullsample \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilroberta_base_ft_news_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilroberta_base_ft_news_en.md new file mode 100644 index 00000000000000..8db621715839a8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilroberta_base_ft_news_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilroberta_base_ft_news RoBertaEmbeddings from jkruk +author: John Snow Labs +name: distilroberta_base_ft_news +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilroberta_base_ft_news` is a English model originally trained by jkruk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilroberta_base_ft_news_en_5.5.0_3.0_1725572784431.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilroberta_base_ft_news_en_5.5.0_3.0_1725572784431.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("distilroberta_base_ft_news","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("distilroberta_base_ft_news","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilroberta_base_ft_news| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|306.5 MB| + +## References + +https://huggingface.co/jkruk/distilroberta-base-ft-news \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-distilroberta_base_ft_news_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-distilroberta_base_ft_news_pipeline_en.md new file mode 100644 index 00000000000000..4e357c7c4f5481 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-distilroberta_base_ft_news_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilroberta_base_ft_news_pipeline pipeline RoBertaEmbeddings from jkruk +author: John Snow Labs +name: distilroberta_base_ft_news_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilroberta_base_ft_news_pipeline` is a English model originally trained by jkruk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilroberta_base_ft_news_pipeline_en_5.5.0_3.0_1725572800433.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilroberta_base_ft_news_pipeline_en_5.5.0_3.0_1725572800433.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilroberta_base_ft_news_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilroberta_base_ft_news_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
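+
+The "Included Models" list below can also be checked programmatically: the downloaded object wraps a fitted Spark `PipelineModel`, so its stages should be inspectable along these lines (a hedged sketch; the `model` attribute is an assumption and may differ across Spark NLP versions):
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("distilroberta_base_ft_news_pipeline", lang="en")
+
+# The underlying PipelineModel exposes its fitted stages in order.
+for stage in pipeline.model.stages:
+    print(type(stage).__name__)
+```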
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilroberta_base_ft_news_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|306.5 MB| + +## References + +https://huggingface.co/jkruk/distilroberta-base-ft-news + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ditransformersert_base_uncased_tokenclassification_lora_en.md b/docs/_posts/ahmedlone127/2024-09-05-ditransformersert_base_uncased_tokenclassification_lora_en.md new file mode 100644 index 00000000000000..0a6d149fb9b44a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ditransformersert_base_uncased_tokenclassification_lora_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ditransformersert_base_uncased_tokenclassification_lora DistilBertForTokenClassification from urisoo +author: John Snow Labs +name: ditransformersert_base_uncased_tokenclassification_lora +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ditransformersert_base_uncased_tokenclassification_lora` is a English model originally trained by urisoo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ditransformersert_base_uncased_tokenclassification_lora_en_5.5.0_3.0_1725506144971.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ditransformersert_base_uncased_tokenclassification_lora_en_5.5.0_3.0_1725506144971.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("ditransformersert_base_uncased_tokenclassification_lora","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("ditransformersert_base_uncased_tokenclassification_lora", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ditransformersert_base_uncased_tokenclassification_lora| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/urisoo/ditransformersert-base-uncased-tokenclassification_lora \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-dock_3_en.md b/docs/_posts/ahmedlone127/2024-09-05-dock_3_en.md new file mode 100644 index 00000000000000..8a65d8724c2a48 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-dock_3_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dock_3 RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: dock_3 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dock_3` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dock_3_en_5.5.0_3.0_1725542266409.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dock_3_en_5.5.0_3.0_1725542266409.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("dock_3","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("dock_3", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
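+
+The label set this checkpoint emits is not spelled out in the card; it can be queried from the loaded annotator itself, as in the sketch below (continuing the Python example above):
+
+```python
+# getClasses() lists the labels the sequence classifier can predict.
+print(sequenceClassifier.getClasses())
+
+# Predictions then land in the "class" column as one of those labels.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```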
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dock_3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/Dock_3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-dock_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-dock_3_pipeline_en.md new file mode 100644 index 00000000000000..201dc9f3c095bb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-dock_3_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dock_3_pipeline pipeline RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: dock_3_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dock_3_pipeline` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dock_3_pipeline_en_5.5.0_3.0_1725542290229.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dock_3_pipeline_en_5.5.0_3.0_1725542290229.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dock_3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dock_3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dock_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/Dock_3 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-drbert_casm2_pipeline_fr.md b/docs/_posts/ahmedlone127/2024-09-05-drbert_casm2_pipeline_fr.md new file mode 100644 index 00000000000000..34ebe5c71c090f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-drbert_casm2_pipeline_fr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: French drbert_casm2_pipeline pipeline BertForTokenClassification from medkit +author: John Snow Labs +name: drbert_casm2_pipeline +date: 2024-09-05 +tags: [fr, open_source, pipeline, onnx] +task: Named Entity Recognition +language: fr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`drbert_casm2_pipeline` is a French model originally trained by medkit. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/drbert_casm2_pipeline_fr_5.5.0_3.0_1725563296489.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/drbert_casm2_pipeline_fr_5.5.0_3.0_1725563296489.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("drbert_casm2_pipeline", lang = "fr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("drbert_casm2_pipeline", lang = "fr") +val annotations = pipeline.transform(df) + +``` +
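+
+For clinical-style snippets it is often useful to keep character offsets; `fullAnnotate` returns full annotation objects rather than plain strings (sketch; the French example sentence is illustrative):
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+pipeline = PretrainedPipeline("drbert_casm2_pipeline", lang="fr")
+
+# fullAnnotate keeps begin/end offsets and metadata on each annotation.
+result = pipeline.fullAnnotate("Le patient présente une douleur thoracique depuis trois jours.")[0]
+for column, annotations in result.items():
+    for ann in annotations:
+        print(column, ann.begin, ann.end, ann.result)
+```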
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|drbert_casm2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fr| +|Size:|408.2 MB| + +## References + +https://huggingface.co/medkit/DrBERT-CASM2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-dynamic_tinybert_finetuned_squad_en.md b/docs/_posts/ahmedlone127/2024-09-05-dynamic_tinybert_finetuned_squad_en.md new file mode 100644 index 00000000000000..61990184000f68 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-dynamic_tinybert_finetuned_squad_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English dynamic_tinybert_finetuned_squad BertForQuestionAnswering from phdev +author: John Snow Labs +name: dynamic_tinybert_finetuned_squad +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dynamic_tinybert_finetuned_squad` is a English model originally trained by phdev. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dynamic_tinybert_finetuned_squad_en_5.5.0_3.0_1725560263924.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dynamic_tinybert_finetuned_squad_en_5.5.0_3.0_1725560263924.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = BertForQuestionAnswering.pretrained("dynamic_tinybert_finetuned_squad","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = BertForQuestionAnswering.pretrained("dynamic_tinybert_finetuned_squad", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
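+
+Several question/context pairs can be scored in one pass; continuing the Python example above, the predicted span ends up in the `answer` column (the second pair below is only an illustration):
+
+```python
+# Each row is one (question, context) pair; the model extracts the answer span from the context.
+qa_data = spark.createDataFrame([
+    ["What framework do I use?", "I use spark-nlp."],
+    ["Where can the library be used?", "Spark NLP runs on Apache Spark clusters."],
+]).toDF("question", "context")
+
+pipelineModel.transform(qa_data).select("question", "answer.result").show(truncate=False)
+```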
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dynamic_tinybert_finetuned_squad| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|249.0 MB| + +## References + +https://huggingface.co/phdev/dynamic_tinybert-finetuned-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-dynamic_tinybert_finetuned_squad_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-dynamic_tinybert_finetuned_squad_pipeline_en.md new file mode 100644 index 00000000000000..259dc70aa8eb80 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-dynamic_tinybert_finetuned_squad_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English dynamic_tinybert_finetuned_squad_pipeline pipeline BertForQuestionAnswering from phdev +author: John Snow Labs +name: dynamic_tinybert_finetuned_squad_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dynamic_tinybert_finetuned_squad_pipeline` is a English model originally trained by phdev. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dynamic_tinybert_finetuned_squad_pipeline_en_5.5.0_3.0_1725560277695.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dynamic_tinybert_finetuned_squad_pipeline_en_5.5.0_3.0_1725560277695.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dynamic_tinybert_finetuned_squad_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dynamic_tinybert_finetuned_squad_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dynamic_tinybert_finetuned_squad_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.0 MB| + +## References + +https://huggingface.co/phdev/dynamic_tinybert-finetuned-squad + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-efficient_mlm_m0_10_en.md b/docs/_posts/ahmedlone127/2024-09-05-efficient_mlm_m0_10_en.md new file mode 100644 index 00000000000000..b28839d995f06d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-efficient_mlm_m0_10_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English efficient_mlm_m0_10 RoBertaEmbeddings from rzhai +author: John Snow Labs +name: efficient_mlm_m0_10 +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`efficient_mlm_m0_10` is a English model originally trained by rzhai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/efficient_mlm_m0_10_en_5.5.0_3.0_1725578767691.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/efficient_mlm_m0_10_en_5.5.0_3.0_1725578767691.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("efficient_mlm_m0_10","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("efficient_mlm_m0_10","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|efficient_mlm_m0_10| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|839.6 MB| + +## References + +https://huggingface.co/rzhai/efficient_mlm_m0.10 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-efficient_mlm_m0_10_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-efficient_mlm_m0_10_pipeline_en.md new file mode 100644 index 00000000000000..5a75509156911e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-efficient_mlm_m0_10_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English efficient_mlm_m0_10_pipeline pipeline RoBertaEmbeddings from rzhai +author: John Snow Labs +name: efficient_mlm_m0_10_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`efficient_mlm_m0_10_pipeline` is a English model originally trained by rzhai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/efficient_mlm_m0_10_pipeline_en_5.5.0_3.0_1725579000416.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/efficient_mlm_m0_10_pipeline_en_5.5.0_3.0_1725579000416.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("efficient_mlm_m0_10_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("efficient_mlm_m0_10_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|efficient_mlm_m0_10_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|839.6 MB| + +## References + +https://huggingface.co/rzhai/efficient_mlm_m0.10 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-efficient_mlm_m0_30_en.md b/docs/_posts/ahmedlone127/2024-09-05-efficient_mlm_m0_30_en.md new file mode 100644 index 00000000000000..acebe975856b0c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-efficient_mlm_m0_30_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English efficient_mlm_m0_30 RoBertaEmbeddings from princeton-nlp +author: John Snow Labs +name: efficient_mlm_m0_30 +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`efficient_mlm_m0_30` is a English model originally trained by princeton-nlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/efficient_mlm_m0_30_en_5.5.0_3.0_1725577896446.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/efficient_mlm_m0_30_en_5.5.0_3.0_1725577896446.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("efficient_mlm_m0_30","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("efficient_mlm_m0_30","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
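+
+Token vectors from the example above can be pooled into a single vector per document with the `SentenceEmbeddings` annotator, for instance by average pooling (sketch; column names follow the example above):
+
+```python
+from sparknlp.annotator import SentenceEmbeddings
+
+# Average-pools the token embeddings within each document into one vector.
+sentenceEmbeddings = SentenceEmbeddings() \
+    .setInputCols(["document", "embeddings"]) \
+    .setOutputCol("sentence_embeddings") \
+    .setPoolingStrategy("AVERAGE")
+
+sentenceEmbeddings.transform(pipelineDF).select("sentence_embeddings").show(truncate=False)
+```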
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|efficient_mlm_m0_30| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|843.8 MB| + +## References + +https://huggingface.co/princeton-nlp/efficient_mlm_m0.30 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-efficient_mlm_m0_30_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-efficient_mlm_m0_30_pipeline_en.md new file mode 100644 index 00000000000000..22bdc9e668138a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-efficient_mlm_m0_30_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English efficient_mlm_m0_30_pipeline pipeline RoBertaEmbeddings from princeton-nlp +author: John Snow Labs +name: efficient_mlm_m0_30_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`efficient_mlm_m0_30_pipeline` is a English model originally trained by princeton-nlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/efficient_mlm_m0_30_pipeline_en_5.5.0_3.0_1725578138344.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/efficient_mlm_m0_30_pipeline_en_5.5.0_3.0_1725578138344.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("efficient_mlm_m0_30_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("efficient_mlm_m0_30_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|efficient_mlm_m0_30_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|843.8 MB| + +## References + +https://huggingface.co/princeton-nlp/efficient_mlm_m0.30 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-efficient_mlm_m0_70_en.md b/docs/_posts/ahmedlone127/2024-09-05-efficient_mlm_m0_70_en.md new file mode 100644 index 00000000000000..9295b81642ddba --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-efficient_mlm_m0_70_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English efficient_mlm_m0_70 RoBertaEmbeddings from princeton-nlp +author: John Snow Labs +name: efficient_mlm_m0_70 +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`efficient_mlm_m0_70` is a English model originally trained by princeton-nlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/efficient_mlm_m0_70_en_5.5.0_3.0_1725572423797.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/efficient_mlm_m0_70_en_5.5.0_3.0_1725572423797.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("efficient_mlm_m0_70","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("efficient_mlm_m0_70","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|efficient_mlm_m0_70| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|845.5 MB| + +## References + +https://huggingface.co/princeton-nlp/efficient_mlm_m0.70 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-efficient_mlm_m0_70_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-efficient_mlm_m0_70_pipeline_en.md new file mode 100644 index 00000000000000..e31ba2dc953188 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-efficient_mlm_m0_70_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English efficient_mlm_m0_70_pipeline pipeline RoBertaEmbeddings from princeton-nlp +author: John Snow Labs +name: efficient_mlm_m0_70_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`efficient_mlm_m0_70_pipeline` is a English model originally trained by princeton-nlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/efficient_mlm_m0_70_pipeline_en_5.5.0_3.0_1725572665816.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/efficient_mlm_m0_70_pipeline_en_5.5.0_3.0_1725572665816.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("efficient_mlm_m0_70_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("efficient_mlm_m0_70_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|efficient_mlm_m0_70_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|845.5 MB| + +## References + +https://huggingface.co/princeton-nlp/efficient_mlm_m0.70 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-efficientnet_b0_urdu_ocr_en.md b/docs/_posts/ahmedlone127/2024-09-05-efficientnet_b0_urdu_ocr_en.md new file mode 100644 index 00000000000000..5a72a302ee8fd6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-efficientnet_b0_urdu_ocr_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English efficientnet_b0_urdu_ocr RoBertaEmbeddings from Hammad712 +author: John Snow Labs +name: efficientnet_b0_urdu_ocr +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`efficientnet_b0_urdu_ocr` is a English model originally trained by Hammad712. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/efficientnet_b0_urdu_ocr_en_5.5.0_3.0_1725577599611.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/efficientnet_b0_urdu_ocr_en_5.5.0_3.0_1725577599611.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("efficientnet_b0_urdu_ocr","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("efficientnet_b0_urdu_ocr","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
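
Continuing from the Python example above (which defines `pipelineDF`), the token vectors can be inspected directly from the result DataFrame. The column names follow that example rather than anything fixed by the model:

```python
import pyspark.sql.functions as F

# Each row of the "embeddings" column holds one annotation per token;
# the token text is in `result` and the vector in `embeddings`.
(pipelineDF
    .select(F.explode("embeddings").alias("token_embedding"))
    .select(
        F.col("token_embedding.result").alias("token"),
        F.col("token_embedding.embeddings").alias("vector"))
    .show(truncate=80))
```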
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|efficientnet_b0_urdu_ocr| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|469.3 MB| + +## References + +https://huggingface.co/Hammad712/efficientnet-b0-urdu-ocr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-efficientnet_b0_urdu_ocr_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-efficientnet_b0_urdu_ocr_pipeline_en.md new file mode 100644 index 00000000000000..b662014bcf89ca --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-efficientnet_b0_urdu_ocr_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English efficientnet_b0_urdu_ocr_pipeline pipeline RoBertaEmbeddings from Hammad712 +author: John Snow Labs +name: efficientnet_b0_urdu_ocr_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`efficientnet_b0_urdu_ocr_pipeline` is a English model originally trained by Hammad712. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/efficientnet_b0_urdu_ocr_pipeline_en_5.5.0_3.0_1725577627774.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/efficientnet_b0_urdu_ocr_pipeline_en_5.5.0_3.0_1725577627774.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("efficientnet_b0_urdu_ocr_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("efficientnet_b0_urdu_ocr_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|efficientnet_b0_urdu_ocr_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|469.3 MB| + +## References + +https://huggingface.co/Hammad712/efficientnet-b0-urdu-ocr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-electra_embeddings_araelectra_base_generator_ar.md b/docs/_posts/ahmedlone127/2024-09-05-electra_embeddings_araelectra_base_generator_ar.md new file mode 100644 index 00000000000000..7e56d8a4325fab --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-electra_embeddings_araelectra_base_generator_ar.md @@ -0,0 +1,112 @@ +--- +layout: model +title: Arabic Electra Embeddings (from aubmindlab) +author: John Snow Labs +name: electra_embeddings_araelectra_base_generator +date: 2024-09-05 +tags: [ar, open_source, electra, embeddings, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Electra Embeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `araelectra-base-generator` is a Arabic model orginally trained by `aubmindlab`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/electra_embeddings_araelectra_base_generator_ar_5.5.0_3.0_1725552740662.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/electra_embeddings_araelectra_base_generator_ar_5.5.0_3.0_1725552740662.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("electra_embeddings_araelectra_base_generator","ar") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["أنا أحب الشرارة NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("electra_embeddings_araelectra_base_generator","ar") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("أنا أحب الشرارة NLP").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|electra_embeddings_araelectra_base_generator| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|ar| +|Size:|222.1 MB| + +## References + +References + +- https://huggingface.co/aubmindlab/araelectra-base-generator +- https://arxiv.org/pdf/1406.2661.pdf +- https://arxiv.org/abs/2012.15516 +- https://archive.org/details/arwiki-20190201 +- https://www.semanticscholar.org/paper/1.5-billion-words-Arabic-Corpus-El-Khair/f3eeef4afb81223df96575adadf808fe7fe440b4 +- https://www.aclweb.org/anthology/W19-4619 +- https://sites.aub.edu.lb/mindlab/ +- https://www.yakshof.com/#/ +- https://www.behance.net/rahalhabib +- https://www.linkedin.com/in/wissam-antoun-622142b4/ +- https://twitter.com/wissam_antoun +- https://github.com/WissamAntoun +- https://www.linkedin.com/in/fadybaly/ +- https://twitter.com/fadybaly +- https://github.com/fadybaly \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-electra_embeddings_araelectra_base_generator_pipeline_ar.md b/docs/_posts/ahmedlone127/2024-09-05-electra_embeddings_araelectra_base_generator_pipeline_ar.md new file mode 100644 index 00000000000000..dd87ec36a4f9e0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-electra_embeddings_araelectra_base_generator_pipeline_ar.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Arabic electra_embeddings_araelectra_base_generator_pipeline pipeline BertEmbeddings from aubmindlab +author: John Snow Labs +name: electra_embeddings_araelectra_base_generator_pipeline +date: 2024-09-05 +tags: [ar, open_source, pipeline, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`electra_embeddings_araelectra_base_generator_pipeline` is a Arabic model originally trained by aubmindlab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/electra_embeddings_araelectra_base_generator_pipeline_ar_5.5.0_3.0_1725552752203.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/electra_embeddings_araelectra_base_generator_pipeline_ar_5.5.0_3.0_1725552752203.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("electra_embeddings_araelectra_base_generator_pipeline", lang = "ar") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("electra_embeddings_araelectra_base_generator_pipeline", lang = "ar") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|electra_embeddings_araelectra_base_generator_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ar| +|Size:|222.1 MB| + +## References + +https://huggingface.co/aubmindlab/araelectra-base-generator + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-electra_qa_base_finetuned_squadv2_en.md b/docs/_posts/ahmedlone127/2024-09-05-electra_qa_base_finetuned_squadv2_en.md new file mode 100644 index 00000000000000..cc6229489d18d0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-electra_qa_base_finetuned_squadv2_en.md @@ -0,0 +1,98 @@ +--- +layout: model +title: English ElectraForQuestionAnswering model (from mrm8488) Version-2 +author: John Snow Labs +name: electra_qa_base_finetuned_squadv2 +date: 2024-09-05 +tags: [en, open_source, electra, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `electra-base-finetuned-squadv2` is a English model originally trained by `mrm8488`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/electra_qa_base_finetuned_squadv2_en_5.5.0_3.0_1725553930454.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/electra_qa_base_finetuned_squadv2_en_5.5.0_3.0_1725553930454.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python
documentAssembler = MultiDocumentAssembler() \
.setInputCols(["question", "context"]) \
.setOutputCols(["document_question", "document_context"])

spanClassifier = BertForQuestionAnswering.pretrained("electra_qa_base_finetuned_squadv2","en") \
.setInputCols(["document_question", "document_context"]) \
.setOutputCol("answer")\
.setCaseSensitive(True)

pipeline = Pipeline(stages=[documentAssembler, spanClassifier])

data = spark.createDataFrame([["What is my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context")

result = pipeline.fit(data).transform(data)
```
```scala
val documentAssembler = new MultiDocumentAssembler()
.setInputCols(Array("question", "context"))
.setOutputCols(Array("document_question", "document_context"))

val spanClassifier = BertForQuestionAnswering.pretrained("electra_qa_base_finetuned_squadv2","en")
.setInputCols(Array("document_question", "document_context"))
.setOutputCol("answer")
.setCaseSensitive(true)

val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))

val data = Seq(("What is my name?", "My name is Clara and I live in Berkeley.")).toDF("question", "context")

val result = pipeline.fit(data).transform(data)
```

{:.nlu-block}
```python
import nlu
nlu.load("en.answer_question.squadv2.electra.base_v2").predict("""What is my name?|||My name is Clara and I live in Berkeley.""")
```
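
As a rough follow-up to the Python example above (which defines `result`), the predicted answer span can be read from the `result` field of the `answer` column:

```python
# Each "answer" annotation carries the extracted answer span in its `result` field
result.selectExpr("explode(answer.result) as predicted_answer").show(truncate=False)
```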
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|electra_qa_base_finetuned_squadv2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|408.0 MB| + +## References + +References + +- https://huggingface.co/mrm8488/electra-base-finetuned-squadv2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-electra_qa_base_finetuned_squadv2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-electra_qa_base_finetuned_squadv2_pipeline_en.md new file mode 100644 index 00000000000000..0dbd44596ef399 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-electra_qa_base_finetuned_squadv2_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English electra_qa_base_finetuned_squadv2_pipeline pipeline BertForQuestionAnswering from mrm8488 +author: John Snow Labs +name: electra_qa_base_finetuned_squadv2_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`electra_qa_base_finetuned_squadv2_pipeline` is a English model originally trained by mrm8488. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/electra_qa_base_finetuned_squadv2_pipeline_en_5.5.0_3.0_1725553951494.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/electra_qa_base_finetuned_squadv2_pipeline_en_5.5.0_3.0_1725553951494.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("electra_qa_base_finetuned_squadv2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("electra_qa_base_finetuned_squadv2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|electra_qa_base_finetuned_squadv2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|408.0 MB| + +## References + +https://huggingface.co/mrm8488/electra-base-finetuned-squadv2 + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-emoji_emoji_random3_seed0_bernice_en.md b/docs/_posts/ahmedlone127/2024-09-05-emoji_emoji_random3_seed0_bernice_en.md new file mode 100644 index 00000000000000..293da463802df0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-emoji_emoji_random3_seed0_bernice_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English emoji_emoji_random3_seed0_bernice XlmRoBertaForSequenceClassification from tweettemposhift +author: John Snow Labs +name: emoji_emoji_random3_seed0_bernice +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`emoji_emoji_random3_seed0_bernice` is a English model originally trained by tweettemposhift. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/emoji_emoji_random3_seed0_bernice_en_5.5.0_3.0_1725537650149.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/emoji_emoji_random3_seed0_bernice_en_5.5.0_3.0_1725537650149.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("emoji_emoji_random3_seed0_bernice","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("class")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols(Array("document"))
    .setOutputCol("token")

val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("emoji_emoji_random3_seed0_bernice", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("class")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
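
Building on the Python example above (which defines `pipelineDF`), the predicted label and its scores can be pulled out of the `class` column; this is a generic Spark NLP pattern rather than anything specific to this model:

```python
import pyspark.sql.functions as F

# The predicted label sits in `result`; per-label confidence scores are kept in `metadata`
(pipelineDF
    .select(
        F.col("text"),
        F.col("class.result").alias("prediction"),
        F.col("class.metadata").alias("scores"))
    .show(truncate=False))
```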
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|emoji_emoji_random3_seed0_bernice| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|825.6 MB| + +## References + +https://huggingface.co/tweettemposhift/emoji-emoji_random3_seed0-bernice \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-emoji_emoji_random3_seed0_bernice_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-emoji_emoji_random3_seed0_bernice_pipeline_en.md new file mode 100644 index 00000000000000..dd3419dd9b62aa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-emoji_emoji_random3_seed0_bernice_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English emoji_emoji_random3_seed0_bernice_pipeline pipeline XlmRoBertaForSequenceClassification from tweettemposhift +author: John Snow Labs +name: emoji_emoji_random3_seed0_bernice_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`emoji_emoji_random3_seed0_bernice_pipeline` is a English model originally trained by tweettemposhift. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/emoji_emoji_random3_seed0_bernice_pipeline_en_5.5.0_3.0_1725537779816.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/emoji_emoji_random3_seed0_bernice_pipeline_en_5.5.0_3.0_1725537779816.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("emoji_emoji_random3_seed0_bernice_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("emoji_emoji_random3_seed0_bernice_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|emoji_emoji_random3_seed0_bernice_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|825.6 MB| + +## References + +https://huggingface.co/tweettemposhift/emoji-emoji_random3_seed0-bernice + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-english_hebrew_modern_large_en.md b/docs/_posts/ahmedlone127/2024-09-05-english_hebrew_modern_large_en.md new file mode 100644 index 00000000000000..964c2f458d9588 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-english_hebrew_modern_large_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English english_hebrew_modern_large MarianTransformer from orendar +author: John Snow Labs +name: english_hebrew_modern_large +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`english_hebrew_modern_large` is a English model originally trained by orendar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/english_hebrew_modern_large_en_5.5.0_3.0_1725544930824.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/english_hebrew_modern_large_en_5.5.0_3.0_1725544930824.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol("text") \
    .setOutputCol("document")

sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
    .setInputCols(["document"]) \
    .setOutputCol("sentence")

marian = MarianTransformer.pretrained("english_hebrew_modern_large","en") \
    .setInputCols(["sentence"]) \
    .setOutputCol("translation")

pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
    .setInputCols(Array("document"))
    .setOutputCol("sentence")

val marian = MarianTransformer.pretrained("english_hebrew_modern_large","en")
    .setInputCols(Array("sentence"))
    .setOutputCol("translation")

val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
val data = Seq("I love spark-nlp").toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
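
Following the Python example above (which defines `pipelineDF` and writes the output to a `translation` column), one translated string is produced per detected sentence and can be read back like this:

```python
# Each detected sentence yields one translation in the `result` field
pipelineDF.selectExpr("explode(translation.result) as translated_text").show(truncate=False)
```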
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|english_hebrew_modern_large| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/orendar/en_he_large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-english_hebrew_modern_large_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-english_hebrew_modern_large_pipeline_en.md new file mode 100644 index 00000000000000..6b33dc4fba7c1f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-english_hebrew_modern_large_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English english_hebrew_modern_large_pipeline pipeline MarianTransformer from orendar +author: John Snow Labs +name: english_hebrew_modern_large_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`english_hebrew_modern_large_pipeline` is a English model originally trained by orendar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/english_hebrew_modern_large_pipeline_en_5.5.0_3.0_1725544985187.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/english_hebrew_modern_large_pipeline_en_5.5.0_3.0_1725544985187.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("english_hebrew_modern_large_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("english_hebrew_modern_large_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|english_hebrew_modern_large_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/orendar/en_he_large + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-english_swahili_translation_en.md b/docs/_posts/ahmedlone127/2024-09-05-english_swahili_translation_en.md new file mode 100644 index 00000000000000..467d58fd525d10 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-english_swahili_translation_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English english_swahili_translation MarianTransformer from Bildad +author: John Snow Labs +name: english_swahili_translation +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`english_swahili_translation` is a English model originally trained by Bildad. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/english_swahili_translation_en_5.5.0_3.0_1725544713904.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/english_swahili_translation_en_5.5.0_3.0_1725544713904.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol("text") \
    .setOutputCol("document")

sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
    .setInputCols(["document"]) \
    .setOutputCol("sentence")

marian = MarianTransformer.pretrained("english_swahili_translation","en") \
    .setInputCols(["sentence"]) \
    .setOutputCol("translation")

pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
    .setInputCols(Array("document"))
    .setOutputCol("sentence")

val marian = MarianTransformer.pretrained("english_swahili_translation","en")
    .setInputCols(Array("sentence"))
    .setOutputCol("translation")

val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
val data = Seq("I love spark-nlp").toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|english_swahili_translation| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|506.4 MB| + +## References + +https://huggingface.co/Bildad/English-Swahili_Translation \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-english_swahili_translation_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-english_swahili_translation_pipeline_en.md new file mode 100644 index 00000000000000..9f8f7a9f616b92 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-english_swahili_translation_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English english_swahili_translation_pipeline pipeline MarianTransformer from Bildad +author: John Snow Labs +name: english_swahili_translation_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`english_swahili_translation_pipeline` is a English model originally trained by Bildad. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/english_swahili_translation_pipeline_en_5.5.0_3.0_1725544740330.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/english_swahili_translation_pipeline_en_5.5.0_3.0_1725544740330.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("english_swahili_translation_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("english_swahili_translation_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|english_swahili_translation_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|507.0 MB| + +## References + +https://huggingface.co/Bildad/English-Swahili_Translation + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-english_tonga_tonga_islands_arabic_v2_en.md b/docs/_posts/ahmedlone127/2024-09-05-english_tonga_tonga_islands_arabic_v2_en.md new file mode 100644 index 00000000000000..f6d1fd03bd4cbd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-english_tonga_tonga_islands_arabic_v2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English english_tonga_tonga_islands_arabic_v2 MarianTransformer from wingo-dz +author: John Snow Labs +name: english_tonga_tonga_islands_arabic_v2 +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`english_tonga_tonga_islands_arabic_v2` is a English model originally trained by wingo-dz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/english_tonga_tonga_islands_arabic_v2_en_5.5.0_3.0_1725494666618.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/english_tonga_tonga_islands_arabic_v2_en_5.5.0_3.0_1725494666618.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol("text") \
    .setOutputCol("document")

sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
    .setInputCols(["document"]) \
    .setOutputCol("sentence")

marian = MarianTransformer.pretrained("english_tonga_tonga_islands_arabic_v2","en") \
    .setInputCols(["sentence"]) \
    .setOutputCol("translation")

pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
    .setInputCols(Array("document"))
    .setOutputCol("sentence")

val marian = MarianTransformer.pretrained("english_tonga_tonga_islands_arabic_v2","en")
    .setInputCols(Array("sentence"))
    .setOutputCol("translation")

val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
val data = Seq("I love spark-nlp").toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|english_tonga_tonga_islands_arabic_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|528.3 MB| + +## References + +https://huggingface.co/wingo-dz/en-to-ar-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-enlm_roberta_130_imdb_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-enlm_roberta_130_imdb_pipeline_en.md new file mode 100644 index 00000000000000..363211f86a5218 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-enlm_roberta_130_imdb_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English enlm_roberta_130_imdb_pipeline pipeline XlmRoBertaForSequenceClassification from manirai91 +author: John Snow Labs +name: enlm_roberta_130_imdb_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`enlm_roberta_130_imdb_pipeline` is a English model originally trained by manirai91. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/enlm_roberta_130_imdb_pipeline_en_5.5.0_3.0_1725536910127.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/enlm_roberta_130_imdb_pipeline_en_5.5.0_3.0_1725536910127.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("enlm_roberta_130_imdb_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("enlm_roberta_130_imdb_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|enlm_roberta_130_imdb_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|466.6 MB| + +## References + +https://huggingface.co/manirai91/enlm-roberta-130-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-esci_us_mpnet_crossencoder_en.md b/docs/_posts/ahmedlone127/2024-09-05-esci_us_mpnet_crossencoder_en.md new file mode 100644 index 00000000000000..b1e5098e90d87a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-esci_us_mpnet_crossencoder_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English esci_us_mpnet_crossencoder MPNetForSequenceClassification from spacemanidol +author: John Snow Labs +name: esci_us_mpnet_crossencoder +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, mpnet] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`esci_us_mpnet_crossencoder` is a English model originally trained by spacemanidol. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/esci_us_mpnet_crossencoder_en_5.5.0_3.0_1725575503791.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/esci_us_mpnet_crossencoder_en_5.5.0_3.0_1725575503791.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

sequenceClassifier = MPNetForSequenceClassification.pretrained("esci_us_mpnet_crossencoder","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("class")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols(Array("document"))
    .setOutputCol("token")

val sequenceClassifier = MPNetForSequenceClassification.pretrained("esci_us_mpnet_crossencoder", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("class")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|esci_us_mpnet_crossencoder| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/spacemanidol/esci-us-mpnet-crossencoder \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-esci_us_mpnet_crossencoder_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-esci_us_mpnet_crossencoder_pipeline_en.md new file mode 100644 index 00000000000000..b692c63ff0b0c7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-esci_us_mpnet_crossencoder_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English esci_us_mpnet_crossencoder_pipeline pipeline MPNetForSequenceClassification from spacemanidol +author: John Snow Labs +name: esci_us_mpnet_crossencoder_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`esci_us_mpnet_crossencoder_pipeline` is a English model originally trained by spacemanidol. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/esci_us_mpnet_crossencoder_pipeline_en_5.5.0_3.0_1725575524487.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/esci_us_mpnet_crossencoder_pipeline_en_5.5.0_3.0_1725575524487.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("esci_us_mpnet_crossencoder_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("esci_us_mpnet_crossencoder_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|esci_us_mpnet_crossencoder_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/spacemanidol/esci-us-mpnet-crossencoder + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-esg_classification_french_english_pipeline_fr.md b/docs/_posts/ahmedlone127/2024-09-05-esg_classification_french_english_pipeline_fr.md new file mode 100644 index 00000000000000..c265a1c30cc04f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-esg_classification_french_english_pipeline_fr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: French esg_classification_french_english_pipeline pipeline DistilBertForSequenceClassification from cea-list-lasti +author: John Snow Labs +name: esg_classification_french_english_pipeline +date: 2024-09-05 +tags: [fr, open_source, pipeline, onnx] +task: Text Classification +language: fr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`esg_classification_french_english_pipeline` is a French model originally trained by cea-list-lasti. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/esg_classification_french_english_pipeline_fr_5.5.0_3.0_1725506980005.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/esg_classification_french_english_pipeline_fr_5.5.0_3.0_1725506980005.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("esg_classification_french_english_pipeline", lang = "fr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("esg_classification_french_english_pipeline", lang = "fr") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|esg_classification_french_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fr| +|Size:|508.0 MB| + +## References + +https://huggingface.co/cea-list-lasti/ESG-classification-fr-en + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-esmlmt59_2500_en.md b/docs/_posts/ahmedlone127/2024-09-05-esmlmt59_2500_en.md new file mode 100644 index 00000000000000..053acfd3321f11 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-esmlmt59_2500_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English esmlmt59_2500 BertEmbeddings from hjkim811 +author: John Snow Labs +name: esmlmt59_2500 +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`esmlmt59_2500` is a English model originally trained by hjkim811. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/esmlmt59_2500_en_5.5.0_3.0_1725533862229.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/esmlmt59_2500_en_5.5.0_3.0_1725533862229.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("esmlmt59_2500","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("esmlmt59_2500","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
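
For quick checks on a handful of sentences, the fitted pipeline from the Python example above (`pipelineModel`) can also be wrapped in a LightPipeline, which works on plain Python strings instead of DataFrames; this is a general Spark NLP convenience rather than anything specific to this model:

```python
from sparknlp.base import LightPipeline

# Run the same fitted stages without DataFrame overhead
light = LightPipeline(pipelineModel)
annotations = light.annotate("I love spark-nlp")
print(annotations["token"])
```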
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|esmlmt59_2500| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/hjkim811/esmlmt59-2500 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-esmlmt59_2500_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-esmlmt59_2500_pipeline_en.md new file mode 100644 index 00000000000000..6e56a9c05781c1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-esmlmt59_2500_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English esmlmt59_2500_pipeline pipeline BertEmbeddings from hjkim811 +author: John Snow Labs +name: esmlmt59_2500_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`esmlmt59_2500_pipeline` is a English model originally trained by hjkim811. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/esmlmt59_2500_pipeline_en_5.5.0_3.0_1725533881752.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/esmlmt59_2500_pipeline_en_5.5.0_3.0_1725533881752.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("esmlmt59_2500_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("esmlmt59_2500_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|esmlmt59_2500_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/hjkim811/esmlmt59-2500 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-experiment_foreign_language_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-experiment_foreign_language_pipeline_en.md new file mode 100644 index 00000000000000..a2a48e0d909533 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-experiment_foreign_language_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English experiment_foreign_language_pipeline pipeline DistilBertForTokenClassification from sophiestein +author: John Snow Labs +name: experiment_foreign_language_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`experiment_foreign_language_pipeline` is a English model originally trained by sophiestein. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/experiment_foreign_language_pipeline_en_5.5.0_3.0_1725495810203.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/experiment_foreign_language_pipeline_en_5.5.0_3.0_1725495810203.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("experiment_foreign_language_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("experiment_foreign_language_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|experiment_foreign_language_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|505.4 MB| + +## References + +https://huggingface.co/sophiestein/experiment_foreign_language + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-fake_news_classifier_en.md b/docs/_posts/ahmedlone127/2024-09-05-fake_news_classifier_en.md new file mode 100644 index 00000000000000..5f3fbd7a6dc297 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-fake_news_classifier_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English fake_news_classifier RoBertaForSequenceClassification from T0asty +author: John Snow Labs +name: fake_news_classifier +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fake_news_classifier` is a English model originally trained by T0asty. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fake_news_classifier_en_5.5.0_3.0_1725542137728.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fake_news_classifier_en_5.5.0_3.0_1725542137728.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

sequenceClassifier = RoBertaForSequenceClassification.pretrained("fake_news_classifier","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("class")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols(Array("document"))
    .setOutputCol("token")

val sequenceClassifier = RoBertaForSequenceClassification.pretrained("fake_news_classifier", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("class")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fake_news_classifier| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|461.4 MB| + +## References + +https://huggingface.co/T0asty/fake-news-classifier \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-fin_roberta_en.md b/docs/_posts/ahmedlone127/2024-09-05-fin_roberta_en.md new file mode 100644 index 00000000000000..0e4295cd5cfa13 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-fin_roberta_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English fin_roberta RoBertaForSequenceClassification from SUFEHeisenberg +author: John Snow Labs +name: fin_roberta +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fin_roberta` is a English model originally trained by SUFEHeisenberg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fin_roberta_en_5.5.0_3.0_1725542698287.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fin_roberta_en_5.5.0_3.0_1725542698287.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("fin_roberta","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("fin_roberta", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fin_roberta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|474.0 MB| + +## References + +https://huggingface.co/SUFEHeisenberg/Fin-RoBERTa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-fin_roberta_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-fin_roberta_pipeline_en.md new file mode 100644 index 00000000000000..048750cfcca7cb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-fin_roberta_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English fin_roberta_pipeline pipeline RoBertaForSequenceClassification from SUFEHeisenberg +author: John Snow Labs +name: fin_roberta_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fin_roberta_pipeline` is a English model originally trained by SUFEHeisenberg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fin_roberta_pipeline_en_5.5.0_3.0_1725542725423.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fin_roberta_pipeline_en_5.5.0_3.0_1725542725423.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("fin_roberta_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("fin_roberta_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
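+
+The `df` used above is not constructed in the snippet; a minimal sketch of preparing it is shown below, assuming an active Spark NLP session (`spark`) and that the pipeline reads a plain `text` column (check `annotations.columns` for the actual output column names):
+
+```python
+# Reuses the `pipeline` loaded above.
+df = spark.createDataFrame([["The company reported strong quarterly earnings."]]).toDF("text")
+annotations = pipeline.transform(df)
+
+# Lightweight single-string inference without building a DataFrame:
+print(pipeline.annotate("The company reported strong quarterly earnings."))
+```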
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fin_roberta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|474.0 MB| + +## References + +https://huggingface.co/SUFEHeisenberg/Fin-RoBERTa + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-finance_news_classifier_kanuri_v7_pipeline_ko.md b/docs/_posts/ahmedlone127/2024-09-05-finance_news_classifier_kanuri_v7_pipeline_ko.md new file mode 100644 index 00000000000000..8df57539374f5d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-finance_news_classifier_kanuri_v7_pipeline_ko.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Korean finance_news_classifier_kanuri_v7_pipeline pipeline XlmRoBertaForSequenceClassification from gabrielyang +author: John Snow Labs +name: finance_news_classifier_kanuri_v7_pipeline +date: 2024-09-05 +tags: [ko, open_source, pipeline, onnx] +task: Text Classification +language: ko +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finance_news_classifier_kanuri_v7_pipeline` is a Korean model originally trained by gabrielyang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finance_news_classifier_kanuri_v7_pipeline_ko_5.5.0_3.0_1725513531969.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finance_news_classifier_kanuri_v7_pipeline_ko_5.5.0_3.0_1725513531969.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("finance_news_classifier_kanuri_v7_pipeline", lang = "ko") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("finance_news_classifier_kanuri_v7_pipeline", lang = "ko") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finance_news_classifier_kanuri_v7_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ko| +|Size:|1.0 GB| + +## References + +https://huggingface.co/gabrielyang/finance_news_classifier-KR_v7 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-finbert_pretrain_en.md b/docs/_posts/ahmedlone127/2024-09-05-finbert_pretrain_en.md new file mode 100644 index 00000000000000..030c65da370c10 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-finbert_pretrain_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English finbert_pretrain BertEmbeddings from yiyanghkust +author: John Snow Labs +name: finbert_pretrain +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finbert_pretrain` is a English model originally trained by yiyanghkust. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finbert_pretrain_en_5.5.0_3.0_1725553158869.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finbert_pretrain_en_5.5.0_3.0_1725553158869.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("finbert_pretrain","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("finbert_pretrain","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
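+
+After running the example above, each entry in the `embeddings` column is an annotation carrying one token and its vector. A small sketch, assuming only the column names from that example, for flattening them into a token/vector table:
+
+```python
+from pyspark.sql import functions as F
+
+# `result` is the token text, `embeddings` is its vector.
+vectors = pipelineDF.select(F.explode("embeddings").alias("emb")) \
+    .select(F.col("emb.result").alias("token"), F.col("emb.embeddings").alias("vector"))
+vectors.show(5, truncate=False)
+```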
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finbert_pretrain| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/yiyanghkust/finbert-pretrain \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-finbert_pretrain_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-finbert_pretrain_pipeline_en.md new file mode 100644 index 00000000000000..16f03a2bbd2499 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-finbert_pretrain_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English finbert_pretrain_pipeline pipeline BertEmbeddings from yiyanghkust +author: John Snow Labs +name: finbert_pretrain_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finbert_pretrain_pipeline` is a English model originally trained by yiyanghkust. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finbert_pretrain_pipeline_en_5.5.0_3.0_1725553179960.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finbert_pretrain_pipeline_en_5.5.0_3.0_1725553179960.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("finbert_pretrain_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("finbert_pretrain_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finbert_pretrain_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/yiyanghkust/finbert-pretrain + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-finetune_whisper_small_malay_singlish_v2_en.md b/docs/_posts/ahmedlone127/2024-09-05-finetune_whisper_small_malay_singlish_v2_en.md new file mode 100644 index 00000000000000..cf084424c18f52 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-finetune_whisper_small_malay_singlish_v2_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English finetune_whisper_small_malay_singlish_v2 WhisperForCTC from mesolitica +author: John Snow Labs +name: finetune_whisper_small_malay_singlish_v2 +date: 2024-09-05 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetune_whisper_small_malay_singlish_v2` is a English model originally trained by mesolitica. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetune_whisper_small_malay_singlish_v2_en_5.5.0_3.0_1725549598643.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetune_whisper_small_malay_singlish_v2_en_5.5.0_3.0_1725549598643.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("finetune_whisper_small_malay_singlish_v2","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+  .setInputCol("audio_content")
+  .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("finetune_whisper_small_malay_singlish_v2", "en")
+  .setInputCols(Array("audio_assembler"))
+  .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
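+
+The `data` variable is not built in the snippet above. A hypothetical sketch of preparing it from a local WAV file is shown below; `librosa` and the file name are assumptions, not part of Spark NLP, and any method that yields raw float samples in an `audio_content` column works:
+
+```python
+import librosa  # assumed helper library for decoding audio into float samples
+
+samples, _ = librosa.load("sample_audio.wav", sr=16000)  # hypothetical local file
+data = spark.createDataFrame([[samples.tolist()]]).toDF("audio_content")
+
+# After running the pipeline above, the transcription sits in the `text` annotation column:
+# pipelineDF.select("text.result").show(truncate=False)
+```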
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetune_whisper_small_malay_singlish_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/mesolitica/finetune-whisper-small-ms-singlish-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-finetune_whisper_small_malay_singlish_v2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-finetune_whisper_small_malay_singlish_v2_pipeline_en.md new file mode 100644 index 00000000000000..361f933b044e8b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-finetune_whisper_small_malay_singlish_v2_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English finetune_whisper_small_malay_singlish_v2_pipeline pipeline WhisperForCTC from mesolitica +author: John Snow Labs +name: finetune_whisper_small_malay_singlish_v2_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetune_whisper_small_malay_singlish_v2_pipeline` is a English model originally trained by mesolitica. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetune_whisper_small_malay_singlish_v2_pipeline_en_5.5.0_3.0_1725549696880.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetune_whisper_small_malay_singlish_v2_pipeline_en_5.5.0_3.0_1725549696880.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("finetune_whisper_small_malay_singlish_v2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("finetune_whisper_small_malay_singlish_v2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetune_whisper_small_malay_singlish_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/mesolitica/finetune-whisper-small-ms-singlish-v2 + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-finetuned_distilbert_model_en.md b/docs/_posts/ahmedlone127/2024-09-05-finetuned_distilbert_model_en.md new file mode 100644 index 00000000000000..dd38f5475f4956 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-finetuned_distilbert_model_en.md @@ -0,0 +1,98 @@ +--- +layout: model +title: English finetuned_distilbert_model DistilBertForSequenceClassification from Afia-manubea +author: John Snow Labs +name: finetuned_distilbert_model +date: 2024-09-05 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_distilbert_model` is a English model originally trained by Afia-manubea. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_distilbert_model_en_5.5.0_3.0_1725505967195.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_distilbert_model_en_5.5.0_3.0_1725505967195.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+document_assembler = DocumentAssembler()\
+    .setInputCol("text")\
+    .setOutputCol("document")
+
+tokenizer = Tokenizer()\
+    .setInputCols("document")\
+    .setOutputCol("token")
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("finetuned_distilbert_model","en")\
+    .setInputCols(["document","token"])\
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier])
+
+data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+val document_assembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("finetuned_distilbert_model","en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier))
+
+val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text")
+
+val result = pipeline.fit(data).transform(data)
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_distilbert_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +References + +https://huggingface.co/Afia-manubea/FineTuned-DistilBert-Model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-finetuned_distilbert_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-finetuned_distilbert_model_pipeline_en.md new file mode 100644 index 00000000000000..09fd2f4b6570d9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-finetuned_distilbert_model_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English finetuned_distilbert_model_pipeline pipeline DistilBertForTokenClassification from danieladeeko +author: John Snow Labs +name: finetuned_distilbert_model_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_distilbert_model_pipeline` is a English model originally trained by danieladeeko. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_distilbert_model_pipeline_en_5.5.0_3.0_1725505978767.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_distilbert_model_pipeline_en_5.5.0_3.0_1725505978767.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("finetuned_distilbert_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("finetuned_distilbert_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_distilbert_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/danieladeeko/finetuned_distilbert_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-finetuned_helsinki_nlp_opus_maltese_korean_english_en.md b/docs/_posts/ahmedlone127/2024-09-05-finetuned_helsinki_nlp_opus_maltese_korean_english_en.md new file mode 100644 index 00000000000000..8232edce516122 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-finetuned_helsinki_nlp_opus_maltese_korean_english_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English finetuned_helsinki_nlp_opus_maltese_korean_english MarianTransformer from dalzza +author: John Snow Labs +name: finetuned_helsinki_nlp_opus_maltese_korean_english +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_helsinki_nlp_opus_maltese_korean_english` is a English model originally trained by dalzza. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_helsinki_nlp_opus_maltese_korean_english_en_5.5.0_3.0_1725545022757.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_helsinki_nlp_opus_maltese_korean_english_en_5.5.0_3.0_1725545022757.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("finetuned_helsinki_nlp_opus_maltese_korean_english","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+  .setInputCols(Array("document"))
+  .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("finetuned_helsinki_nlp_opus_maltese_korean_english","en")
+  .setInputCols(Array("sentence"))
+  .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
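+
+After running the pipeline above, the translated sentences are in the `translation` annotation column; a minimal check, using only the names from that example:
+
+```python
+# One translated string per detected sentence.
+pipelineDF.select("translation.result").show(truncate=False)
+```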
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_helsinki_nlp_opus_maltese_korean_english| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|540.7 MB| + +## References + +https://huggingface.co/dalzza/finetuned-helsinki-nlp-opus-mt-ko-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-finetuned_helsinki_nlp_opus_maltese_korean_english_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-finetuned_helsinki_nlp_opus_maltese_korean_english_pipeline_en.md new file mode 100644 index 00000000000000..42617d499885d1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-finetuned_helsinki_nlp_opus_maltese_korean_english_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English finetuned_helsinki_nlp_opus_maltese_korean_english_pipeline pipeline MarianTransformer from dalzza +author: John Snow Labs +name: finetuned_helsinki_nlp_opus_maltese_korean_english_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_helsinki_nlp_opus_maltese_korean_english_pipeline` is a English model originally trained by dalzza. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_helsinki_nlp_opus_maltese_korean_english_pipeline_en_5.5.0_3.0_1725545050993.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_helsinki_nlp_opus_maltese_korean_english_pipeline_en_5.5.0_3.0_1725545050993.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("finetuned_helsinki_nlp_opus_maltese_korean_english_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("finetuned_helsinki_nlp_opus_maltese_korean_english_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_helsinki_nlp_opus_maltese_korean_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|541.2 MB| + +## References + +https://huggingface.co/dalzza/finetuned-helsinki-nlp-opus-mt-ko-en + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-finetuned_iitp_pdt_review_indic_bert_en.md b/docs/_posts/ahmedlone127/2024-09-05-finetuned_iitp_pdt_review_indic_bert_en.md new file mode 100644 index 00000000000000..b0165acc51c857 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-finetuned_iitp_pdt_review_indic_bert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English finetuned_iitp_pdt_review_indic_bert AlbertForSequenceClassification from aditeyabaral +author: John Snow Labs +name: finetuned_iitp_pdt_review_indic_bert +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_iitp_pdt_review_indic_bert` is a English model originally trained by aditeyabaral. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_iitp_pdt_review_indic_bert_en_5.5.0_3.0_1725543431726.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_iitp_pdt_review_indic_bert_en_5.5.0_3.0_1725543431726.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = AlbertForSequenceClassification.pretrained("finetuned_iitp_pdt_review_indic_bert","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = AlbertForSequenceClassification.pretrained("finetuned_iitp_pdt_review_indic_bert", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
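+
+Beyond the winning label, the per-class confidence scores are kept in the annotation metadata. A short sketch, assuming only the `class` column produced above:
+
+```python
+from pyspark.sql import functions as F
+
+# `result` holds the predicted label, `metadata` holds the score for each class.
+pipelineDF.select(F.explode("class").alias("prediction")) \
+    .select("prediction.result", "prediction.metadata") \
+    .show(truncate=False)
+```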
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_iitp_pdt_review_indic_bert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|127.8 MB| + +## References + +https://huggingface.co/aditeyabaral/finetuned-iitp_pdt_review-indic-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-finetuned_marianmtmodel_v2_specialfrom_ccmatrix77k_en.md b/docs/_posts/ahmedlone127/2024-09-05-finetuned_marianmtmodel_v2_specialfrom_ccmatrix77k_en.md new file mode 100644 index 00000000000000..4745c0a73b2eb5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-finetuned_marianmtmodel_v2_specialfrom_ccmatrix77k_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English finetuned_marianmtmodel_v2_specialfrom_ccmatrix77k MarianTransformer from HugginJake +author: John Snow Labs +name: finetuned_marianmtmodel_v2_specialfrom_ccmatrix77k +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_marianmtmodel_v2_specialfrom_ccmatrix77k` is a English model originally trained by HugginJake. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_marianmtmodel_v2_specialfrom_ccmatrix77k_en_5.5.0_3.0_1725495217787.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_marianmtmodel_v2_specialfrom_ccmatrix77k_en_5.5.0_3.0_1725495217787.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("finetuned_marianmtmodel_v2_specialfrom_ccmatrix77k","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+  .setInputCols(Array("document"))
+  .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("finetuned_marianmtmodel_v2_specialfrom_ccmatrix77k","en")
+  .setInputCols(Array("sentence"))
+  .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_marianmtmodel_v2_specialfrom_ccmatrix77k| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|509.5 MB| + +## References + +https://huggingface.co/HugginJake/Finetuned_MarianMTModel_v2_specialFrom_ccmatrix77k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-finetuned_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-finetuned_model_pipeline_en.md new file mode 100644 index 00000000000000..e792241df73b6a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-finetuned_model_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English finetuned_model_pipeline pipeline RoBertaForSequenceClassification from MarioPenguin +author: John Snow Labs +name: finetuned_model_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_model_pipeline` is a English model originally trained by MarioPenguin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_model_pipeline_en_5.5.0_3.0_1725542190586.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_model_pipeline_en_5.5.0_3.0_1725542190586.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("finetuned_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("finetuned_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|468.2 MB| + +## References + +https://huggingface.co/MarioPenguin/finetuned-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-finetuned_ner_sarthak7777_en.md b/docs/_posts/ahmedlone127/2024-09-05-finetuned_ner_sarthak7777_en.md new file mode 100644 index 00000000000000..2ae41a52049614 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-finetuned_ner_sarthak7777_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English finetuned_ner_sarthak7777 DistilBertForTokenClassification from Sarthak7777 +author: John Snow Labs +name: finetuned_ner_sarthak7777 +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_ner_sarthak7777` is a English model originally trained by Sarthak7777. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_ner_sarthak7777_en_5.5.0_3.0_1725500467864.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_ner_sarthak7777_en_5.5.0_3.0_1725500467864.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("finetuned_ner_sarthak7777","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("finetuned_ner_sarthak7777", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
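+
+To turn the token-level tags into whole entity chunks, Spark NLP's `NerConverter` can be appended to the same pipeline. This is a sketch that reuses the stages and `data` defined above and assumes the fine-tuned model emits IOB-style tags:
+
+```python
+from sparknlp.annotator import NerConverter
+
+# Groups consecutive B-/I- tagged tokens from the "ner" column into entity chunks.
+converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier, converter])
+result = pipeline.fit(data).transform(data)
+result.select("ner_chunk.result").show(truncate=False)
+```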
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_ner_sarthak7777| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Sarthak7777/finetuned-NER \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-finetuned_opusmt_english_tonga_tonga_islands_tamil_en.md b/docs/_posts/ahmedlone127/2024-09-05-finetuned_opusmt_english_tonga_tonga_islands_tamil_en.md new file mode 100644 index 00000000000000..2a16754c6e13fb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-finetuned_opusmt_english_tonga_tonga_islands_tamil_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English finetuned_opusmt_english_tonga_tonga_islands_tamil MarianTransformer from Varsha00 +author: John Snow Labs +name: finetuned_opusmt_english_tonga_tonga_islands_tamil +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_opusmt_english_tonga_tonga_islands_tamil` is a English model originally trained by Varsha00. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_opusmt_english_tonga_tonga_islands_tamil_en_5.5.0_3.0_1725545263081.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_opusmt_english_tonga_tonga_islands_tamil_en_5.5.0_3.0_1725545263081.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("finetuned_opusmt_english_tonga_tonga_islands_tamil","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+  .setInputCols(Array("document"))
+  .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("finetuned_opusmt_english_tonga_tonga_islands_tamil","en")
+  .setInputCols(Array("sentence"))
+  .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_opusmt_english_tonga_tonga_islands_tamil| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|530.2 MB| + +## References + +https://huggingface.co/Varsha00/finetuned-opusmt-en-to-ta \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-finetuned_opusmt_english_tonga_tonga_islands_tamil_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-finetuned_opusmt_english_tonga_tonga_islands_tamil_pipeline_en.md new file mode 100644 index 00000000000000..653f7d3686d7e7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-finetuned_opusmt_english_tonga_tonga_islands_tamil_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English finetuned_opusmt_english_tonga_tonga_islands_tamil_pipeline pipeline MarianTransformer from Varsha00 +author: John Snow Labs +name: finetuned_opusmt_english_tonga_tonga_islands_tamil_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_opusmt_english_tonga_tonga_islands_tamil_pipeline` is a English model originally trained by Varsha00. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_opusmt_english_tonga_tonga_islands_tamil_pipeline_en_5.5.0_3.0_1725545292121.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_opusmt_english_tonga_tonga_islands_tamil_pipeline_en_5.5.0_3.0_1725545292121.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("finetuned_opusmt_english_tonga_tonga_islands_tamil_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("finetuned_opusmt_english_tonga_tonga_islands_tamil_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_opusmt_english_tonga_tonga_islands_tamil_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|530.7 MB| + +## References + +https://huggingface.co/Varsha00/finetuned-opusmt-en-to-ta + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-finetuning_ift6758_hw6_sentiment_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-finetuning_ift6758_hw6_sentiment_model_pipeline_en.md new file mode 100644 index 00000000000000..5325aae606c008 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-finetuning_ift6758_hw6_sentiment_model_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English finetuning_ift6758_hw6_sentiment_model_pipeline pipeline DistilBertForSequenceClassification from ucmp137538 +author: John Snow Labs +name: finetuning_ift6758_hw6_sentiment_model_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuning_ift6758_hw6_sentiment_model_pipeline` is a English model originally trained by ucmp137538. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuning_ift6758_hw6_sentiment_model_pipeline_en_5.5.0_3.0_1725507434004.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuning_ift6758_hw6_sentiment_model_pipeline_en_5.5.0_3.0_1725507434004.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("finetuning_ift6758_hw6_sentiment_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("finetuning_ift6758_hw6_sentiment_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuning_ift6758_hw6_sentiment_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.6 MB| + +## References + +https://huggingface.co/ucmp137538/finetuning-ift6758-hw6-sentiment-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-finetuning_movie_sentiment_analysis_en.md b/docs/_posts/ahmedlone127/2024-09-05-finetuning_movie_sentiment_analysis_en.md new file mode 100644 index 00000000000000..d3ce78d5053e87 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-finetuning_movie_sentiment_analysis_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English finetuning_movie_sentiment_analysis DistilBertForSequenceClassification from MrPudge +author: John Snow Labs +name: finetuning_movie_sentiment_analysis +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuning_movie_sentiment_analysis` is a English model originally trained by MrPudge. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuning_movie_sentiment_analysis_en_5.5.0_3.0_1725507138535.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuning_movie_sentiment_analysis_en_5.5.0_3.0_1725507138535.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("finetuning_movie_sentiment_analysis","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("finetuning_movie_sentiment_analysis", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
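+
+For ad-hoc scoring of single strings without building a DataFrame, the fitted `pipelineModel` from the example above can be wrapped in a `LightPipeline`; a small sketch:
+
+```python
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+print(light.annotate("The plot was predictable but the acting saved it."))
+```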
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuning_movie_sentiment_analysis| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/MrPudge/finetuning-movie-sentiment-analysis \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-finetuning_sentiment_model_3000_samples_gaurimm_en.md b/docs/_posts/ahmedlone127/2024-09-05-finetuning_sentiment_model_3000_samples_gaurimm_en.md new file mode 100644 index 00000000000000..3034647ec31a9f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-finetuning_sentiment_model_3000_samples_gaurimm_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English finetuning_sentiment_model_3000_samples_gaurimm DistilBertForSequenceClassification from gaurimm +author: John Snow Labs +name: finetuning_sentiment_model_3000_samples_gaurimm +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuning_sentiment_model_3000_samples_gaurimm` is a English model originally trained by gaurimm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuning_sentiment_model_3000_samples_gaurimm_en_5.5.0_3.0_1725507287733.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuning_sentiment_model_3000_samples_gaurimm_en_5.5.0_3.0_1725507287733.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("finetuning_sentiment_model_3000_samples_gaurimm","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("finetuning_sentiment_model_3000_samples_gaurimm", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuning_sentiment_model_3000_samples_gaurimm| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/gaurimm/finetuning-sentiment-model-3000-samples \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-finetuning_sentiment_model_3000_samples_yuezhangjoslin_en.md b/docs/_posts/ahmedlone127/2024-09-05-finetuning_sentiment_model_3000_samples_yuezhangjoslin_en.md new file mode 100644 index 00000000000000..6023c596d1e0a7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-finetuning_sentiment_model_3000_samples_yuezhangjoslin_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English finetuning_sentiment_model_3000_samples_yuezhangjoslin DistilBertForSequenceClassification from Yuezhangjoslin +author: John Snow Labs +name: finetuning_sentiment_model_3000_samples_yuezhangjoslin +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuning_sentiment_model_3000_samples_yuezhangjoslin` is a English model originally trained by Yuezhangjoslin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuning_sentiment_model_3000_samples_yuezhangjoslin_en_5.5.0_3.0_1725507325313.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuning_sentiment_model_3000_samples_yuezhangjoslin_en_5.5.0_3.0_1725507325313.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

sequenceClassifier = DistilBertForSequenceClassification.pretrained("finetuning_sentiment_model_3000_samples_yuezhangjoslin","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("class")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols(Array("document"))
    .setOutputCol("token")

val sequenceClassifier = DistilBertForSequenceClassification.pretrained("finetuning_sentiment_model_3000_samples_yuezhangjoslin", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("class")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuning_sentiment_model_3000_samples_yuezhangjoslin| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Yuezhangjoslin/finetuning-sentiment-model-3000-samples \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-finetuning_sentiment_model_thread_3000_en.md b/docs/_posts/ahmedlone127/2024-09-05-finetuning_sentiment_model_thread_3000_en.md new file mode 100644 index 00000000000000..4e08b4a2ba920a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-finetuning_sentiment_model_thread_3000_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English finetuning_sentiment_model_thread_3000 DistilBertForSequenceClassification from sivaranjanisundarraj +author: John Snow Labs +name: finetuning_sentiment_model_thread_3000 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuning_sentiment_model_thread_3000` is a English model originally trained by sivaranjanisundarraj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuning_sentiment_model_thread_3000_en_5.5.0_3.0_1725507160593.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuning_sentiment_model_thread_3000_en_5.5.0_3.0_1725507160593.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

sequenceClassifier = DistilBertForSequenceClassification.pretrained("finetuning_sentiment_model_thread_3000","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("class")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols(Array("document"))
    .setOutputCol("token")

val sequenceClassifier = DistilBertForSequenceClassification.pretrained("finetuning_sentiment_model_thread_3000", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("class")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuning_sentiment_model_thread_3000| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/sivaranjanisundarraj/finetuning-sentiment-model-thread-3000 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-finetuning_sentiment_model_thread_3000_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-finetuning_sentiment_model_thread_3000_pipeline_en.md new file mode 100644 index 00000000000000..f39ed80aab67fa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-finetuning_sentiment_model_thread_3000_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English finetuning_sentiment_model_thread_3000_pipeline pipeline DistilBertForSequenceClassification from sivaranjanisundarraj +author: John Snow Labs +name: finetuning_sentiment_model_thread_3000_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuning_sentiment_model_thread_3000_pipeline` is a English model originally trained by sivaranjanisundarraj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuning_sentiment_model_thread_3000_pipeline_en_5.5.0_3.0_1725507172681.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuning_sentiment_model_thread_3000_pipeline_en_5.5.0_3.0_1725507172681.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("finetuning_sentiment_model_thread_3000_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("finetuning_sentiment_model_thread_3000_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
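
The pipeline examples above reference a DataFrame `df` that is not defined in the snippet; any Spark DataFrame with a `text` column should work. The sketch below is an illustrative assumption (both the `df` construction and the final `class` column name are not part of the original card, the latter inferred from the included sequence-classification stage).

```python
# Hypothetical input DataFrame; the pretrained pipeline reads the "text" column
df = spark.createDataFrame([["This movie was surprisingly good"]]).toDF("text")

pipeline = PretrainedPipeline("finetuning_sentiment_model_thread_3000_pipeline", lang="en")
annotations = pipeline.transform(df)

# The sequence-classification stage is expected to write its predictions to a "class" column
annotations.select("class.result").show(truncate=False)
```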
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuning_sentiment_model_thread_3000_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/sivaranjanisundarraj/finetuning-sentiment-model-thread-3000 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-flip_base_32_en.md b/docs/_posts/ahmedlone127/2024-09-05-flip_base_32_en.md new file mode 100644 index 00000000000000..6ebd6247b42b16 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-flip_base_32_en.md @@ -0,0 +1,120 @@ +--- +layout: model +title: English flip_base_32 CLIPForZeroShotClassification from FLIP-dataset +author: John Snow Labs +name: flip_base_32 +date: 2024-09-05 +tags: [en, open_source, onnx, zero_shot, clip, image] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CLIPForZeroShotClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`flip_base_32` is a English model originally trained by FLIP-dataset. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/flip_base_32_en_5.5.0_3.0_1725523428126.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/flip_base_32_en_5.5.0_3.0_1725523428126.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

imageDF = spark.read \
    .format("image") \
    .option("dropInvalid", value = True) \
    .load("src/test/resources/image/")

candidateLabels = [
    "a photo of a bird",
    "a photo of a cat",
    "a photo of a dog",
    "a photo of a hen",
    "a photo of a hippo",
    "a photo of a room",
    "a photo of a tractor",
    "a photo of an ostrich",
    "a photo of an ox"]

imageAssembler = ImageAssembler() \
    .setInputCol("image") \
    .setOutputCol("image_assembler")

imageClassifier = CLIPForZeroShotClassification.pretrained("flip_base_32","en") \
    .setInputCols(["image_assembler"]) \
    .setOutputCol("label") \
    .setCandidateLabels(candidateLabels)

pipeline = Pipeline().setStages([imageAssembler, imageClassifier])
pipelineModel = pipeline.fit(imageDF)
pipelineDF = pipelineModel.transform(imageDF)

```
```scala

val imageDF = ResourceHelper.spark.read
    .format("image")
    .option("dropInvalid", value = true)
    .load("src/test/resources/image/")

val candidateLabels = Array(
    "a photo of a bird",
    "a photo of a cat",
    "a photo of a dog",
    "a photo of a hen",
    "a photo of a hippo",
    "a photo of a room",
    "a photo of a tractor",
    "a photo of an ostrich",
    "a photo of an ox")

val imageAssembler = new ImageAssembler()
    .setInputCol("image")
    .setOutputCol("image_assembler")

val imageClassifier = CLIPForZeroShotClassification.pretrained("flip_base_32","en")
    .setInputCols(Array("image_assembler"))
    .setOutputCol("label")
    .setCandidateLabels(candidateLabels)

val pipeline = new Pipeline().setStages(Array(imageAssembler, imageClassifier))
val pipelineModel = pipeline.fit(imageDF)
val pipelineDF = pipelineModel.transform(imageDF)

```
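
Once the pipeline has run, the chosen candidate label for each image is available in the `label` annotation column. The following sketch assumes the `pipelineDF` DataFrame from the Python example above.

```python
# Pair each image path with the zero-shot label the model picked
pipelineDF.select("image.origin", "label.result").show(truncate=False)
```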
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|flip_base_32| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[label]| +|Language:|en| +|Size:|567.3 MB| + +## References + +https://huggingface.co/FLIP-dataset/FLIP-base-32 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-flip_base_32_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-flip_base_32_pipeline_en.md new file mode 100644 index 00000000000000..9bd059cdcfae4d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-flip_base_32_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English flip_base_32_pipeline pipeline CLIPForZeroShotClassification from FLIP-dataset +author: John Snow Labs +name: flip_base_32_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`flip_base_32_pipeline` is a English model originally trained by FLIP-dataset. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/flip_base_32_pipeline_en_5.5.0_3.0_1725523455932.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/flip_base_32_pipeline_en_5.5.0_3.0_1725523455932.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("flip_base_32_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("flip_base_32_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|flip_base_32_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|567.3 MB| + +## References + +https://huggingface.co/FLIP-dataset/FLIP-base-32 + +## Included Models + +- ImageAssembler +- CLIPForZeroShotClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-flip_large_14_en.md b/docs/_posts/ahmedlone127/2024-09-05-flip_large_14_en.md new file mode 100644 index 00000000000000..5dd7b8b26b33f2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-flip_large_14_en.md @@ -0,0 +1,120 @@ +--- +layout: model +title: English flip_large_14 CLIPForZeroShotClassification from FLIP-dataset +author: John Snow Labs +name: flip_large_14 +date: 2024-09-05 +tags: [en, open_source, onnx, zero_shot, clip, image] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CLIPForZeroShotClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`flip_large_14` is a English model originally trained by FLIP-dataset. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/flip_large_14_en_5.5.0_3.0_1725540348997.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/flip_large_14_en_5.5.0_3.0_1725540348997.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

imageDF = spark.read \
    .format("image") \
    .option("dropInvalid", value = True) \
    .load("src/test/resources/image/")

candidateLabels = [
    "a photo of a bird",
    "a photo of a cat",
    "a photo of a dog",
    "a photo of a hen",
    "a photo of a hippo",
    "a photo of a room",
    "a photo of a tractor",
    "a photo of an ostrich",
    "a photo of an ox"]

imageAssembler = ImageAssembler() \
    .setInputCol("image") \
    .setOutputCol("image_assembler")

imageClassifier = CLIPForZeroShotClassification.pretrained("flip_large_14","en") \
    .setInputCols(["image_assembler"]) \
    .setOutputCol("label") \
    .setCandidateLabels(candidateLabels)

pipeline = Pipeline().setStages([imageAssembler, imageClassifier])
pipelineModel = pipeline.fit(imageDF)
pipelineDF = pipelineModel.transform(imageDF)

```
```scala

val imageDF = ResourceHelper.spark.read
    .format("image")
    .option("dropInvalid", value = true)
    .load("src/test/resources/image/")

val candidateLabels = Array(
    "a photo of a bird",
    "a photo of a cat",
    "a photo of a dog",
    "a photo of a hen",
    "a photo of a hippo",
    "a photo of a room",
    "a photo of a tractor",
    "a photo of an ostrich",
    "a photo of an ox")

val imageAssembler = new ImageAssembler()
    .setInputCol("image")
    .setOutputCol("image_assembler")

val imageClassifier = CLIPForZeroShotClassification.pretrained("flip_large_14","en")
    .setInputCols(Array("image_assembler"))
    .setOutputCol("label")
    .setCandidateLabels(candidateLabels)

val pipeline = new Pipeline().setStages(Array(imageAssembler, imageClassifier))
val pipelineModel = pipeline.fit(imageDF)
val pipelineDF = pipelineModel.transform(imageDF)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|flip_large_14| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[label]| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/FLIP-dataset/FLIP-large-14 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-flip_large_14_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-flip_large_14_pipeline_en.md new file mode 100644 index 00000000000000..e728c740507fc1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-flip_large_14_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English flip_large_14_pipeline pipeline CLIPForZeroShotClassification from FLIP-dataset +author: John Snow Labs +name: flip_large_14_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`flip_large_14_pipeline` is a English model originally trained by FLIP-dataset. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/flip_large_14_pipeline_en_5.5.0_3.0_1725540425894.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/flip_large_14_pipeline_en_5.5.0_3.0_1725540425894.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("flip_large_14_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("flip_large_14_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|flip_large_14_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/FLIP-dataset/FLIP-large-14 + +## Included Models + +- ImageAssembler +- CLIPForZeroShotClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-fralbert_base_fr.md b/docs/_posts/ahmedlone127/2024-09-05-fralbert_base_fr.md new file mode 100644 index 00000000000000..9a46c3744e209b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-fralbert_base_fr.md @@ -0,0 +1,94 @@ +--- +layout: model +title: French fralbert_base AlbertEmbeddings from qwant +author: John Snow Labs +name: fralbert_base +date: 2024-09-05 +tags: [fr, open_source, onnx, embeddings, albert] +task: Embeddings +language: fr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fralbert_base` is a French model originally trained by qwant. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fralbert_base_fr_5.5.0_3.0_1725568510160.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fralbert_base_fr_5.5.0_3.0_1725568510160.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = AlbertEmbeddings.pretrained("fralbert_base","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = AlbertEmbeddings.pretrained("fralbert_base","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
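
Each token receives a vector in the `embeddings` annotation column. The snippet below is a small sketch, assuming the `pipelineDF` DataFrame produced by the Python example above, for pulling out token/vector pairs.

```python
from pyspark.sql import functions as F

# Explode the annotations so every token and its embedding vector land on one row
pipelineDF.select(F.explode("embeddings").alias("emb")) \
    .select(F.col("emb.result").alias("token"), F.col("emb.embeddings").alias("vector")) \
    .show(truncate=80)
```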
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fralbert_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[albert]| +|Language:|fr| +|Size:|43.0 MB| + +## References + +https://huggingface.co/qwant/fralbert-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-fralbert_base_pipeline_fr.md b/docs/_posts/ahmedlone127/2024-09-05-fralbert_base_pipeline_fr.md new file mode 100644 index 00000000000000..8458e092746aa1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-fralbert_base_pipeline_fr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: French fralbert_base_pipeline pipeline AlbertEmbeddings from qwant +author: John Snow Labs +name: fralbert_base_pipeline +date: 2024-09-05 +tags: [fr, open_source, pipeline, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fralbert_base_pipeline` is a French model originally trained by qwant. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fralbert_base_pipeline_fr_5.5.0_3.0_1725568512538.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fralbert_base_pipeline_fr_5.5.0_3.0_1725568512538.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("fralbert_base_pipeline", lang = "fr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("fralbert_base_pipeline", lang = "fr") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fralbert_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fr| +|Size:|43.0 MB| + +## References + +https://huggingface.co/qwant/fralbert-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-fromhf_en.md b/docs/_posts/ahmedlone127/2024-09-05-fromhf_en.md new file mode 100644 index 00000000000000..1bab47c6edf4d2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-fromhf_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English fromhf DistilBertForTokenClassification from girsha +author: John Snow Labs +name: fromhf +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fromhf` is a English model originally trained by girsha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fromhf_en_5.5.0_3.0_1725496261015.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fromhf_en_5.5.0_3.0_1725496261015.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

tokenClassifier = DistilBertForTokenClassification.pretrained("fromhf","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("ner")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols("document")
    .setOutputCol("token")

val tokenClassifier = DistilBertForTokenClassification.pretrained("fromhf", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("ner")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
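
For a quick look at the NER output, the tokens and their predicted tags can be selected side by side; this assumes the `pipelineDF` DataFrame from the Python example above.

```python
# token.result holds the tokens, ner.result the predicted tag for each token
pipelineDF.select("token.result", "ner.result").show(truncate=False)
```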
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fromhf| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/girsha/fromHF \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-fromhf_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-fromhf_pipeline_en.md new file mode 100644 index 00000000000000..9431f80c96f5dd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-fromhf_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English fromhf_pipeline pipeline DistilBertForTokenClassification from girsha +author: John Snow Labs +name: fromhf_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fromhf_pipeline` is a English model originally trained by girsha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fromhf_pipeline_en_5.5.0_3.0_1725496273142.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fromhf_pipeline_en_5.5.0_3.0_1725496273142.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("fromhf_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("fromhf_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fromhf_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/girsha/fromHF + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ft_distilbert_gest_pred_seqeval_partialmatch_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-ft_distilbert_gest_pred_seqeval_partialmatch_pipeline_en.md new file mode 100644 index 00000000000000..fcbe033a373af4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ft_distilbert_gest_pred_seqeval_partialmatch_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English ft_distilbert_gest_pred_seqeval_partialmatch_pipeline pipeline DistilBertForTokenClassification from Jsevisal +author: John Snow Labs +name: ft_distilbert_gest_pred_seqeval_partialmatch_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ft_distilbert_gest_pred_seqeval_partialmatch_pipeline` is a English model originally trained by Jsevisal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ft_distilbert_gest_pred_seqeval_partialmatch_pipeline_en_5.5.0_3.0_1725500838182.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ft_distilbert_gest_pred_seqeval_partialmatch_pipeline_en_5.5.0_3.0_1725500838182.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ft_distilbert_gest_pred_seqeval_partialmatch_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ft_distilbert_gest_pred_seqeval_partialmatch_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ft_distilbert_gest_pred_seqeval_partialmatch_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.9 MB| + +## References + +https://huggingface.co/Jsevisal/ft-distilbert-gest-pred-seqeval-partialmatch + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_nlp_feup_en.md b/docs/_posts/ahmedlone127/2024-09-05-ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_nlp_feup_en.md new file mode 100644 index 00000000000000..9da247e5927edd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_nlp_feup_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_nlp_feup RoBertaForSequenceClassification from NLP-FEUP +author: John Snow Labs +name: ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_nlp_feup +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_nlp_feup` is a English model originally trained by NLP-FEUP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_nlp_feup_en_5.5.0_3.0_1725542445028.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_nlp_feup_en_5.5.0_3.0_1725542445028.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

sequenceClassifier = RoBertaForSequenceClassification.pretrained("ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_nlp_feup","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("class")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols(Array("document"))
    .setOutputCol("token")

val sequenceClassifier = RoBertaForSequenceClassification.pretrained("ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_nlp_feup", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("class")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_nlp_feup| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|309.0 MB| + +## References + +https://huggingface.co/NLP-FEUP/FT-mrm8488-distilroberta-finetuned-financial-news-sentiment-analysis \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_nlp_feup_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_nlp_feup_pipeline_en.md new file mode 100644 index 00000000000000..d8ad64d81a9874 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_nlp_feup_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_nlp_feup_pipeline pipeline RoBertaForSequenceClassification from NLP-FEUP +author: John Snow Labs +name: ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_nlp_feup_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_nlp_feup_pipeline` is a English model originally trained by NLP-FEUP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_nlp_feup_pipeline_en_5.5.0_3.0_1725542461418.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_nlp_feup_pipeline_en_5.5.0_3.0_1725542461418.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_nlp_feup_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_nlp_feup_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_nlp_feup_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|309.1 MB| + +## References + +https://huggingface.co/NLP-FEUP/FT-mrm8488-distilroberta-finetuned-financial-news-sentiment-analysis + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_rnribeiro_en.md b/docs/_posts/ahmedlone127/2024-09-05-ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_rnribeiro_en.md new file mode 100644 index 00000000000000..156ba85780b632 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_rnribeiro_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_rnribeiro RoBertaForSequenceClassification from rnribeiro +author: John Snow Labs +name: ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_rnribeiro +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_rnribeiro` is a English model originally trained by rnribeiro. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_rnribeiro_en_5.5.0_3.0_1725542019426.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_rnribeiro_en_5.5.0_3.0_1725542019426.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

sequenceClassifier = RoBertaForSequenceClassification.pretrained("ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_rnribeiro","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("class")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols(Array("document"))
    .setOutputCol("token")

val sequenceClassifier = RoBertaForSequenceClassification.pretrained("ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_rnribeiro", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("class")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_rnribeiro| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|309.0 MB| + +## References + +https://huggingface.co/rnribeiro/FT-mrm8488-distilroberta-finetuned-financial-news-sentiment-analysis \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_rnribeiro_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_rnribeiro_pipeline_en.md new file mode 100644 index 00000000000000..fd1fa5d107f9c0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_rnribeiro_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_rnribeiro_pipeline pipeline RoBertaForSequenceClassification from rnribeiro +author: John Snow Labs +name: ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_rnribeiro_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_rnribeiro_pipeline` is a English model originally trained by rnribeiro. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_rnribeiro_pipeline_en_5.5.0_3.0_1725542034026.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_rnribeiro_pipeline_en_5.5.0_3.0_1725542034026.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_rnribeiro_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_rnribeiro_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ft_mrm8488_distilroberta_finetuned_financial_news_sentiment_analysis_rnribeiro_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|309.1 MB| + +## References + +https://huggingface.co/rnribeiro/FT-mrm8488-distilroberta-finetuned-financial-news-sentiment-analysis + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ft_opensubs_arabic_english_marianmt_en.md b/docs/_posts/ahmedlone127/2024-09-05-ft_opensubs_arabic_english_marianmt_en.md new file mode 100644 index 00000000000000..c7afb6472ff005 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ft_opensubs_arabic_english_marianmt_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ft_opensubs_arabic_english_marianmt MarianTransformer from abdusah +author: John Snow Labs +name: ft_opensubs_arabic_english_marianmt +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ft_opensubs_arabic_english_marianmt` is a English model originally trained by abdusah. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ft_opensubs_arabic_english_marianmt_en_5.5.0_3.0_1725546331902.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ft_opensubs_arabic_english_marianmt_en_5.5.0_3.0_1725546331902.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol("text") \
    .setOutputCol("document")

sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
    .setInputCols(["document"]) \
    .setOutputCol("sentence")

marian = MarianTransformer.pretrained("ft_opensubs_arabic_english_marianmt","en") \
    .setInputCols(["sentence"]) \
    .setOutputCol("translation")

pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
    .setInputCols(Array("document"))
    .setOutputCol("sentence")

val marian = MarianTransformer.pretrained("ft_opensubs_arabic_english_marianmt","en")
    .setInputCols(Array("sentence"))
    .setOutputCol("translation")

val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
val data = Seq("I love spark-nlp").toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
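
After the pipeline runs, the translated sentences end up in the `translation` annotation column; a minimal sketch assuming the `pipelineDF` from the corrected Python example above:

```python
# One translated string per detected sentence
pipelineDF.select("translation.result").show(truncate=False)
```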
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ft_opensubs_arabic_english_marianmt| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|528.0 MB| + +## References + +https://huggingface.co/abdusah/ft-opensubs-ar-en-marianmt \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ft_opensubs_arabic_english_marianmt_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-ft_opensubs_arabic_english_marianmt_pipeline_en.md new file mode 100644 index 00000000000000..e8c0a100ccbef2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ft_opensubs_arabic_english_marianmt_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English ft_opensubs_arabic_english_marianmt_pipeline pipeline MarianTransformer from abdusah +author: John Snow Labs +name: ft_opensubs_arabic_english_marianmt_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ft_opensubs_arabic_english_marianmt_pipeline` is a English model originally trained by abdusah. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ft_opensubs_arabic_english_marianmt_pipeline_en_5.5.0_3.0_1725546358550.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ft_opensubs_arabic_english_marianmt_pipeline_en_5.5.0_3.0_1725546358550.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ft_opensubs_arabic_english_marianmt_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ft_opensubs_arabic_english_marianmt_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ft_opensubs_arabic_english_marianmt_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|528.5 MB| + +## References + +https://huggingface.co/abdusah/ft-opensubs-ar-en-marianmt + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-furina_seed42_eng_amh_esp_roman_en.md b/docs/_posts/ahmedlone127/2024-09-05-furina_seed42_eng_amh_esp_roman_en.md new file mode 100644 index 00000000000000..86e7232ad6e81a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-furina_seed42_eng_amh_esp_roman_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English furina_seed42_eng_amh_esp_roman XlmRoBertaForSequenceClassification from Shijia +author: John Snow Labs +name: furina_seed42_eng_amh_esp_roman +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`furina_seed42_eng_amh_esp_roman` is a English model originally trained by Shijia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/furina_seed42_eng_amh_esp_roman_en_5.5.0_3.0_1725535928652.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/furina_seed42_eng_amh_esp_roman_en_5.5.0_3.0_1725535928652.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("furina_seed42_eng_amh_esp_roman","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("class")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols(Array("document"))
    .setOutputCol("token")

val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("furina_seed42_eng_amh_esp_roman", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("class")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|furina_seed42_eng_amh_esp_roman| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/Shijia/furina_seed42_eng_amh_esp_roman \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-furina_seed42_eng_amh_esp_roman_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-furina_seed42_eng_amh_esp_roman_pipeline_en.md new file mode 100644 index 00000000000000..3cac8681f3aa9f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-furina_seed42_eng_amh_esp_roman_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English furina_seed42_eng_amh_esp_roman_pipeline pipeline XlmRoBertaForSequenceClassification from Shijia +author: John Snow Labs +name: furina_seed42_eng_amh_esp_roman_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`furina_seed42_eng_amh_esp_roman_pipeline` is a English model originally trained by Shijia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/furina_seed42_eng_amh_esp_roman_pipeline_en_5.5.0_3.0_1725535997843.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/furina_seed42_eng_amh_esp_roman_pipeline_en_5.5.0_3.0_1725535997843.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("furina_seed42_eng_amh_esp_roman_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("furina_seed42_eng_amh_esp_roman_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|furina_seed42_eng_amh_esp_roman_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/Shijia/furina_seed42_eng_amh_esp_roman + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-furina_seed42_eng_en.md b/docs/_posts/ahmedlone127/2024-09-05-furina_seed42_eng_en.md new file mode 100644 index 00000000000000..a0031e3f09237f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-furina_seed42_eng_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English furina_seed42_eng XlmRoBertaForSequenceClassification from Shijia +author: John Snow Labs +name: furina_seed42_eng +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`furina_seed42_eng` is a English model originally trained by Shijia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/furina_seed42_eng_en_5.5.0_3.0_1725529852404.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/furina_seed42_eng_en_5.5.0_3.0_1725529852404.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Assemble raw text into documents, tokenize, then classify with the pretrained model
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("furina_seed42_eng","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// Assemble raw text into documents, tokenize, then classify with the pretrained model
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("furina_seed42_eng", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
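+
+Once the fitted pipeline has transformed the data, the predicted label sits in the `class` output column configured above; a minimal sketch for inspecting it (column names follow the snippet, not the original card):
+
+```python
+# Show each input text next to its predicted label(s)
+pipelineDF.selectExpr("text", "`class`.result AS predicted_label").show(truncate=False)
+```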
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|furina_seed42_eng| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/Shijia/furina_seed42_eng \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-furina_seed42_eng_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-furina_seed42_eng_pipeline_en.md new file mode 100644 index 00000000000000..e50100dce13d6a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-furina_seed42_eng_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English furina_seed42_eng_pipeline pipeline XlmRoBertaForSequenceClassification from Shijia +author: John Snow Labs +name: furina_seed42_eng_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`furina_seed42_eng_pipeline` is a English model originally trained by Shijia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/furina_seed42_eng_pipeline_en_5.5.0_3.0_1725529929893.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/furina_seed42_eng_pipeline_en_5.5.0_3.0_1725529929893.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("furina_seed42_eng_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("furina_seed42_eng_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|furina_seed42_eng_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/Shijia/furina_seed42_eng + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-g3_finetuned_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-g3_finetuned_ner_pipeline_en.md new file mode 100644 index 00000000000000..3701eae6792cf9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-g3_finetuned_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English g3_finetuned_ner_pipeline pipeline DistilBertForTokenClassification from sahillihas +author: John Snow Labs +name: g3_finetuned_ner_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`g3_finetuned_ner_pipeline` is a English model originally trained by sahillihas. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/g3_finetuned_ner_pipeline_en_5.5.0_3.0_1725500931044.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/g3_finetuned_ner_pipeline_en_5.5.0_3.0_1725500931044.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("g3_finetuned_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("g3_finetuned_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|g3_finetuned_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/sahillihas/G3-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-game_content_safety_en.md b/docs/_posts/ahmedlone127/2024-09-05-game_content_safety_en.md new file mode 100644 index 00000000000000..f27dfeb523fb43 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-game_content_safety_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English game_content_safety XlmRoBertaForSequenceClassification from BrightXiaoHan +author: John Snow Labs +name: game_content_safety +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`game_content_safety` is a English model originally trained by BrightXiaoHan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/game_content_safety_en_5.5.0_3.0_1725537423966.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/game_content_safety_en_5.5.0_3.0_1725537423966.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Assemble raw text into documents, tokenize, then classify with the pretrained model
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("game_content_safety","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// Assemble raw text into documents, tokenize, then classify with the pretrained model
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("game_content_safety", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|game_content_safety| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|861.6 MB| + +## References + +https://huggingface.co/BrightXiaoHan/game-content-safety \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-game_content_safety_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-game_content_safety_pipeline_en.md new file mode 100644 index 00000000000000..ccbe558a7c8ca2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-game_content_safety_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English game_content_safety_pipeline pipeline XlmRoBertaForSequenceClassification from BrightXiaoHan +author: John Snow Labs +name: game_content_safety_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`game_content_safety_pipeline` is a English model originally trained by BrightXiaoHan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/game_content_safety_pipeline_en_5.5.0_3.0_1725537507524.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/game_content_safety_pipeline_en_5.5.0_3.0_1725537507524.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("game_content_safety_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("game_content_safety_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|game_content_safety_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|861.6 MB| + +## References + +https://huggingface.co/BrightXiaoHan/game-content-safety + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ganda_english_conrad747_en.md b/docs/_posts/ahmedlone127/2024-09-05-ganda_english_conrad747_en.md new file mode 100644 index 00000000000000..f8fba644fdf1a2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ganda_english_conrad747_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ganda_english_conrad747 MarianTransformer from Conrad747 +author: John Snow Labs +name: ganda_english_conrad747 +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ganda_english_conrad747` is a English model originally trained by Conrad747. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ganda_english_conrad747_en_5.5.0_3.0_1725545972609.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ganda_english_conrad747_en_5.5.0_3.0_1725545972609.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Split documents into sentences, then translate each sentence with the pretrained Marian model
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("ganda_english_conrad747","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// Split documents into sentences, then translate each sentence with the pretrained Marian model
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+  .setInputCols(Array("document"))
+  .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("ganda_english_conrad747","en")
+  .setInputCols(Array("sentence"))
+  .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
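+
+With the columns wired as above, each translated sentence lands in the `result` field of the `translation` column; a minimal sketch for reading it (assumes the column names used in the snippet):
+
+```python
+# One output row per translated sentence
+pipelineDF.selectExpr("explode(translation.result) AS translated_text").show(truncate=False)
+```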
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ganda_english_conrad747| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|532.4 MB| + +## References + +https://huggingface.co/Conrad747/lg-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-german_text_classification_de.md b/docs/_posts/ahmedlone127/2024-09-05-german_text_classification_de.md new file mode 100644 index 00000000000000..c619a739c17ef3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-german_text_classification_de.md @@ -0,0 +1,94 @@ +--- +layout: model +title: German german_text_classification XlmRoBertaForSequenceClassification from RashidNLP +author: John Snow Labs +name: german_text_classification +date: 2024-09-05 +tags: [de, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`german_text_classification` is a German model originally trained by RashidNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/german_text_classification_de_5.5.0_3.0_1725529029435.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/german_text_classification_de_5.5.0_3.0_1725529029435.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Assemble raw text into documents, tokenize, then classify with the pretrained model
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("german_text_classification","de") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// Assemble raw text into documents, tokenize, then classify with the pretrained model
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("german_text_classification", "de")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|german_text_classification| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|de| +|Size:|779.2 MB| + +## References + +https://huggingface.co/RashidNLP/German-Text-Classification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-german_text_classification_pipeline_de.md b/docs/_posts/ahmedlone127/2024-09-05-german_text_classification_pipeline_de.md new file mode 100644 index 00000000000000..3ca79b9aa85cc3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-german_text_classification_pipeline_de.md @@ -0,0 +1,70 @@ +--- +layout: model +title: German german_text_classification_pipeline pipeline XlmRoBertaForSequenceClassification from RashidNLP +author: John Snow Labs +name: german_text_classification_pipeline +date: 2024-09-05 +tags: [de, open_source, pipeline, onnx] +task: Text Classification +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`german_text_classification_pipeline` is a German model originally trained by RashidNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/german_text_classification_pipeline_de_5.5.0_3.0_1725529174863.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/german_text_classification_pipeline_de_5.5.0_3.0_1725529174863.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("german_text_classification_pipeline", lang = "de") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("german_text_classification_pipeline", lang = "de") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|german_text_classification_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|de| +|Size:|779.2 MB| + +## References + +https://huggingface.co/RashidNLP/German-Text-Classification + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-greberta_en.md b/docs/_posts/ahmedlone127/2024-09-05-greberta_en.md new file mode 100644 index 00000000000000..07543ccd535fc8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-greberta_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English greberta RoBertaEmbeddings from bowphs +author: John Snow Labs +name: greberta +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`greberta` is a English model originally trained by bowphs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/greberta_en_5.5.0_3.0_1725566266142.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/greberta_en_5.5.0_3.0_1725566266142.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("greberta","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("greberta","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
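+
+The `embeddings` output column holds one annotation per token, each carrying its vector; a minimal sketch for pulling tokens and vectors out (field names follow the general Spark NLP annotation schema, assumed here rather than taken from the card):
+
+```python
+# Explode to one row per token, then read the token text and its embedding vector
+pipelineDF.selectExpr("explode(embeddings) AS emb") \
+    .selectExpr("emb.result AS token", "emb.embeddings AS vector") \
+    .show(truncate=False)
+```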
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|greberta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|468.1 MB| + +## References + +https://huggingface.co/bowphs/GreBerta \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-greberta_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-greberta_pipeline_en.md new file mode 100644 index 00000000000000..71ca1adebc3238 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-greberta_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English greberta_pipeline pipeline RoBertaEmbeddings from bowphs +author: John Snow Labs +name: greberta_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`greberta_pipeline` is a English model originally trained by bowphs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/greberta_pipeline_en_5.5.0_3.0_1725566290915.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/greberta_pipeline_en_5.5.0_3.0_1725566290915.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("greberta_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("greberta_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|greberta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|468.1 MB| + +## References + +https://huggingface.co/bowphs/GreBerta + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-gun_nlth_base_en.md b/docs/_posts/ahmedlone127/2024-09-05-gun_nlth_base_en.md new file mode 100644 index 00000000000000..4ba6e0fd47c4a1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-gun_nlth_base_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English gun_nlth_base MarianTransformer from tiagoblima +author: John Snow Labs +name: gun_nlth_base +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`gun_nlth_base` is a English model originally trained by tiagoblima. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gun_nlth_base_en_5.5.0_3.0_1725544873623.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gun_nlth_base_en_5.5.0_3.0_1725544873623.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Split documents into sentences, then translate each sentence with the pretrained Marian model
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("gun_nlth_base","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// Split documents into sentences, then translate each sentence with the pretrained Marian model
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+  .setInputCols(Array("document"))
+  .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("gun_nlth_base","en")
+  .setInputCols(Array("sentence"))
+  .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gun_nlth_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|220.2 MB| + +## References + +https://huggingface.co/tiagoblima/gun-nlth-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-gun_nlth_base_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-gun_nlth_base_pipeline_en.md new file mode 100644 index 00000000000000..c889dccb6f0910 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-gun_nlth_base_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English gun_nlth_base_pipeline pipeline MarianTransformer from tiagoblima +author: John Snow Labs +name: gun_nlth_base_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`gun_nlth_base_pipeline` is a English model originally trained by tiagoblima. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gun_nlth_base_pipeline_en_5.5.0_3.0_1725544884948.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gun_nlth_base_pipeline_en_5.5.0_3.0_1725544884948.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("gun_nlth_base_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("gun_nlth_base_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gun_nlth_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|220.8 MB| + +## References + +https://huggingface.co/tiagoblima/gun-nlth-base + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-gysbert_v2_en.md b/docs/_posts/ahmedlone127/2024-09-05-gysbert_v2_en.md new file mode 100644 index 00000000000000..a699d229a1b394 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-gysbert_v2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English gysbert_v2 BertEmbeddings from emanjavacas +author: John Snow Labs +name: gysbert_v2 +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`gysbert_v2` is a English model originally trained by emanjavacas. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gysbert_v2_en_5.5.0_3.0_1725552844268.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gysbert_v2_en_5.5.0_3.0_1725552844268.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("gysbert_v2","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("gysbert_v2","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gysbert_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/emanjavacas/GysBERT-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-gysbert_v2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-gysbert_v2_pipeline_en.md new file mode 100644 index 00000000000000..8f3fe511b09d25 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-gysbert_v2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English gysbert_v2_pipeline pipeline BertEmbeddings from emanjavacas +author: John Snow Labs +name: gysbert_v2_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`gysbert_v2_pipeline` is a English model originally trained by emanjavacas. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gysbert_v2_pipeline_en_5.5.0_3.0_1725552870270.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gysbert_v2_pipeline_en_5.5.0_3.0_1725552870270.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("gysbert_v2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("gysbert_v2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gysbert_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/emanjavacas/GysBERT-v2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-hafez_ner_fa.md b/docs/_posts/ahmedlone127/2024-09-05-hafez_ner_fa.md new file mode 100644 index 00000000000000..859e3933ee025a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-hafez_ner_fa.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Persian hafez_ner BertForTokenClassification from ViravirastSHZ +author: John Snow Labs +name: hafez_ner +date: 2024-09-05 +tags: [fa, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: fa +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hafez_ner` is a Persian model originally trained by ViravirastSHZ. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hafez_ner_fa_5.5.0_3.0_1725538798259.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hafez_ner_fa_5.5.0_3.0_1725538798259.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Assemble raw text into documents, tokenize, then tag each token with the pretrained NER model
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("hafez_ner","fa") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// Assemble raw text into documents, tokenize, then tag each token with the pretrained NER model
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("hafez_ner", "fa")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
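+
+Note that the sample sentence in the snippet is English while `hafez_ner` targets Persian, so real inputs should be Persian text. The predicted tags appear in the `ner` column alongside the tokens; a minimal sketch for viewing them side by side (column names follow the snippet above):
+
+```python
+# Show the token list next to the predicted NER tag list for each row
+pipelineDF.selectExpr("token.result AS tokens", "ner.result AS ner_tags").show(truncate=False)
+```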
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hafez_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|fa| +|Size:|408.2 MB| + +## References + +https://huggingface.co/ViravirastSHZ/Hafez-NER \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-hafez_ner_pipeline_fa.md b/docs/_posts/ahmedlone127/2024-09-05-hafez_ner_pipeline_fa.md new file mode 100644 index 00000000000000..8a51f8562d54c0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-hafez_ner_pipeline_fa.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Persian hafez_ner_pipeline pipeline BertForTokenClassification from ViravirastSHZ +author: John Snow Labs +name: hafez_ner_pipeline +date: 2024-09-05 +tags: [fa, open_source, pipeline, onnx] +task: Named Entity Recognition +language: fa +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hafez_ner_pipeline` is a Persian model originally trained by ViravirastSHZ. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hafez_ner_pipeline_fa_5.5.0_3.0_1725538819123.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hafez_ner_pipeline_fa_5.5.0_3.0_1725538819123.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("hafez_ner_pipeline", lang = "fa") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("hafez_ner_pipeline", lang = "fa") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hafez_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fa| +|Size:|408.3 MB| + +## References + +https://huggingface.co/ViravirastSHZ/Hafez-NER + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-hate_hate_random1_seed0_bernice_en.md b/docs/_posts/ahmedlone127/2024-09-05-hate_hate_random1_seed0_bernice_en.md new file mode 100644 index 00000000000000..650f0db2be602e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-hate_hate_random1_seed0_bernice_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English hate_hate_random1_seed0_bernice XlmRoBertaForSequenceClassification from tweettemposhift +author: John Snow Labs +name: hate_hate_random1_seed0_bernice +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hate_hate_random1_seed0_bernice` is a English model originally trained by tweettemposhift. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hate_hate_random1_seed0_bernice_en_5.5.0_3.0_1725529035245.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hate_hate_random1_seed0_bernice_en_5.5.0_3.0_1725529035245.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Assemble raw text into documents, tokenize, then classify with the pretrained model
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("hate_hate_random1_seed0_bernice","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// Assemble raw text into documents, tokenize, then classify with the pretrained model
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("hate_hate_random1_seed0_bernice", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hate_hate_random1_seed0_bernice| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|783.3 MB| + +## References + +https://huggingface.co/tweettemposhift/hate-hate_random1_seed0-bernice \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-hate_hate_random1_seed0_bernice_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-hate_hate_random1_seed0_bernice_pipeline_en.md new file mode 100644 index 00000000000000..321fa92eef58db --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-hate_hate_random1_seed0_bernice_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English hate_hate_random1_seed0_bernice_pipeline pipeline XlmRoBertaForSequenceClassification from tweettemposhift +author: John Snow Labs +name: hate_hate_random1_seed0_bernice_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hate_hate_random1_seed0_bernice_pipeline` is a English model originally trained by tweettemposhift. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hate_hate_random1_seed0_bernice_pipeline_en_5.5.0_3.0_1725529185433.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hate_hate_random1_seed0_bernice_pipeline_en_5.5.0_3.0_1725529185433.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("hate_hate_random1_seed0_bernice_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("hate_hate_random1_seed0_bernice_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hate_hate_random1_seed0_bernice_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|783.3 MB| + +## References + +https://huggingface.co/tweettemposhift/hate-hate_random1_seed0-bernice + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-helsinki_danish_swedish_v17_en.md b/docs/_posts/ahmedlone127/2024-09-05-helsinki_danish_swedish_v17_en.md new file mode 100644 index 00000000000000..d7802f34a36e27 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-helsinki_danish_swedish_v17_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English helsinki_danish_swedish_v17 MarianTransformer from Danieljacobsen +author: John Snow Labs +name: helsinki_danish_swedish_v17 +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`helsinki_danish_swedish_v17` is a English model originally trained by Danieljacobsen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/helsinki_danish_swedish_v17_en_5.5.0_3.0_1725545442736.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/helsinki_danish_swedish_v17_en_5.5.0_3.0_1725545442736.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Split documents into sentences, then translate each sentence with the pretrained Marian model
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("helsinki_danish_swedish_v17","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// Split documents into sentences, then translate each sentence with the pretrained Marian model
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+  .setInputCols(Array("document"))
+  .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("helsinki_danish_swedish_v17","en")
+  .setInputCols(Array("sentence"))
+  .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|helsinki_danish_swedish_v17| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|496.8 MB| + +## References + +https://huggingface.co/Danieljacobsen/Helsinki-DA-SV-v17 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-helsinki_danish_swedish_v17_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-helsinki_danish_swedish_v17_pipeline_en.md new file mode 100644 index 00000000000000..1f41b904c021e6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-helsinki_danish_swedish_v17_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English helsinki_danish_swedish_v17_pipeline pipeline MarianTransformer from Danieljacobsen +author: John Snow Labs +name: helsinki_danish_swedish_v17_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`helsinki_danish_swedish_v17_pipeline` is a English model originally trained by Danieljacobsen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/helsinki_danish_swedish_v17_pipeline_en_5.5.0_3.0_1725545469624.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/helsinki_danish_swedish_v17_pipeline_en_5.5.0_3.0_1725545469624.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("helsinki_danish_swedish_v17_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("helsinki_danish_swedish_v17_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|helsinki_danish_swedish_v17_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|497.3 MB| + +## References + +https://huggingface.co/Danieljacobsen/Helsinki-DA-SV-v17 + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-helsinki_danish_swedish_v6_en.md b/docs/_posts/ahmedlone127/2024-09-05-helsinki_danish_swedish_v6_en.md new file mode 100644 index 00000000000000..78c8952668f846 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-helsinki_danish_swedish_v6_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English helsinki_danish_swedish_v6 MarianTransformer from Danieljacobsen +author: John Snow Labs +name: helsinki_danish_swedish_v6 +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`helsinki_danish_swedish_v6` is a English model originally trained by Danieljacobsen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/helsinki_danish_swedish_v6_en_5.5.0_3.0_1725545085226.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/helsinki_danish_swedish_v6_en_5.5.0_3.0_1725545085226.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Split documents into sentences, then translate each sentence with the pretrained Marian model
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("helsinki_danish_swedish_v6","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// Split documents into sentences, then translate each sentence with the pretrained Marian model
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+  .setInputCols(Array("document"))
+  .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("helsinki_danish_swedish_v6","en")
+  .setInputCols(Array("sentence"))
+  .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|helsinki_danish_swedish_v6| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|497.5 MB| + +## References + +https://huggingface.co/Danieljacobsen/Helsinki-DA-SV-v6 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-helsinki_danish_swedish_v6_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-helsinki_danish_swedish_v6_pipeline_en.md new file mode 100644 index 00000000000000..7c8f0f6d618fa1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-helsinki_danish_swedish_v6_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English helsinki_danish_swedish_v6_pipeline pipeline MarianTransformer from Danieljacobsen +author: John Snow Labs +name: helsinki_danish_swedish_v6_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`helsinki_danish_swedish_v6_pipeline` is a English model originally trained by Danieljacobsen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/helsinki_danish_swedish_v6_pipeline_en_5.5.0_3.0_1725545112564.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/helsinki_danish_swedish_v6_pipeline_en_5.5.0_3.0_1725545112564.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("helsinki_danish_swedish_v6_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("helsinki_danish_swedish_v6_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|helsinki_danish_swedish_v6_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|498.0 MB| + +## References + +https://huggingface.co/Danieljacobsen/Helsinki-DA-SV-v6 + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-helsinki_nlp_opus_maltese_uyghur_en.md b/docs/_posts/ahmedlone127/2024-09-05-helsinki_nlp_opus_maltese_uyghur_en.md new file mode 100644 index 00000000000000..c415807858ecb2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-helsinki_nlp_opus_maltese_uyghur_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English helsinki_nlp_opus_maltese_uyghur MarianTransformer from MT-Informal-Languages +author: John Snow Labs +name: helsinki_nlp_opus_maltese_uyghur +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`helsinki_nlp_opus_maltese_uyghur` is a English model originally trained by MT-Informal-Languages. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/helsinki_nlp_opus_maltese_uyghur_en_5.5.0_3.0_1725546107031.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/helsinki_nlp_opus_maltese_uyghur_en_5.5.0_3.0_1725546107031.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("helsinki_nlp_opus_maltese_uyghur","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("helsinki_nlp_opus_maltese_uyghur","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
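+
+Once `pipelineDF` has been computed as above, the translated text can be read back from the `translation` column. A small follow-up sketch, reusing the variables from the example above:
+
+```python
+# Each row carries one translated sentence per detected input sentence
+pipelineDF.selectExpr("explode(translation.result) as translated_text") \
+    .show(truncate=False)
+```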
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|helsinki_nlp_opus_maltese_uyghur| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|532.6 MB| + +## References + +https://huggingface.co/MT-Informal-Languages/Helsinki-NLP-opus-mt-ug \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-helsinki_nlp_opus_maltese_uyghur_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-helsinki_nlp_opus_maltese_uyghur_pipeline_en.md new file mode 100644 index 00000000000000..8f770660e2f25e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-helsinki_nlp_opus_maltese_uyghur_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English helsinki_nlp_opus_maltese_uyghur_pipeline pipeline MarianTransformer from MT-Informal-Languages +author: John Snow Labs +name: helsinki_nlp_opus_maltese_uyghur_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`helsinki_nlp_opus_maltese_uyghur_pipeline` is a English model originally trained by MT-Informal-Languages. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/helsinki_nlp_opus_maltese_uyghur_pipeline_en_5.5.0_3.0_1725546136370.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/helsinki_nlp_opus_maltese_uyghur_pipeline_en_5.5.0_3.0_1725546136370.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("helsinki_nlp_opus_maltese_uyghur_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("helsinki_nlp_opus_maltese_uyghur_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|helsinki_nlp_opus_maltese_uyghur_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|533.1 MB| + +## References + +https://huggingface.co/MT-Informal-Languages/Helsinki-NLP-opus-mt-ug + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-hf_distilbert_imdb_mlm_en.md b/docs/_posts/ahmedlone127/2024-09-05-hf_distilbert_imdb_mlm_en.md new file mode 100644 index 00000000000000..bd47d27b89007b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-hf_distilbert_imdb_mlm_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English hf_distilbert_imdb_mlm DistilBertEmbeddings from nos1de +author: John Snow Labs +name: hf_distilbert_imdb_mlm +date: 2024-09-05 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hf_distilbert_imdb_mlm` is a English model originally trained by nos1de. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hf_distilbert_imdb_mlm_en_5.5.0_3.0_1725524138179.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hf_distilbert_imdb_mlm_en_5.5.0_3.0_1725524138179.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = DistilBertEmbeddings.pretrained("hf_distilbert_imdb_mlm","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = DistilBertEmbeddings
+    .pretrained("hf_distilbert_imdb_mlm", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+```
+</div>
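+
+The token-level vectors produced above can be converted into plain Spark vectors with `EmbeddingsFinisher`. A short sketch, assuming the `pipelineDF` from the example above:
+
+```python
+from sparknlp.base import EmbeddingsFinisher
+
+# Turn the "embeddings" annotations into one vector per token
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["finished_embeddings"]) \
+    .setOutputAsVector(True)
+
+finished = finisher.transform(pipelineDF)
+finished.selectExpr("explode(finished_embeddings) as token_embedding").show()
+```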
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hf_distilbert_imdb_mlm| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +References + +https://huggingface.co/nos1de/hf-distilbert-imdb-mlm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-hf_distilbert_imdb_mlm_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-hf_distilbert_imdb_mlm_pipeline_en.md new file mode 100644 index 00000000000000..c030ebeec020a7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-hf_distilbert_imdb_mlm_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English hf_distilbert_imdb_mlm_pipeline pipeline DistilBertEmbeddings from ecwk +author: John Snow Labs +name: hf_distilbert_imdb_mlm_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hf_distilbert_imdb_mlm_pipeline` is a English model originally trained by ecwk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hf_distilbert_imdb_mlm_pipeline_en_5.5.0_3.0_1725524150180.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hf_distilbert_imdb_mlm_pipeline_en_5.5.0_3.0_1725524150180.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("hf_distilbert_imdb_mlm_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("hf_distilbert_imdb_mlm_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hf_distilbert_imdb_mlm_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ecwk/hf-distilbert-imdb-mlm + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-hihu2_en.md b/docs/_posts/ahmedlone127/2024-09-05-hihu2_en.md new file mode 100644 index 00000000000000..6fd4bc93945139 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-hihu2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English hihu2 XlmRoBertaForSequenceClassification from wnic00 +author: John Snow Labs +name: hihu2 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hihu2` is a English model originally trained by wnic00. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hihu2_en_5.5.0_3.0_1725530383835.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hihu2_en_5.5.0_3.0_1725530383835.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("hihu2","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("hihu2", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
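+
+After the pipeline has run, the predicted label for each input row can be read from the `class` column. A brief sketch, reusing `pipelineDF` from above:
+
+```python
+# "class.result" holds the predicted label for each row
+pipelineDF.select("text", "class.result").show(truncate=False)
+```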
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hihu2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/wnic00/hihu2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-hihu2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-hihu2_pipeline_en.md new file mode 100644 index 00000000000000..68cdfd7ebbcba5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-hihu2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English hihu2_pipeline pipeline XlmRoBertaForSequenceClassification from wnic00 +author: John Snow Labs +name: hihu2_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hihu2_pipeline` is a English model originally trained by wnic00. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hihu2_pipeline_en_5.5.0_3.0_1725530436917.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hihu2_pipeline_en_5.5.0_3.0_1725530436917.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("hihu2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("hihu2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hihu2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/wnic00/hihu2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-hihu4_en.md b/docs/_posts/ahmedlone127/2024-09-05-hihu4_en.md new file mode 100644 index 00000000000000..acec19240d99b0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-hihu4_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English hihu4 XlmRoBertaForSequenceClassification from wnic00 +author: John Snow Labs +name: hihu4 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hihu4` is a English model originally trained by wnic00. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hihu4_en_5.5.0_3.0_1725527661248.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hihu4_en_5.5.0_3.0_1725527661248.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("hihu4","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("hihu4", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hihu4| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/wnic00/hihu4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-hihu4_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-hihu4_pipeline_en.md new file mode 100644 index 00000000000000..5e885a0ac43826 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-hihu4_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English hihu4_pipeline pipeline XlmRoBertaForSequenceClassification from wnic00 +author: John Snow Labs +name: hihu4_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hihu4_pipeline` is a English model originally trained by wnic00. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hihu4_pipeline_en_5.5.0_3.0_1725527711517.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hihu4_pipeline_en_5.5.0_3.0_1725527711517.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("hihu4_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("hihu4_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hihu4_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/wnic00/hihu4 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-hindi_marathi_dev_albert_hi.md b/docs/_posts/ahmedlone127/2024-09-05-hindi_marathi_dev_albert_hi.md new file mode 100644 index 00000000000000..b72b3ea7ae43e7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-hindi_marathi_dev_albert_hi.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Hindi hindi_marathi_dev_albert AlbertEmbeddings from l3cube-pune +author: John Snow Labs +name: hindi_marathi_dev_albert +date: 2024-09-05 +tags: [hi, open_source, onnx, embeddings, albert] +task: Embeddings +language: hi +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hindi_marathi_dev_albert` is a Hindi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hindi_marathi_dev_albert_hi_5.5.0_3.0_1725568440553.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hindi_marathi_dev_albert_hi_5.5.0_3.0_1725568440553.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = AlbertEmbeddings.pretrained("hindi_marathi_dev_albert","hi") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = AlbertEmbeddings.pretrained("hindi_marathi_dev_albert","hi") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
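+
+For quick experiments on single strings, the fitted pipeline above can be wrapped in a `LightPipeline`. A small sketch; the Hindi sample sentence is illustrative only:
+
+```python
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+# fullAnnotate keeps the embedding vectors; annotate() would only return the token strings
+result = light.fullAnnotate("मुझे स्पार्क एनएलपी पसंद है")
+print(result[0]["embeddings"][0].embeddings[:5])  # first dimensions of the first token vector
+```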
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hindi_marathi_dev_albert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[albert]| +|Language:|hi| +|Size:|125.3 MB| + +## References + +https://huggingface.co/l3cube-pune/hindi-marathi-dev-albert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-hindi_marathi_dev_albert_pipeline_hi.md b/docs/_posts/ahmedlone127/2024-09-05-hindi_marathi_dev_albert_pipeline_hi.md new file mode 100644 index 00000000000000..f236c95c7676cf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-hindi_marathi_dev_albert_pipeline_hi.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Hindi hindi_marathi_dev_albert_pipeline pipeline AlbertEmbeddings from l3cube-pune +author: John Snow Labs +name: hindi_marathi_dev_albert_pipeline +date: 2024-09-05 +tags: [hi, open_source, pipeline, onnx] +task: Embeddings +language: hi +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hindi_marathi_dev_albert_pipeline` is a Hindi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hindi_marathi_dev_albert_pipeline_hi_5.5.0_3.0_1725568447098.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hindi_marathi_dev_albert_pipeline_hi_5.5.0_3.0_1725568447098.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("hindi_marathi_dev_albert_pipeline", lang = "hi") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("hindi_marathi_dev_albert_pipeline", lang = "hi") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hindi_marathi_dev_albert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|hi| +|Size:|125.3 MB| + +## References + +https://huggingface.co/l3cube-pune/hindi-marathi-dev-albert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-hiner_di_en.md b/docs/_posts/ahmedlone127/2024-09-05-hiner_di_en.md new file mode 100644 index 00000000000000..635d2c0b08e369 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-hiner_di_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English hiner_di DistilBertForTokenClassification from TathagatAgrawal +author: John Snow Labs +name: hiner_di +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hiner_di` is a English model originally trained by TathagatAgrawal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hiner_di_en_5.5.0_3.0_1725506360228.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hiner_di_en_5.5.0_3.0_1725506360228.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("hiner_di","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("hiner_di", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
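+
+The token-level IOB tags produced above can be grouped into entity chunks with `NerConverter`. A short sketch, assuming the `pipelineDF` from the example above:
+
+```python
+from sparknlp.annotator import NerConverter
+
+converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+chunks = converter.transform(pipelineDF)
+chunks.selectExpr("explode(ner_chunk) as chunk") \
+    .selectExpr("chunk.result as entity_text", "chunk.metadata['entity'] as entity_label") \
+    .show(truncate=False)
+```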
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hiner_di| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/TathagatAgrawal/HiNER_DI \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-hiner_di_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-hiner_di_pipeline_en.md new file mode 100644 index 00000000000000..b04944f2ac24ca --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-hiner_di_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English hiner_di_pipeline pipeline DistilBertForTokenClassification from TathagatAgrawal +author: John Snow Labs +name: hiner_di_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hiner_di_pipeline` is a English model originally trained by TathagatAgrawal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hiner_di_pipeline_en_5.5.0_3.0_1725506371744.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hiner_di_pipeline_en_5.5.0_3.0_1725506371744.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("hiner_di_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("hiner_di_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hiner_di_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/TathagatAgrawal/HiNER_DI + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-hotelbert_small_de.md b/docs/_posts/ahmedlone127/2024-09-05-hotelbert_small_de.md new file mode 100644 index 00000000000000..2b8099eac6bbb0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-hotelbert_small_de.md @@ -0,0 +1,94 @@ +--- +layout: model +title: German hotelbert_small RoBertaEmbeddings from FabianGroeger +author: John Snow Labs +name: hotelbert_small +date: 2024-09-05 +tags: [de, open_source, onnx, embeddings, roberta] +task: Embeddings +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hotelbert_small` is a German model originally trained by FabianGroeger. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hotelbert_small_de_5.5.0_3.0_1725572828627.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hotelbert_small_de_5.5.0_3.0_1725572828627.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("hotelbert_small","de") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("hotelbert_small","de") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
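+
+To inspect the resulting token embeddings, the annotation column can be unpacked directly with Spark SQL. A brief sketch, reusing `pipelineDF` from the example above:
+
+```python
+# Show each token together with the first five dimensions of its vector
+pipelineDF.selectExpr("explode(embeddings) as emb") \
+    .selectExpr("emb.result as token", "slice(emb.embeddings, 1, 5) as first_dims") \
+    .show(truncate=False)
+```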
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hotelbert_small| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|de| +|Size:|310.7 MB| + +## References + +https://huggingface.co/FabianGroeger/HotelBERT-small \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-hotelbert_small_pipeline_de.md b/docs/_posts/ahmedlone127/2024-09-05-hotelbert_small_pipeline_de.md new file mode 100644 index 00000000000000..7299c5428bf1b7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-hotelbert_small_pipeline_de.md @@ -0,0 +1,70 @@ +--- +layout: model +title: German hotelbert_small_pipeline pipeline RoBertaEmbeddings from FabianGroeger +author: John Snow Labs +name: hotelbert_small_pipeline +date: 2024-09-05 +tags: [de, open_source, pipeline, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hotelbert_small_pipeline` is a German model originally trained by FabianGroeger. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hotelbert_small_pipeline_de_5.5.0_3.0_1725572844757.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hotelbert_small_pipeline_de_5.5.0_3.0_1725572844757.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("hotelbert_small_pipeline", lang = "de") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("hotelbert_small_pipeline", lang = "de") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hotelbert_small_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|de| +|Size:|310.7 MB| + +## References + +https://huggingface.co/FabianGroeger/HotelBERT-small + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-human_vs_llm_generated_text_detection_distilbert_luciayn_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-human_vs_llm_generated_text_detection_distilbert_luciayn_pipeline_en.md new file mode 100644 index 00000000000000..809d69e9dd88e2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-human_vs_llm_generated_text_detection_distilbert_luciayn_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English human_vs_llm_generated_text_detection_distilbert_luciayn_pipeline pipeline DistilBertForSequenceClassification from luciayn +author: John Snow Labs +name: human_vs_llm_generated_text_detection_distilbert_luciayn_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`human_vs_llm_generated_text_detection_distilbert_luciayn_pipeline` is a English model originally trained by luciayn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/human_vs_llm_generated_text_detection_distilbert_luciayn_pipeline_en_5.5.0_3.0_1725580390521.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/human_vs_llm_generated_text_detection_distilbert_luciayn_pipeline_en_5.5.0_3.0_1725580390521.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("human_vs_llm_generated_text_detection_distilbert_luciayn_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("human_vs_llm_generated_text_detection_distilbert_luciayn_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
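+
+A minimal usage sketch for this pipeline on a single string is shown below; the sample sentence is illustrative, and the output column is assumed to be `class`, matching the classifier it wraps:
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+pipeline = PretrainedPipeline("human_vs_llm_generated_text_detection_distilbert_luciayn_pipeline", lang="en")
+result = pipeline.annotate("This passage was written to sound as natural as possible.")
+print(result["class"])  # predicted label for the sample sentence
+```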
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|human_vs_llm_generated_text_detection_distilbert_luciayn_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|246.0 MB| + +## References + +https://huggingface.co/luciayn/human-vs-llm-generated-text-detection-distilbert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-hw001_leochenwj_en.md b/docs/_posts/ahmedlone127/2024-09-05-hw001_leochenwj_en.md new file mode 100644 index 00000000000000..b42a27e0475257 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-hw001_leochenwj_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English hw001_leochenwj DistilBertForSequenceClassification from leochenwj +author: John Snow Labs +name: hw001_leochenwj +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hw001_leochenwj` is a English model originally trained by leochenwj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hw001_leochenwj_en_5.5.0_3.0_1725580003883.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hw001_leochenwj_en_5.5.0_3.0_1725580003883.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("hw001_leochenwj","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("hw001_leochenwj", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hw001_leochenwj| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/leochenwj/HW001 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-hw001_leochenwj_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-hw001_leochenwj_pipeline_en.md new file mode 100644 index 00000000000000..637208517968ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-hw001_leochenwj_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English hw001_leochenwj_pipeline pipeline DistilBertForSequenceClassification from leochenwj +author: John Snow Labs +name: hw001_leochenwj_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hw001_leochenwj_pipeline` is a English model originally trained by leochenwj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hw001_leochenwj_pipeline_en_5.5.0_3.0_1725580016148.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hw001_leochenwj_pipeline_en_5.5.0_3.0_1725580016148.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("hw001_leochenwj_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("hw001_leochenwj_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hw001_leochenwj_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/leochenwj/HW001 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-hw01_liamli1991_en.md b/docs/_posts/ahmedlone127/2024-09-05-hw01_liamli1991_en.md new file mode 100644 index 00000000000000..d3decd6e336471 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-hw01_liamli1991_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English hw01_liamli1991 DistilBertForSequenceClassification from LiamLi1991 +author: John Snow Labs +name: hw01_liamli1991 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hw01_liamli1991` is a English model originally trained by LiamLi1991. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hw01_liamli1991_en_5.5.0_3.0_1725507438195.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hw01_liamli1991_en_5.5.0_3.0_1725507438195.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("hw01_liamli1991","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("hw01_liamli1991", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
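+
+For ad-hoc predictions on single sentences, the fitted pipeline above can also be used through a `LightPipeline`. A short sketch:
+
+```python
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+print(light.annotate("I love spark-nlp")["class"])  # predicted label for the sample sentence
+```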
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hw01_liamli1991| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/LiamLi1991/HW01 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ibert_roberta_base_finetuned_wikineural_en.md b/docs/_posts/ahmedlone127/2024-09-05-ibert_roberta_base_finetuned_wikineural_en.md new file mode 100644 index 00000000000000..7b14905027368e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ibert_roberta_base_finetuned_wikineural_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ibert_roberta_base_finetuned_wikineural RoBertaForTokenClassification from DunnBC22 +author: John Snow Labs +name: ibert_roberta_base_finetuned_wikineural +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ibert_roberta_base_finetuned_wikineural` is a English model originally trained by DunnBC22. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ibert_roberta_base_finetuned_wikineural_en_5.5.0_3.0_1725512314209.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ibert_roberta_base_finetuned_wikineural_en_5.5.0_3.0_1725512314209.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("ibert_roberta_base_finetuned_wikineural","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("ibert_roberta_base_finetuned_wikineural", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ibert_roberta_base_finetuned_wikineural| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|462.0 MB| + +## References + +https://huggingface.co/DunnBC22/ibert-roberta-base-finetuned-WikiNeural \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-icebert_vesteinn_is.md b/docs/_posts/ahmedlone127/2024-09-05-icebert_vesteinn_is.md new file mode 100644 index 00000000000000..d3e160b28df4eb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-icebert_vesteinn_is.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Icelandic icebert_vesteinn RoBertaEmbeddings from vesteinn +author: John Snow Labs +name: icebert_vesteinn +date: 2024-09-05 +tags: [is, open_source, onnx, embeddings, roberta] +task: Embeddings +language: is +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`icebert_vesteinn` is a Icelandic model originally trained by vesteinn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/icebert_vesteinn_is_5.5.0_3.0_1725578071432.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/icebert_vesteinn_is_5.5.0_3.0_1725578071432.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("icebert_vesteinn","is") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("icebert_vesteinn","is") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
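+
+A fitted pipeline can be persisted and reloaded so the model is not downloaded again on every run. A small sketch; the local path is illustrative:
+
+```python
+from pyspark.ml import PipelineModel
+
+# Save the fitted pipeline from the example above to a local path
+pipelineModel.write().overwrite().save("icebert_vesteinn_local_pipeline")
+
+reloaded = PipelineModel.load("icebert_vesteinn_local_pipeline")
+reloaded.transform(data).select("embeddings.result").show(truncate=False)
+```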
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|icebert_vesteinn| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|is| +|Size:|296.5 MB| + +## References + +https://huggingface.co/vesteinn/IceBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-icebert_vesteinn_pipeline_is.md b/docs/_posts/ahmedlone127/2024-09-05-icebert_vesteinn_pipeline_is.md new file mode 100644 index 00000000000000..e74e33b7967301 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-icebert_vesteinn_pipeline_is.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Icelandic icebert_vesteinn_pipeline pipeline RoBertaEmbeddings from vesteinn +author: John Snow Labs +name: icebert_vesteinn_pipeline +date: 2024-09-05 +tags: [is, open_source, pipeline, onnx] +task: Embeddings +language: is +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`icebert_vesteinn_pipeline` is a Icelandic model originally trained by vesteinn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/icebert_vesteinn_pipeline_is_5.5.0_3.0_1725578155975.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/icebert_vesteinn_pipeline_is_5.5.0_3.0_1725578155975.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("icebert_vesteinn_pipeline", lang = "is") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("icebert_vesteinn_pipeline", lang = "is") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|icebert_vesteinn_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|is| +|Size:|296.5 MB| + +## References + +https://huggingface.co/vesteinn/IceBERT + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-imdb_spoiler_distilbertorigdatasetlr1_en.md b/docs/_posts/ahmedlone127/2024-09-05-imdb_spoiler_distilbertorigdatasetlr1_en.md new file mode 100644 index 00000000000000..6ecbff69d46a72 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-imdb_spoiler_distilbertorigdatasetlr1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English imdb_spoiler_distilbertorigdatasetlr1 DistilBertForSequenceClassification from Zritze +author: John Snow Labs +name: imdb_spoiler_distilbertorigdatasetlr1 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`imdb_spoiler_distilbertorigdatasetlr1` is a English model originally trained by Zritze. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/imdb_spoiler_distilbertorigdatasetlr1_en_5.5.0_3.0_1725580197207.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/imdb_spoiler_distilbertorigdatasetlr1_en_5.5.0_3.0_1725580197207.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("imdb_spoiler_distilbertorigdatasetlr1","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("imdb_spoiler_distilbertorigdatasetlr1", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
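+
+Per-class confidence scores are kept in the annotation metadata alongside the predicted label. A brief sketch, reusing `pipelineDF` from above:
+
+```python
+pipelineDF.selectExpr("explode(`class`) as pred") \
+    .selectExpr("pred.result as label", "pred.metadata as scores") \
+    .show(truncate=False)
+```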
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|imdb_spoiler_distilbertorigdatasetlr1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Zritze/imdb-spoiler-distilbertOrigDatasetLR1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-imdb_spoiler_distilbertorigdatasetlr1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-imdb_spoiler_distilbertorigdatasetlr1_pipeline_en.md new file mode 100644 index 00000000000000..a38c793752ca84 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-imdb_spoiler_distilbertorigdatasetlr1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English imdb_spoiler_distilbertorigdatasetlr1_pipeline pipeline DistilBertForSequenceClassification from Zritze +author: John Snow Labs +name: imdb_spoiler_distilbertorigdatasetlr1_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`imdb_spoiler_distilbertorigdatasetlr1_pipeline` is a English model originally trained by Zritze. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/imdb_spoiler_distilbertorigdatasetlr1_pipeline_en_5.5.0_3.0_1725580209895.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/imdb_spoiler_distilbertorigdatasetlr1_pipeline_en_5.5.0_3.0_1725580209895.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("imdb_spoiler_distilbertorigdatasetlr1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("imdb_spoiler_distilbertorigdatasetlr1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
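+
+For quick checks on raw strings, the pretrained pipeline can also be called through `annotate()` instead of `transform()`. A minimal sketch, assuming a Spark NLP session is already running and that the classifier stage writes its predictions to a column named `class` (an assumption, not stated on this card):
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("imdb_spoiler_distilbertorigdatasetlr1_pipeline", lang = "en")
+# annotate() accepts a plain string and returns a dict keyed by the pipeline's output columns
+result = pipeline.annotate("The ending reveals that the narrator was dead the whole time.")
+print(result["class"])
+```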
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|imdb_spoiler_distilbertorigdatasetlr1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Zritze/imdb-spoiler-distilbertOrigDatasetLR1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-improved_xlm_roberta_base_nodroput_en.md b/docs/_posts/ahmedlone127/2024-09-05-improved_xlm_roberta_base_nodroput_en.md new file mode 100644 index 00000000000000..034208ce5a73fe --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-improved_xlm_roberta_base_nodroput_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English improved_xlm_roberta_base_nodroput XlmRoBertaForSequenceClassification from Anwaarma +author: John Snow Labs +name: improved_xlm_roberta_base_nodroput +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`improved_xlm_roberta_base_nodroput` is a English model originally trained by Anwaarma. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/improved_xlm_roberta_base_nodroput_en_5.5.0_3.0_1725526924568.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/improved_xlm_roberta_base_nodroput_en_5.5.0_3.0_1725526924568.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("improved_xlm_roberta_base_nodroput","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("improved_xlm_roberta_base_nodroput", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
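+
+For low-latency inference on a handful of texts, the fitted `pipelineModel` from the example above can be wrapped in a `LightPipeline`; a minimal sketch, assuming the example has been run first:
+
+```python
+from sparknlp.base import LightPipeline
+
+# LightPipeline runs the same stages on the driver, without building a DataFrame
+light = LightPipeline(pipelineModel)
+print(light.annotate("I love spark-nlp")["class"])
+```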
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|improved_xlm_roberta_base_nodroput| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|655.1 MB| + +## References + +https://huggingface.co/Anwaarma/Improved-xlm-roberta-base-nodroput \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-improved_xlm_roberta_base_nodroput_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-improved_xlm_roberta_base_nodroput_pipeline_en.md new file mode 100644 index 00000000000000..115aaa94ca83f7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-improved_xlm_roberta_base_nodroput_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English improved_xlm_roberta_base_nodroput_pipeline pipeline XlmRoBertaForSequenceClassification from Anwaarma +author: John Snow Labs +name: improved_xlm_roberta_base_nodroput_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`improved_xlm_roberta_base_nodroput_pipeline` is a English model originally trained by Anwaarma. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/improved_xlm_roberta_base_nodroput_pipeline_en_5.5.0_3.0_1725527114126.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/improved_xlm_roberta_base_nodroput_pipeline_en_5.5.0_3.0_1725527114126.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("improved_xlm_roberta_base_nodroput_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("improved_xlm_roberta_base_nodroput_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|improved_xlm_roberta_base_nodroput_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|655.2 MB| + +## References + +https://huggingface.co/Anwaarma/Improved-xlm-roberta-base-nodroput + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-incollection_recognizer_en.md b/docs/_posts/ahmedlone127/2024-09-05-incollection_recognizer_en.md new file mode 100644 index 00000000000000..3b5649c0bb3b8f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-incollection_recognizer_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English incollection_recognizer DistilBertForSequenceClassification from LaLaf93 +author: John Snow Labs +name: incollection_recognizer +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`incollection_recognizer` is a English model originally trained by LaLaf93. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/incollection_recognizer_en_5.5.0_3.0_1725580270796.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/incollection_recognizer_en_5.5.0_3.0_1725580270796.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DistilBertForSequenceClassification.pretrained("incollection_recognizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DistilBertForSequenceClassification.pretrained("incollection_recognizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|incollection_recognizer| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/LaLaf93/incollection_recognizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-indic_bert_finetuned_legal_try0_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-indic_bert_finetuned_legal_try0_pipeline_en.md new file mode 100644 index 00000000000000..94f4475cebcbb1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-indic_bert_finetuned_legal_try0_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English indic_bert_finetuned_legal_try0_pipeline pipeline AlbertForSequenceClassification from PoptropicaSahil +author: John Snow Labs +name: indic_bert_finetuned_legal_try0_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`indic_bert_finetuned_legal_try0_pipeline` is a English model originally trained by PoptropicaSahil. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/indic_bert_finetuned_legal_try0_pipeline_en_5.5.0_3.0_1725510007373.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/indic_bert_finetuned_legal_try0_pipeline_en_5.5.0_3.0_1725510007373.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("indic_bert_finetuned_legal_try0_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("indic_bert_finetuned_legal_try0_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|indic_bert_finetuned_legal_try0_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|127.8 MB| + +## References + +https://huggingface.co/PoptropicaSahil/indic-bert-finetuned-legal_try0 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-indicbertner_en.md b/docs/_posts/ahmedlone127/2024-09-05-indicbertner_en.md new file mode 100644 index 00000000000000..0b74dac080e1f6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-indicbertner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English indicbertner AlbertForTokenClassification from livinNector +author: John Snow Labs +name: indicbertner +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, albert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`indicbertner` is a English model originally trained by livinNector. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/indicbertner_en_5.5.0_3.0_1725503646568.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/indicbertner_en_5.5.0_3.0_1725503646568.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = AlbertForTokenClassification.pretrained("indicbertner","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = AlbertForTokenClassification.pretrained("indicbertner", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
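+
+Token-level predictions come back as two aligned arrays, `token.result` and `ner.result`. A small sketch for pairing them up, assuming the example pipeline above has been run:
+
+```python
+# Zip each token with its predicted NER tag for the first row
+row = pipelineDF.select("token.result", "ner.result").first()
+for token, tag in zip(row[0], row[1]):
+    print(token, tag)
+```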
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|indicbertner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|125.6 MB| + +## References + +https://huggingface.co/livinNector/IndicBERTNER \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-indicbertner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-indicbertner_pipeline_en.md new file mode 100644 index 00000000000000..21b600243549f0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-indicbertner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English indicbertner_pipeline pipeline AlbertForTokenClassification from livinNector +author: John Snow Labs +name: indicbertner_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`indicbertner_pipeline` is a English model originally trained by livinNector. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/indicbertner_pipeline_en_5.5.0_3.0_1725503652784.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/indicbertner_pipeline_en_5.5.0_3.0_1725503652784.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("indicbertner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("indicbertner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|indicbertner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|125.6 MB| + +## References + +https://huggingface.co/livinNector/IndicBERTNER + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-indonesian_multi_pipeline_id.md b/docs/_posts/ahmedlone127/2024-09-05-indonesian_multi_pipeline_id.md new file mode 100644 index 00000000000000..c9bc19f28dcfbc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-indonesian_multi_pipeline_id.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Indonesian indonesian_multi_pipeline pipeline XlmRoBertaForQuestionAnswering from simoneZethof +author: John Snow Labs +name: indonesian_multi_pipeline +date: 2024-09-05 +tags: [id, open_source, pipeline, onnx] +task: Question Answering +language: id +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`indonesian_multi_pipeline` is a Indonesian model originally trained by simoneZethof. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/indonesian_multi_pipeline_id_5.5.0_3.0_1725497845941.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/indonesian_multi_pipeline_id_5.5.0_3.0_1725497845941.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("indonesian_multi_pipeline", lang = "id") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("indonesian_multi_pipeline", lang = "id") +val annotations = pipeline.transform(df) + +``` +
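+
+Question Answering pipelines built on `MultiDocumentAssembler` expect both a question and a context. The sketch below shows one common way such pipelines are queried; the two-argument `fullAnnotate` call, the sample question/context, and the `answer` output key are assumptions for illustration, not taken from this card:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("indonesian_multi_pipeline", lang = "id")
+# fullAnnotate is assumed to take (question, context) for multi-input QA pipelines
+result = pipeline.fullAnnotate("Siapa presiden pertama Indonesia?",
+                               "Soekarno adalah presiden pertama Indonesia.")
+print(result[0]["answer"])
+```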
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|indonesian_multi_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|id| +|Size:|864.5 MB| + +## References + +https://huggingface.co/simoneZethof/Indonesian_multi + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-inproceedings_recognizer_en.md b/docs/_posts/ahmedlone127/2024-09-05-inproceedings_recognizer_en.md new file mode 100644 index 00000000000000..890c9a7b97effa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-inproceedings_recognizer_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English inproceedings_recognizer DistilBertForSequenceClassification from LaLaf93 +author: John Snow Labs +name: inproceedings_recognizer +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`inproceedings_recognizer` is a English model originally trained by LaLaf93. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/inproceedings_recognizer_en_5.5.0_3.0_1725507202043.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/inproceedings_recognizer_en_5.5.0_3.0_1725507202043.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DistilBertForSequenceClassification.pretrained("inproceedings_recognizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DistilBertForSequenceClassification.pretrained("inproceedings_recognizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|inproceedings_recognizer| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/LaLaf93/inproceedings_recognizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-issuebert_large_en.md b/docs/_posts/ahmedlone127/2024-09-05-issuebert_large_en.md new file mode 100644 index 00000000000000..805f5d5691a96c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-issuebert_large_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English issuebert_large BertEmbeddings from gbkwon +author: John Snow Labs +name: issuebert_large +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`issuebert_large` is a English model originally trained by gbkwon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/issuebert_large_en_5.5.0_3.0_1725534177337.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/issuebert_large_en_5.5.0_3.0_1725534177337.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("issuebert_large","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("issuebert_large","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
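+
+To turn the `embeddings` annotations into plain vectors (for similarity, clustering, and so on), an `EmbeddingsFinisher` can be appended; a minimal sketch, assuming the example above has produced `pipelineDF`:
+
+```python
+from sparknlp.base import EmbeddingsFinisher
+
+# Convert token-level annotation structs into Spark ML vectors
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["finished_embeddings"]) \
+    .setOutputAsVector(True)
+
+finisher.transform(pipelineDF).selectExpr("explode(finished_embeddings) as vector").show(3)
+```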
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|issuebert_large| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/gbkwon/issueBERT-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-issuebert_large_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-issuebert_large_pipeline_en.md new file mode 100644 index 00000000000000..321677431febb5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-issuebert_large_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English issuebert_large_pipeline pipeline BertEmbeddings from gbkwon +author: John Snow Labs +name: issuebert_large_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`issuebert_large_pipeline` is a English model originally trained by gbkwon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/issuebert_large_pipeline_en_5.5.0_3.0_1725534238922.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/issuebert_large_pipeline_en_5.5.0_3.0_1725534238922.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("issuebert_large_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("issuebert_large_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|issuebert_large_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/gbkwon/issueBERT-large + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-isy503_a03_en.md b/docs/_posts/ahmedlone127/2024-09-05-isy503_a03_en.md new file mode 100644 index 00000000000000..a90802c9c46384 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-isy503_a03_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English isy503_a03 DistilBertForSequenceClassification from nicoketterer +author: John Snow Labs +name: isy503_a03 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`isy503_a03` is a English model originally trained by nicoketterer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/isy503_a03_en_5.5.0_3.0_1725579852357.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/isy503_a03_en_5.5.0_3.0_1725579852357.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DistilBertForSequenceClassification.pretrained("isy503_a03","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DistilBertForSequenceClassification.pretrained("isy503_a03", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|isy503_a03| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/nicoketterer/isy503-a03 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-isy503_a03_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-isy503_a03_pipeline_en.md new file mode 100644 index 00000000000000..54d26868798465 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-isy503_a03_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English isy503_a03_pipeline pipeline DistilBertForSequenceClassification from nicoketterer +author: John Snow Labs +name: isy503_a03_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`isy503_a03_pipeline` is a English model originally trained by nicoketterer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/isy503_a03_pipeline_en_5.5.0_3.0_1725579872467.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/isy503_a03_pipeline_en_5.5.0_3.0_1725579872467.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("isy503_a03_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("isy503_a03_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|isy503_a03_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/nicoketterer/isy503-a03 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-italian_ner_xxl_it.md b/docs/_posts/ahmedlone127/2024-09-05-italian_ner_xxl_it.md new file mode 100644 index 00000000000000..5acfebc66c1c70 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-italian_ner_xxl_it.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Italian italian_ner_xxl BertForTokenClassification from DeepMount00 +author: John Snow Labs +name: italian_ner_xxl +date: 2024-09-05 +tags: [it, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: it +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`italian_ner_xxl` is a Italian model originally trained by DeepMount00. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/italian_ner_xxl_it_5.5.0_3.0_1725563950314.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/italian_ner_xxl_it_5.5.0_3.0_1725563950314.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("italian_ner_xxl","it") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("italian_ner_xxl", "it")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
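+
+To group IOB tags into entity chunks (full names, organisations, and so on), a `NerConverter` stage can be applied on top of the classifier output; a minimal sketch, assuming the example above has been run:
+
+```python
+from sparknlp.annotator import NerConverter
+
+# Merge B-/I- tagged tokens into whole entity chunks
+converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("entities")
+
+converter.transform(pipelineDF).select("entities.result").show(truncate=False)
+```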
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|italian_ner_xxl| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|it| +|Size:|412.9 MB| + +## References + +https://huggingface.co/DeepMount00/Italian_NER_XXL \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-italian_ner_xxl_pipeline_it.md b/docs/_posts/ahmedlone127/2024-09-05-italian_ner_xxl_pipeline_it.md new file mode 100644 index 00000000000000..5b5175f8f7a2c5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-italian_ner_xxl_pipeline_it.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Italian italian_ner_xxl_pipeline pipeline BertForTokenClassification from DeepMount00 +author: John Snow Labs +name: italian_ner_xxl_pipeline +date: 2024-09-05 +tags: [it, open_source, pipeline, onnx] +task: Named Entity Recognition +language: it +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`italian_ner_xxl_pipeline` is a Italian model originally trained by DeepMount00. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/italian_ner_xxl_pipeline_it_5.5.0_3.0_1725563972307.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/italian_ner_xxl_pipeline_it_5.5.0_3.0_1725563972307.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("italian_ner_xxl_pipeline", lang = "it") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("italian_ner_xxl_pipeline", lang = "it") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|italian_ner_xxl_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|it| +|Size:|413.0 MB| + +## References + +https://huggingface.co/DeepMount00/Italian_NER_XXL + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-iwslt17_marian_big_ctx4_cwd4_english_french_en.md b/docs/_posts/ahmedlone127/2024-09-05-iwslt17_marian_big_ctx4_cwd4_english_french_en.md new file mode 100644 index 00000000000000..9eb14e99f5e52f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-iwslt17_marian_big_ctx4_cwd4_english_french_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English iwslt17_marian_big_ctx4_cwd4_english_french MarianTransformer from context-mt +author: John Snow Labs +name: iwslt17_marian_big_ctx4_cwd4_english_french +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`iwslt17_marian_big_ctx4_cwd4_english_french` is a English model originally trained by context-mt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/iwslt17_marian_big_ctx4_cwd4_english_french_en_5.5.0_3.0_1725494579878.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/iwslt17_marian_big_ctx4_cwd4_english_french_en_5.5.0_3.0_1725494579878.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("iwslt17_marian_big_ctx4_cwd4_english_french","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+  .setInputCols(Array("document"))
+  .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("iwslt17_marian_big_ctx4_cwd4_english_french","en")
+  .setInputCols(Array("sentence"))
+  .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
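+
+The translated text lands in the `translation` column; a short sketch for reading it back, assuming the example above has been run:
+
+```python
+# Each row holds an array of translated sentences
+pipelineDF.select("translation.result").show(truncate=False)
+```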
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|iwslt17_marian_big_ctx4_cwd4_english_french| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/context-mt/iwslt17-marian-big-ctx4-cwd4-en-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-kolivia_classifier_v2_en.md b/docs/_posts/ahmedlone127/2024-09-05-kolivia_classifier_v2_en.md new file mode 100644 index 00000000000000..497942b91be710 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-kolivia_classifier_v2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English kolivia_classifier_v2 DistilBertForSequenceClassification from Agreus +author: John Snow Labs +name: kolivia_classifier_v2 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`kolivia_classifier_v2` is a English model originally trained by Agreus. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kolivia_classifier_v2_en_5.5.0_3.0_1725580152980.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kolivia_classifier_v2_en_5.5.0_3.0_1725580152980.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DistilBertForSequenceClassification.pretrained("kolivia_classifier_v2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DistilBertForSequenceClassification.pretrained("kolivia_classifier_v2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kolivia_classifier_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|246.0 MB| + +## References + +https://huggingface.co/Agreus/KOlivia-classifier-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-kolivia_classifier_v2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-kolivia_classifier_v2_pipeline_en.md new file mode 100644 index 00000000000000..09577852d23aa1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-kolivia_classifier_v2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English kolivia_classifier_v2_pipeline pipeline DistilBertForSequenceClassification from Agreus +author: John Snow Labs +name: kolivia_classifier_v2_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`kolivia_classifier_v2_pipeline` is a English model originally trained by Agreus. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kolivia_classifier_v2_pipeline_en_5.5.0_3.0_1725580165347.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kolivia_classifier_v2_pipeline_en_5.5.0_3.0_1725580165347.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("kolivia_classifier_v2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("kolivia_classifier_v2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kolivia_classifier_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|246.0 MB| + +## References + +https://huggingface.co/Agreus/KOlivia-classifier-v2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-korean_finance_news_classifier_ko.md b/docs/_posts/ahmedlone127/2024-09-05-korean_finance_news_classifier_ko.md new file mode 100644 index 00000000000000..310b51331480f2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-korean_finance_news_classifier_ko.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Korean korean_finance_news_classifier XlmRoBertaForSequenceClassification from Hyeonseo +author: John Snow Labs +name: korean_finance_news_classifier +date: 2024-09-05 +tags: [ko, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: ko +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`korean_finance_news_classifier` is a Korean model originally trained by Hyeonseo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/korean_finance_news_classifier_ko_5.5.0_3.0_1725514836767.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/korean_finance_news_classifier_ko_5.5.0_3.0_1725514836767.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("korean_finance_news_classifier","ko") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("korean_finance_news_classifier", "ko") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|korean_finance_news_classifier| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|ko| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Hyeonseo/ko-finance_news_classifier \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-korean_finance_news_classifier_pipeline_ko.md b/docs/_posts/ahmedlone127/2024-09-05-korean_finance_news_classifier_pipeline_ko.md new file mode 100644 index 00000000000000..7960098ccf0d8e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-korean_finance_news_classifier_pipeline_ko.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Korean korean_finance_news_classifier_pipeline pipeline XlmRoBertaForSequenceClassification from Hyeonseo +author: John Snow Labs +name: korean_finance_news_classifier_pipeline +date: 2024-09-05 +tags: [ko, open_source, pipeline, onnx] +task: Text Classification +language: ko +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`korean_finance_news_classifier_pipeline` is a Korean model originally trained by Hyeonseo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/korean_finance_news_classifier_pipeline_ko_5.5.0_3.0_1725514885778.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/korean_finance_news_classifier_pipeline_ko_5.5.0_3.0_1725514885778.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("korean_finance_news_classifier_pipeline", lang = "ko") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("korean_finance_news_classifier_pipeline", lang = "ko") +val annotations = pipeline.transform(df) + +``` +
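+
+The snippet above assumes a DataFrame `df` with a `text` column already exists. A minimal sketch of building one and reading the predictions back (the `class` output column is an assumption based on the included classifier stage):
+
+```python
+# One-row DataFrame with the column name the pretrained pipeline expects
+# (Korean sample: "Samsung Electronics shares rose after the earnings release.")
+df = spark.createDataFrame([["삼성전자 주가가 실적 발표 이후 상승했다."]]).toDF("text")
+annotations = pipeline.transform(df)
+annotations.select("class.result").show(truncate=False)
+```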
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|korean_finance_news_classifier_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ko| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Hyeonseo/ko-finance_news_classifier + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-lab1_random_jarmac_en.md b/docs/_posts/ahmedlone127/2024-09-05-lab1_random_jarmac_en.md new file mode 100644 index 00000000000000..6514d5e6991d4b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-lab1_random_jarmac_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English lab1_random_jarmac MarianTransformer from Jarmac +author: John Snow Labs +name: lab1_random_jarmac +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lab1_random_jarmac` is a English model originally trained by Jarmac. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lab1_random_jarmac_en_5.5.0_3.0_1725544951527.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lab1_random_jarmac_en_5.5.0_3.0_1725544951527.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("lab1_random_jarmac","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+  .setInputCols(Array("document"))
+  .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("lab1_random_jarmac","en")
+  .setInputCols(Array("sentence"))
+  .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lab1_random_jarmac| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.2 MB| + +## References + +https://huggingface.co/Jarmac/lab1_random \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-lab1_random_jarmac_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-lab1_random_jarmac_pipeline_en.md new file mode 100644 index 00000000000000..4e2e6bae1f145a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-lab1_random_jarmac_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English lab1_random_jarmac_pipeline pipeline MarianTransformer from Jarmac +author: John Snow Labs +name: lab1_random_jarmac_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lab1_random_jarmac_pipeline` is a English model originally trained by Jarmac. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lab1_random_jarmac_pipeline_en_5.5.0_3.0_1725544979308.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lab1_random_jarmac_pipeline_en_5.5.0_3.0_1725544979308.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("lab1_random_jarmac_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("lab1_random_jarmac_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lab1_random_jarmac_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|508.8 MB| + +## References + +https://huggingface.co/Jarmac/lab1_random + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-lab1_random_robinysh_en.md b/docs/_posts/ahmedlone127/2024-09-05-lab1_random_robinysh_en.md new file mode 100644 index 00000000000000..5a8d5efb1f85a9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-lab1_random_robinysh_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English lab1_random_robinysh MarianTransformer from robinysh +author: John Snow Labs +name: lab1_random_robinysh +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lab1_random_robinysh` is a English model originally trained by robinysh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lab1_random_robinysh_en_5.5.0_3.0_1725545314925.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lab1_random_robinysh_en_5.5.0_3.0_1725545314925.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+# split the incoming document into sentences for the translator
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+  .setInputCols(["document"]) \
+  .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("lab1_random_robinysh","en") \
+  .setInputCols(["sentence"]) \
+  .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+// split the incoming document into sentences for the translator
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+  .setInputCols(Array("document"))
+  .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("lab1_random_robinysh","en")
+  .setInputCols(Array("sentence"))
+  .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lab1_random_robinysh| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|509.5 MB| + +## References + +https://huggingface.co/robinysh/lab1_random \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-lab1_random_robinysh_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-lab1_random_robinysh_pipeline_en.md new file mode 100644 index 00000000000000..024be264d352c2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-lab1_random_robinysh_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English lab1_random_robinysh_pipeline pipeline MarianTransformer from robinysh +author: John Snow Labs +name: lab1_random_robinysh_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lab1_random_robinysh_pipeline` is a English model originally trained by robinysh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lab1_random_robinysh_pipeline_en_5.5.0_3.0_1725545347981.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lab1_random_robinysh_pipeline_en_5.5.0_3.0_1725545347981.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("lab1_random_robinysh_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("lab1_random_robinysh_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lab1_random_robinysh_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|510.1 MB| + +## References + +https://huggingface.co/robinysh/lab1_random + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-lab1_true_random_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-lab1_true_random_pipeline_en.md new file mode 100644 index 00000000000000..2ad9333f65b157 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-lab1_true_random_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English lab1_true_random_pipeline pipeline MarianTransformer from cheyannelam +author: John Snow Labs +name: lab1_true_random_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lab1_true_random_pipeline` is a English model originally trained by cheyannelam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lab1_true_random_pipeline_en_5.5.0_3.0_1725494789050.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lab1_true_random_pipeline_en_5.5.0_3.0_1725494789050.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("lab1_true_random_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("lab1_true_random_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lab1_true_random_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|508.6 MB| + +## References + +https://huggingface.co/cheyannelam/lab1_true_random + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-lavan_en.md b/docs/_posts/ahmedlone127/2024-09-05-lavan_en.md new file mode 100644 index 00000000000000..9f07947e68e6b0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-lavan_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English lavan RoBertaForQuestionAnswering from LavanyaM +author: John Snow Labs +name: lavan +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lavan` is a English model originally trained by LavanyaM. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lavan_en_5.5.0_3.0_1725576417387.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lavan_en_5.5.0_3.0_1725576417387.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+     .setInputCols(["question", "context"]) \
+     .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = RoBertaForQuestionAnswering.pretrained("lavan","en") \
+     .setInputCols(["document_question","document_context"]) \
+     .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+# the DataFrame column names must match the assembler's input columns
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+     .setInputCols(Array("question", "context"))
+     .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = RoBertaForQuestionAnswering.pretrained("lavan", "en")
+     .setInputCols(Array("document_question","document_context"))
+     .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+// the DataFrame column names must match the assembler's input columns
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
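+
+Assuming the Python example above has been run, the predicted answer span is stored in the `answer` output column and can be inspected directly:
+
+```python
+# the Annotation's result field carries the extracted answer text
+pipelineDF.select("document_question.result", "answer.result").show(truncate=False)
+```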
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lavan| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|463.6 MB| + +## References + +https://huggingface.co/LavanyaM/lavan \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-lavan_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-lavan_pipeline_en.md new file mode 100644 index 00000000000000..90b63eec143d93 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-lavan_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English lavan_pipeline pipeline RoBertaForQuestionAnswering from LavanyaM +author: John Snow Labs +name: lavan_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lavan_pipeline` is a English model originally trained by LavanyaM. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lavan_pipeline_en_5.5.0_3.0_1725576440835.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lavan_pipeline_en_5.5.0_3.0_1725576440835.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("lavan_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("lavan_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lavan_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|463.6 MB| + +## References + +https://huggingface.co/LavanyaM/lavan + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-legal_longformer_base_8192_spanish_es.md b/docs/_posts/ahmedlone127/2024-09-05-legal_longformer_base_8192_spanish_es.md new file mode 100644 index 00000000000000..96c4421c9aa4be --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-legal_longformer_base_8192_spanish_es.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Castilian, Spanish legal_longformer_base_8192_spanish RoBertaEmbeddings from mrm8488 +author: John Snow Labs +name: legal_longformer_base_8192_spanish +date: 2024-09-05 +tags: [es, open_source, onnx, embeddings, roberta] +task: Embeddings +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`legal_longformer_base_8192_spanish` is a Castilian, Spanish model originally trained by mrm8488. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/legal_longformer_base_8192_spanish_es_5.5.0_3.0_1725566575785.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/legal_longformer_base_8192_spanish_es_5.5.0_3.0_1725566575785.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("legal_longformer_base_8192_spanish","es") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("legal_longformer_base_8192_spanish","es") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
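+
+Assuming the Python example above has been run, the per-token vectors live in the `embeddings` field of each annotation in the `embeddings` output column:
+
+```python
+from pyspark.sql.functions import explode, col
+
+# one row per token: the token text and its embedding vector
+pipelineDF.select(explode(col("embeddings")).alias("emb")) \
+    .select("emb.result", "emb.embeddings") \
+    .show(truncate=False)
+```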
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|legal_longformer_base_8192_spanish| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|es| +|Size:|490.6 MB| + +## References + +https://huggingface.co/mrm8488/legal-longformer-base-8192-spanish \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-legal_longformer_base_8192_spanish_pipeline_es.md b/docs/_posts/ahmedlone127/2024-09-05-legal_longformer_base_8192_spanish_pipeline_es.md new file mode 100644 index 00000000000000..f744d48fa3964c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-legal_longformer_base_8192_spanish_pipeline_es.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Castilian, Spanish legal_longformer_base_8192_spanish_pipeline pipeline RoBertaEmbeddings from mrm8488 +author: John Snow Labs +name: legal_longformer_base_8192_spanish_pipeline +date: 2024-09-05 +tags: [es, open_source, pipeline, onnx] +task: Embeddings +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`legal_longformer_base_8192_spanish_pipeline` is a Castilian, Spanish model originally trained by mrm8488. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/legal_longformer_base_8192_spanish_pipeline_es_5.5.0_3.0_1725566601318.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/legal_longformer_base_8192_spanish_pipeline_es_5.5.0_3.0_1725566601318.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("legal_longformer_base_8192_spanish_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("legal_longformer_base_8192_spanish_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|legal_longformer_base_8192_spanish_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|490.7 MB| + +## References + +https://huggingface.co/mrm8488/legal-longformer-base-8192-spanish + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-linkbert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-linkbert_pipeline_en.md new file mode 100644 index 00000000000000..dfda60825c7467 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-linkbert_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English linkbert_pipeline pipeline BertForTokenClassification from dejanseo +author: John Snow Labs +name: linkbert_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`linkbert_pipeline` is a English model originally trained by dejanseo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/linkbert_pipeline_en_5.5.0_3.0_1725516134609.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/linkbert_pipeline_en_5.5.0_3.0_1725516134609.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("linkbert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("linkbert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
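+
+For quick experiments on single strings, the same pretrained pipeline can be queried with `annotate`, which returns a plain Python dict keyed by the pipeline's output columns (the sample sentence below is only illustrative):
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("linkbert_pipeline", lang="en")
+annotations = pipeline.annotate("John Snow Labs builds NLP tools for Spark.")
+print(annotations.keys())  # lists the available output columns
+```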
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|linkbert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/dejanseo/LinkBERT + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-litberta_uncased_lt.md b/docs/_posts/ahmedlone127/2024-09-05-litberta_uncased_lt.md new file mode 100644 index 00000000000000..2b536afd38caf0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-litberta_uncased_lt.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Lithuanian litberta_uncased RoBertaEmbeddings from jkeruotis +author: John Snow Labs +name: litberta_uncased +date: 2024-09-05 +tags: [lt, open_source, onnx, embeddings, roberta] +task: Embeddings +language: lt +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`litberta_uncased` is a Lithuanian model originally trained by jkeruotis. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/litberta_uncased_lt_5.5.0_3.0_1725578667929.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/litberta_uncased_lt_5.5.0_3.0_1725578667929.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("litberta_uncased","lt") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("litberta_uncased","lt") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|litberta_uncased| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|lt| +|Size:|689.2 MB| + +## References + +https://huggingface.co/jkeruotis/LitBERTa-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-litberta_uncased_pipeline_lt.md b/docs/_posts/ahmedlone127/2024-09-05-litberta_uncased_pipeline_lt.md new file mode 100644 index 00000000000000..7d13d9a859dbf6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-litberta_uncased_pipeline_lt.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Lithuanian litberta_uncased_pipeline pipeline RoBertaEmbeddings from jkeruotis +author: John Snow Labs +name: litberta_uncased_pipeline +date: 2024-09-05 +tags: [lt, open_source, pipeline, onnx] +task: Embeddings +language: lt +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`litberta_uncased_pipeline` is a Lithuanian model originally trained by jkeruotis. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/litberta_uncased_pipeline_lt_5.5.0_3.0_1725578703248.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/litberta_uncased_pipeline_lt_5.5.0_3.0_1725578703248.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("litberta_uncased_pipeline", lang = "lt") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("litberta_uncased_pipeline", lang = "lt") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|litberta_uncased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|lt| +|Size:|689.2 MB| + +## References + +https://huggingface.co/jkeruotis/LitBERTa-uncased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ltrc_albert_en.md b/docs/_posts/ahmedlone127/2024-09-05-ltrc_albert_en.md new file mode 100644 index 00000000000000..7c566de7bb8074 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ltrc_albert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ltrc_albert AlbertEmbeddings from ltrctelugu +author: John Snow Labs +name: ltrc_albert +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, albert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ltrc_albert` is a English model originally trained by ltrctelugu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ltrc_albert_en_5.5.0_3.0_1725528237500.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ltrc_albert_en_5.5.0_3.0_1725528237500.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = AlbertEmbeddings.pretrained("ltrc_albert","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = AlbertEmbeddings.pretrained("ltrc_albert","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ltrc_albert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[albert]| +|Language:|en| +|Size:|41.9 MB| + +## References + +https://huggingface.co/ltrctelugu/ltrc-albert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ltrc_albert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-ltrc_albert_pipeline_en.md new file mode 100644 index 00000000000000..d54bd2818d5565 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ltrc_albert_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English ltrc_albert_pipeline pipeline AlbertEmbeddings from ltrctelugu +author: John Snow Labs +name: ltrc_albert_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ltrc_albert_pipeline` is a English model originally trained by ltrctelugu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ltrc_albert_pipeline_en_5.5.0_3.0_1725528240117.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ltrc_albert_pipeline_en_5.5.0_3.0_1725528240117.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ltrc_albert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ltrc_albert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ltrc_albert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|41.9 MB| + +## References + +https://huggingface.co/ltrctelugu/ltrc-albert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-m8_mlm_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-m8_mlm_pipeline_en.md new file mode 100644 index 00000000000000..f2cba9bf3c9bc1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-m8_mlm_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English m8_mlm_pipeline pipeline AlbertEmbeddings from S2312dal +author: John Snow Labs +name: m8_mlm_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`m8_mlm_pipeline` is a English model originally trained by S2312dal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m8_mlm_pipeline_en_5.5.0_3.0_1725528244870.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m8_mlm_pipeline_en_5.5.0_3.0_1725528244870.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("m8_mlm_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("m8_mlm_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m8_mlm_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|42.0 MB| + +## References + +https://huggingface.co/S2312dal/M8_MLM + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-m_bert_distil_40_finetune_intent_classification_en.md b/docs/_posts/ahmedlone127/2024-09-05-m_bert_distil_40_finetune_intent_classification_en.md new file mode 100644 index 00000000000000..fa246995a1ad65 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-m_bert_distil_40_finetune_intent_classification_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English m_bert_distil_40_finetune_intent_classification DistilBertForSequenceClassification from junejae +author: John Snow Labs +name: m_bert_distil_40_finetune_intent_classification +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`m_bert_distil_40_finetune_intent_classification` is a English model originally trained by junejae. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m_bert_distil_40_finetune_intent_classification_en_5.5.0_3.0_1725507585654.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m_bert_distil_40_finetune_intent_classification_en_5.5.0_3.0_1725507585654.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("m_bert_distil_40_finetune_intent_classification","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("m_bert_distil_40_finetune_intent_classification", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
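+
+Once fitted as above, the pipeline can also be wrapped in a `LightPipeline` for low-latency inference on single strings; the dict it returns is keyed by output column, so the prediction appears under `class`:
+
+```python
+from sparknlp.base import LightPipeline
+
+# assumes pipelineModel from the Python example above
+light = LightPipeline(pipelineModel)
+print(light.annotate("I love spark-nlp")["class"])
+```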
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m_bert_distil_40_finetune_intent_classification| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|508.1 MB| + +## References + +https://huggingface.co/junejae/M-BERT-Distil-40_finetune_intent_classification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-m_bert_distil_40_finetune_intent_classification_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-m_bert_distil_40_finetune_intent_classification_pipeline_en.md new file mode 100644 index 00000000000000..67d90a846d4d4a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-m_bert_distil_40_finetune_intent_classification_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English m_bert_distil_40_finetune_intent_classification_pipeline pipeline DistilBertForSequenceClassification from junejae +author: John Snow Labs +name: m_bert_distil_40_finetune_intent_classification_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`m_bert_distil_40_finetune_intent_classification_pipeline` is a English model originally trained by junejae. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m_bert_distil_40_finetune_intent_classification_pipeline_en_5.5.0_3.0_1725507610474.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m_bert_distil_40_finetune_intent_classification_pipeline_en_5.5.0_3.0_1725507610474.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("m_bert_distil_40_finetune_intent_classification_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("m_bert_distil_40_finetune_intent_classification_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m_bert_distil_40_finetune_intent_classification_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|508.1 MB| + +## References + +https://huggingface.co/junejae/M-BERT-Distil-40_finetune_intent_classification + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-malbert_base_cased_32k_fr.md b/docs/_posts/ahmedlone127/2024-09-05-malbert_base_cased_32k_fr.md new file mode 100644 index 00000000000000..6c9ca577a2b3c4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-malbert_base_cased_32k_fr.md @@ -0,0 +1,94 @@ +--- +layout: model +title: French malbert_base_cased_32k AlbertEmbeddings from cservan +author: John Snow Labs +name: malbert_base_cased_32k +date: 2024-09-05 +tags: [fr, open_source, onnx, embeddings, albert] +task: Embeddings +language: fr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`malbert_base_cased_32k` is a French model originally trained by cservan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/malbert_base_cased_32k_fr_5.5.0_3.0_1725568422984.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/malbert_base_cased_32k_fr_5.5.0_3.0_1725568422984.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = AlbertEmbeddings.pretrained("malbert_base_cased_32k","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = AlbertEmbeddings.pretrained("malbert_base_cased_32k","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|malbert_base_cased_32k| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[albert]| +|Language:|fr| +|Size:|27.4 MB| + +## References + +https://huggingface.co/cservan/malbert-base-cased-32k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-malbert_base_cased_64k_fr.md b/docs/_posts/ahmedlone127/2024-09-05-malbert_base_cased_64k_fr.md new file mode 100644 index 00000000000000..e72e24077788f5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-malbert_base_cased_64k_fr.md @@ -0,0 +1,94 @@ +--- +layout: model +title: French malbert_base_cased_64k AlbertEmbeddings from cservan +author: John Snow Labs +name: malbert_base_cased_64k +date: 2024-09-05 +tags: [fr, open_source, onnx, embeddings, albert] +task: Embeddings +language: fr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`malbert_base_cased_64k` is a French model originally trained by cservan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/malbert_base_cased_64k_fr_5.5.0_3.0_1725568603414.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/malbert_base_cased_64k_fr_5.5.0_3.0_1725568603414.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = AlbertEmbeddings.pretrained("malbert_base_cased_64k","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = AlbertEmbeddings.pretrained("malbert_base_cased_64k","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|malbert_base_cased_64k| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[albert]| +|Language:|fr| +|Size:|37.5 MB| + +## References + +https://huggingface.co/cservan/malbert-base-cased-64k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-malbert_base_cased_64k_pipeline_fr.md b/docs/_posts/ahmedlone127/2024-09-05-malbert_base_cased_64k_pipeline_fr.md new file mode 100644 index 00000000000000..9921785ba3cdca --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-malbert_base_cased_64k_pipeline_fr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: French malbert_base_cased_64k_pipeline pipeline AlbertEmbeddings from cservan +author: John Snow Labs +name: malbert_base_cased_64k_pipeline +date: 2024-09-05 +tags: [fr, open_source, pipeline, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`malbert_base_cased_64k_pipeline` is a French model originally trained by cservan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/malbert_base_cased_64k_pipeline_fr_5.5.0_3.0_1725568614463.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/malbert_base_cased_64k_pipeline_fr_5.5.0_3.0_1725568614463.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("malbert_base_cased_64k_pipeline", lang = "fr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("malbert_base_cased_64k_pipeline", lang = "fr") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|malbert_base_cased_64k_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fr| +|Size:|37.5 MB| + +## References + +https://huggingface.co/cservan/malbert-base-cased-64k + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-marathi_albert_mr.md b/docs/_posts/ahmedlone127/2024-09-05-marathi_albert_mr.md new file mode 100644 index 00000000000000..a28f663db5514e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-marathi_albert_mr.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Marathi marathi_albert AlbertEmbeddings from l3cube-pune +author: John Snow Labs +name: marathi_albert +date: 2024-09-05 +tags: [mr, open_source, onnx, embeddings, albert] +task: Embeddings +language: mr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marathi_albert` is a Marathi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marathi_albert_mr_5.5.0_3.0_1725528313858.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marathi_albert_mr_5.5.0_3.0_1725528313858.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = AlbertEmbeddings.pretrained("marathi_albert","mr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = AlbertEmbeddings.pretrained("marathi_albert","mr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marathi_albert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[albert]| +|Language:|mr| +|Size:|42.6 MB| + +## References + +https://huggingface.co/l3cube-pune/marathi-albert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-marathi_albert_pipeline_mr.md b/docs/_posts/ahmedlone127/2024-09-05-marathi_albert_pipeline_mr.md new file mode 100644 index 00000000000000..4d6bf3557f03f8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-marathi_albert_pipeline_mr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Marathi marathi_albert_pipeline pipeline AlbertEmbeddings from l3cube-pune +author: John Snow Labs +name: marathi_albert_pipeline +date: 2024-09-05 +tags: [mr, open_source, pipeline, onnx] +task: Embeddings +language: mr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marathi_albert_pipeline` is a Marathi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marathi_albert_pipeline_mr_5.5.0_3.0_1725528316463.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marathi_albert_pipeline_mr_5.5.0_3.0_1725528316463.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("marathi_albert_pipeline", lang = "mr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("marathi_albert_pipeline", lang = "mr") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marathi_albert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|mr| +|Size:|42.6 MB| + +## References + +https://huggingface.co/l3cube-pune/marathi-albert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-marian_english_german_test_en.md b/docs/_posts/ahmedlone127/2024-09-05-marian_english_german_test_en.md new file mode 100644 index 00000000000000..cd1afbe1f8be11 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-marian_english_german_test_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English marian_english_german_test MarianTransformer from alina1997 +author: John Snow Labs +name: marian_english_german_test +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_english_german_test` is a English model originally trained by alina1997. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_english_german_test_en_5.5.0_3.0_1725545283823.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_english_german_test_en_5.5.0_3.0_1725545283823.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+# split the incoming document into sentences for the translator
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+  .setInputCols(["document"]) \
+  .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("marian_english_german_test","en") \
+  .setInputCols(["sentence"]) \
+  .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+// split the incoming document into sentences for the translator
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+  .setInputCols(Array("document"))
+  .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("marian_english_german_test","en")
+  .setInputCols(Array("sentence"))
+  .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_english_german_test| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|499.5 MB| + +## References + +https://huggingface.co/alina1997/marian_en_de_test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-marian_english_german_test_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-marian_english_german_test_pipeline_en.md new file mode 100644 index 00000000000000..5dfaceec5cf448 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-marian_english_german_test_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English marian_english_german_test_pipeline pipeline MarianTransformer from alina1997 +author: John Snow Labs +name: marian_english_german_test_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_english_german_test_pipeline` is a English model originally trained by alina1997. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_english_german_test_pipeline_en_5.5.0_3.0_1725545316536.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_english_german_test_pipeline_en_5.5.0_3.0_1725545316536.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("marian_english_german_test_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("marian_english_german_test_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
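+
+In the snippet above, `df` is expected to be a Spark DataFrame with a `text` column. For quick single-string experiments, a hedged sketch using the standard `PretrainedPipeline` helper (the `"translation"` result key is an assumption based on this pipeline's Marian output column) is:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("marian_english_german_test_pipeline", lang = "en")
+# annotate() runs the full pipeline on one string and returns a dict keyed by output column.
+result = pipeline.annotate("I love spark-nlp")
+print(result["translation"])
+```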
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_english_german_test_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|500.0 MB| + +## References + +https://huggingface.co/alina1997/marian_en_de_test + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_magnustragardh_en.md b/docs/_posts/ahmedlone127/2024-09-05-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_magnustragardh_en.md new file mode 100644 index 00000000000000..28d3358ec883c1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_magnustragardh_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_magnustragardh MarianTransformer from magnustragardh +author: John Snow Labs +name: marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_magnustragardh +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_magnustragardh` is a English model originally trained by magnustragardh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_magnustragardh_en_5.5.0_3.0_1725545357234.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_magnustragardh_en_5.5.0_3.0_1725545357234.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_magnustragardh","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_magnustragardh","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_magnustragardh| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.2 MB| + +## References + +https://huggingface.co/magnustragardh/marian-finetuned-kde4-en-to-fr-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_mithegooie_en.md b/docs/_posts/ahmedlone127/2024-09-05-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_mithegooie_en.md new file mode 100644 index 00000000000000..4a9587f1120119 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_mithegooie_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_mithegooie MarianTransformer from mithegooie +author: John Snow Labs +name: marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_mithegooie +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_mithegooie` is a English model originally trained by mithegooie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_mithegooie_en_5.5.0_3.0_1725545522025.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_mithegooie_en_5.5.0_3.0_1725545522025.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_mithegooie","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_mithegooie","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_mithegooie| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|507.8 MB| + +## References + +https://huggingface.co/mithegooie/marian-finetuned-kde4-en-to-fr-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_mithegooie_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_mithegooie_pipeline_en.md new file mode 100644 index 00000000000000..81df7b5c4e40d7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_mithegooie_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_mithegooie_pipeline pipeline MarianTransformer from mithegooie +author: John Snow Labs +name: marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_mithegooie_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_mithegooie_pipeline` is a English model originally trained by mithegooie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_mithegooie_pipeline_en_5.5.0_3.0_1725545549595.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_mithegooie_pipeline_en_5.5.0_3.0_1725545549595.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_mithegooie_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_mithegooie_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_mithegooie_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|508.4 MB| + +## References + +https://huggingface.co/mithegooie/marian-finetuned-kde4-en-to-fr-accelerate + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-marian_finetuned_kde4_english_tonga_tonga_islands_french_douglaschan_en.md b/docs/_posts/ahmedlone127/2024-09-05-marian_finetuned_kde4_english_tonga_tonga_islands_french_douglaschan_en.md new file mode 100644 index 00000000000000..e747daa717e44a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-marian_finetuned_kde4_english_tonga_tonga_islands_french_douglaschan_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English marian_finetuned_kde4_english_tonga_tonga_islands_french_douglaschan MarianTransformer from DouglasChan +author: John Snow Labs +name: marian_finetuned_kde4_english_tonga_tonga_islands_french_douglaschan +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kde4_english_tonga_tonga_islands_french_douglaschan` is a English model originally trained by DouglasChan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_douglaschan_en_5.5.0_3.0_1725546270191.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_douglaschan_en_5.5.0_3.0_1725546270191.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_french_douglaschan","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_french_douglaschan","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kde4_english_tonga_tonga_islands_french_douglaschan| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.1 MB| + +## References + +https://huggingface.co/DouglasChan/marian-finetuned-kde4-en-to-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-marian_finetuned_kde4_english_tonga_tonga_islands_french_douglaschan_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-marian_finetuned_kde4_english_tonga_tonga_islands_french_douglaschan_pipeline_en.md new file mode 100644 index 00000000000000..e5421ee1807da9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-marian_finetuned_kde4_english_tonga_tonga_islands_french_douglaschan_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English marian_finetuned_kde4_english_tonga_tonga_islands_french_douglaschan_pipeline pipeline MarianTransformer from DouglasChan +author: John Snow Labs +name: marian_finetuned_kde4_english_tonga_tonga_islands_french_douglaschan_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kde4_english_tonga_tonga_islands_french_douglaschan_pipeline` is a English model originally trained by DouglasChan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_douglaschan_pipeline_en_5.5.0_3.0_1725546296797.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_douglaschan_pipeline_en_5.5.0_3.0_1725546296797.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("marian_finetuned_kde4_english_tonga_tonga_islands_french_douglaschan_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("marian_finetuned_kde4_english_tonga_tonga_islands_french_douglaschan_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kde4_english_tonga_tonga_islands_french_douglaschan_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|508.6 MB| + +## References + +https://huggingface.co/DouglasChan/marian-finetuned-kde4-en-to-fr + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-marian_finetuned_kftt_japanese_tonga_tonga_islands_english_test1_en.md b/docs/_posts/ahmedlone127/2024-09-05-marian_finetuned_kftt_japanese_tonga_tonga_islands_english_test1_en.md new file mode 100644 index 00000000000000..577500e0778ce6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-marian_finetuned_kftt_japanese_tonga_tonga_islands_english_test1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English marian_finetuned_kftt_japanese_tonga_tonga_islands_english_test1 MarianTransformer from sephinroth +author: John Snow Labs +name: marian_finetuned_kftt_japanese_tonga_tonga_islands_english_test1 +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kftt_japanese_tonga_tonga_islands_english_test1` is a English model originally trained by sephinroth. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kftt_japanese_tonga_tonga_islands_english_test1_en_5.5.0_3.0_1725546154447.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kftt_japanese_tonga_tonga_islands_english_test1_en_5.5.0_3.0_1725546154447.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("marian_finetuned_kftt_japanese_tonga_tonga_islands_english_test1","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("marian_finetuned_kftt_japanese_tonga_tonga_islands_english_test1","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kftt_japanese_tonga_tonga_islands_english_test1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|515.2 MB| + +## References + +https://huggingface.co/sephinroth/marian-finetuned-kftt-ja-to-en-test1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-marian_finetuned_kftt_japanese_tonga_tonga_islands_english_test1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-marian_finetuned_kftt_japanese_tonga_tonga_islands_english_test1_pipeline_en.md new file mode 100644 index 00000000000000..4eba91970b690a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-marian_finetuned_kftt_japanese_tonga_tonga_islands_english_test1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English marian_finetuned_kftt_japanese_tonga_tonga_islands_english_test1_pipeline pipeline MarianTransformer from sephinroth +author: John Snow Labs +name: marian_finetuned_kftt_japanese_tonga_tonga_islands_english_test1_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kftt_japanese_tonga_tonga_islands_english_test1_pipeline` is a English model originally trained by sephinroth. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kftt_japanese_tonga_tonga_islands_english_test1_pipeline_en_5.5.0_3.0_1725546181897.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kftt_japanese_tonga_tonga_islands_english_test1_pipeline_en_5.5.0_3.0_1725546181897.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("marian_finetuned_kftt_japanese_tonga_tonga_islands_english_test1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("marian_finetuned_kftt_japanese_tonga_tonga_islands_english_test1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kftt_japanese_tonga_tonga_islands_english_test1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|515.8 MB| + +## References + +https://huggingface.co/sephinroth/marian-finetuned-kftt-ja-to-en-test1 + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-marianmt_ufal_english_french_en.md b/docs/_posts/ahmedlone127/2024-09-05-marianmt_ufal_english_french_en.md new file mode 100644 index 00000000000000..b5e08996cb0d8c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-marianmt_ufal_english_french_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English marianmt_ufal_english_french MarianTransformer from irenelizihui +author: John Snow Labs +name: marianmt_ufal_english_french +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marianmt_ufal_english_french` is a English model originally trained by irenelizihui. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marianmt_ufal_english_french_en_5.5.0_3.0_1725545192577.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marianmt_ufal_english_french_en_5.5.0_3.0_1725545192577.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("marianmt_ufal_english_french","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("marianmt_ufal_english_french","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marianmt_ufal_english_french| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|506.8 MB| + +## References + +https://huggingface.co/irenelizihui/MarianMT_UFAL_en_fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-marianmt_ufal_english_french_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-marianmt_ufal_english_french_pipeline_en.md new file mode 100644 index 00000000000000..d612c3d6e43cb6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-marianmt_ufal_english_french_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English marianmt_ufal_english_french_pipeline pipeline MarianTransformer from irenelizihui +author: John Snow Labs +name: marianmt_ufal_english_french_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marianmt_ufal_english_french_pipeline` is a English model originally trained by irenelizihui. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marianmt_ufal_english_french_pipeline_en_5.5.0_3.0_1725545220126.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marianmt_ufal_english_french_pipeline_en_5.5.0_3.0_1725545220126.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("marianmt_ufal_english_french_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("marianmt_ufal_english_french_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marianmt_ufal_english_french_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|507.3 MB| + +## References + +https://huggingface.co/irenelizihui/MarianMT_UFAL_en_fr + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-mdeberta_base_v3_6_en.md b/docs/_posts/ahmedlone127/2024-09-05-mdeberta_base_v3_6_en.md new file mode 100644 index 00000000000000..5d0f1cae2690e9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-mdeberta_base_v3_6_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mdeberta_base_v3_6 DeBertaForSequenceClassification from alyazharr +author: John Snow Labs +name: mdeberta_base_v3_6 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_base_v3_6` is a English model originally trained by alyazharr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_base_v3_6_en_5.5.0_3.0_1725561416397.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_base_v3_6_en_5.5.0_3.0_1725561416397.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("mdeberta_base_v3_6","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("mdeberta_base_v3_6", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
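+
+As a small follow-up sketch (an illustration rather than part of the original card; it only assumes the `pipelineDF` DataFrame produced above), the predicted labels can be pulled out of the `class` output column:
+
+```python
+# `class.result` holds the predicted label(s) for each input row.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```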
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_base_v3_6| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|832.6 MB| + +## References + +https://huggingface.co/alyazharr/mdeberta_base_v3_6 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-mdeberta_base_v3_6_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-mdeberta_base_v3_6_pipeline_en.md new file mode 100644 index 00000000000000..191114f7f7471a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-mdeberta_base_v3_6_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mdeberta_base_v3_6_pipeline pipeline DeBertaForSequenceClassification from alyazharr +author: John Snow Labs +name: mdeberta_base_v3_6_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_base_v3_6_pipeline` is a English model originally trained by alyazharr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_base_v3_6_pipeline_en_5.5.0_3.0_1725561500796.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_base_v3_6_pipeline_en_5.5.0_3.0_1725561500796.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mdeberta_base_v3_6_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mdeberta_base_v3_6_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_base_v3_6_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|832.6 MB| + +## References + +https://huggingface.co/alyazharr/mdeberta_base_v3_6 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_base_assin_entailment_pipeline_pt.md b/docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_base_assin_entailment_pipeline_pt.md new file mode 100644 index 00000000000000..35023b5b8abac6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_base_assin_entailment_pipeline_pt.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Portuguese mdeberta_v3_base_assin_entailment_pipeline pipeline DeBertaForSequenceClassification from ruanchaves +author: John Snow Labs +name: mdeberta_v3_base_assin_entailment_pipeline +date: 2024-09-05 +tags: [pt, open_source, pipeline, onnx] +task: Text Classification +language: pt +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_v3_base_assin_entailment_pipeline` is a Portuguese model originally trained by ruanchaves. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_assin_entailment_pipeline_pt_5.5.0_3.0_1725562842205.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_assin_entailment_pipeline_pt_5.5.0_3.0_1725562842205.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mdeberta_v3_base_assin_entailment_pipeline", lang = "pt") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mdeberta_v3_base_assin_entailment_pipeline", lang = "pt") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_v3_base_assin_entailment_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|pt| +|Size:|851.6 MB| + +## References + +https://huggingface.co/ruanchaves/mdeberta-v3-base-assin-entailment + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_base_assin_entailment_pt.md b/docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_base_assin_entailment_pt.md new file mode 100644 index 00000000000000..99e701cbe7de38 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_base_assin_entailment_pt.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Portuguese mdeberta_v3_base_assin_entailment DeBertaForSequenceClassification from ruanchaves +author: John Snow Labs +name: mdeberta_v3_base_assin_entailment +date: 2024-09-05 +tags: [pt, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: pt +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_v3_base_assin_entailment` is a Portuguese model originally trained by ruanchaves. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_assin_entailment_pt_5.5.0_3.0_1725562785526.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_assin_entailment_pt_5.5.0_3.0_1725562785526.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("mdeberta_v3_base_assin_entailment","pt") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("mdeberta_v3_base_assin_entailment", "pt")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
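+
+For scoring individual sentences it can be convenient to wrap the fitted model in a `LightPipeline`. The following is a minimal sketch under that assumption; the Portuguese example text is illustrative only, and the `"class"` result key follows the classifier's output column set above:
+
+```python
+from sparknlp.base import LightPipeline
+
+# LightPipeline avoids a full DataFrame round-trip for small inputs.
+light_pipeline = LightPipeline(pipelineModel)
+annotations = light_pipeline.annotate("Eu adoro o spark-nlp")
+print(annotations["class"])
+```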
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_v3_base_assin_entailment| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|pt| +|Size:|851.6 MB| + +## References + +https://huggingface.co/ruanchaves/mdeberta-v3-base-assin-entailment \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_base_caresa_pipeline_es.md b/docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_base_caresa_pipeline_es.md new file mode 100644 index 00000000000000..267389122e32e8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_base_caresa_pipeline_es.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Castilian, Spanish mdeberta_v3_base_caresa_pipeline pipeline DeBertaForSequenceClassification from IIC +author: John Snow Labs +name: mdeberta_v3_base_caresa_pipeline +date: 2024-09-05 +tags: [es, open_source, pipeline, onnx] +task: Text Classification +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_v3_base_caresa_pipeline` is a Castilian, Spanish model originally trained by IIC. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_caresa_pipeline_es_5.5.0_3.0_1725561447825.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_caresa_pipeline_es_5.5.0_3.0_1725561447825.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mdeberta_v3_base_caresa_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mdeberta_v3_base_caresa_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_v3_base_caresa_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|794.5 MB| + +## References + +https://huggingface.co/IIC/mdeberta-v3-base-caresA + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_base_qqp_1_en.md b/docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_base_qqp_1_en.md new file mode 100644 index 00000000000000..e479735443d318 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_base_qqp_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mdeberta_v3_base_qqp_1 DeBertaForSequenceClassification from tmnam20 +author: John Snow Labs +name: mdeberta_v3_base_qqp_1 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_v3_base_qqp_1` is a English model originally trained by tmnam20. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_qqp_1_en_5.5.0_3.0_1725562489902.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_qqp_1_en_5.5.0_3.0_1725562489902.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("mdeberta_v3_base_qqp_1","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("mdeberta_v3_base_qqp_1", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_v3_base_qqp_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|834.7 MB| + +## References + +https://huggingface.co/tmnam20/mdeberta-v3-base-qqp-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_base_qqp_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_base_qqp_1_pipeline_en.md new file mode 100644 index 00000000000000..9832185b30f9ac --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_base_qqp_1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mdeberta_v3_base_qqp_1_pipeline pipeline DeBertaForSequenceClassification from tmnam20 +author: John Snow Labs +name: mdeberta_v3_base_qqp_1_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_v3_base_qqp_1_pipeline` is a English model originally trained by tmnam20. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_qqp_1_pipeline_en_5.5.0_3.0_1725562623768.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_qqp_1_pipeline_en_5.5.0_3.0_1725562623768.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mdeberta_v3_base_qqp_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mdeberta_v3_base_qqp_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_v3_base_qqp_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|834.8 MB| + +## References + +https://huggingface.co/tmnam20/mdeberta-v3-base-qqp-1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_base_triplet_critic_xnli_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_base_triplet_critic_xnli_pipeline_xx.md new file mode 100644 index 00000000000000..8850781e9a87b4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_base_triplet_critic_xnli_pipeline_xx.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Multilingual mdeberta_v3_base_triplet_critic_xnli_pipeline pipeline DeBertaForSequenceClassification from Babelscape +author: John Snow Labs +name: mdeberta_v3_base_triplet_critic_xnli_pipeline +date: 2024-09-05 +tags: [xx, open_source, pipeline, onnx] +task: Text Classification +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_v3_base_triplet_critic_xnli_pipeline` is a Multilingual model originally trained by Babelscape. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_triplet_critic_xnli_pipeline_xx_5.5.0_3.0_1725561952627.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_triplet_critic_xnli_pipeline_xx_5.5.0_3.0_1725561952627.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mdeberta_v3_base_triplet_critic_xnli_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mdeberta_v3_base_triplet_critic_xnli_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_v3_base_triplet_critic_xnli_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|907.5 MB| + +## References + +https://huggingface.co/Babelscape/mdeberta-v3-base-triplet-critic-xnli + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_base_triplet_critic_xnli_xx.md b/docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_base_triplet_critic_xnli_xx.md new file mode 100644 index 00000000000000..f0161d154f7ff8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_base_triplet_critic_xnli_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual mdeberta_v3_base_triplet_critic_xnli DeBertaForSequenceClassification from Babelscape +author: John Snow Labs +name: mdeberta_v3_base_triplet_critic_xnli +date: 2024-09-05 +tags: [xx, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_v3_base_triplet_critic_xnli` is a Multilingual model originally trained by Babelscape. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_triplet_critic_xnli_xx_5.5.0_3.0_1725561849388.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_triplet_critic_xnli_xx_5.5.0_3.0_1725561849388.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("mdeberta_v3_base_triplet_critic_xnli","xx") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("mdeberta_v3_base_triplet_critic_xnli", "xx")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_v3_base_triplet_critic_xnli| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|xx| +|Size:|907.5 MB| + +## References + +https://huggingface.co/Babelscape/mdeberta-v3-base-triplet-critic-xnli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_fine_tuned_text_classification_slovene_data_augmentation_en.md b/docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_fine_tuned_text_classification_slovene_data_augmentation_en.md new file mode 100644 index 00000000000000..a8ddc390524f1c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_fine_tuned_text_classification_slovene_data_augmentation_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mdeberta_v3_fine_tuned_text_classification_slovene_data_augmentation DeBertaForSequenceClassification from Sleoruiz +author: John Snow Labs +name: mdeberta_v3_fine_tuned_text_classification_slovene_data_augmentation +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_v3_fine_tuned_text_classification_slovene_data_augmentation` is a English model originally trained by Sleoruiz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_v3_fine_tuned_text_classification_slovene_data_augmentation_en_5.5.0_3.0_1725561526271.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_v3_fine_tuned_text_classification_slovene_data_augmentation_en_5.5.0_3.0_1725561526271.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("mdeberta_v3_fine_tuned_text_classification_slovene_data_augmentation","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("mdeberta_v3_fine_tuned_text_classification_slovene_data_augmentation", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_v3_fine_tuned_text_classification_slovene_data_augmentation| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|804.2 MB| + +## References + +https://huggingface.co/Sleoruiz/mdeberta-v3-fine-tuned-text-classification-SL-data-augmentation \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_fine_tuned_text_classification_slovene_data_augmentation_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_fine_tuned_text_classification_slovene_data_augmentation_pipeline_en.md new file mode 100644 index 00000000000000..f6490bf821d030 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-mdeberta_v3_fine_tuned_text_classification_slovene_data_augmentation_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mdeberta_v3_fine_tuned_text_classification_slovene_data_augmentation_pipeline pipeline DeBertaForSequenceClassification from Sleoruiz +author: John Snow Labs +name: mdeberta_v3_fine_tuned_text_classification_slovene_data_augmentation_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_v3_fine_tuned_text_classification_slovene_data_augmentation_pipeline` is a English model originally trained by Sleoruiz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_v3_fine_tuned_text_classification_slovene_data_augmentation_pipeline_en_5.5.0_3.0_1725561586384.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_v3_fine_tuned_text_classification_slovene_data_augmentation_pipeline_en_5.5.0_3.0_1725561586384.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mdeberta_v3_fine_tuned_text_classification_slovene_data_augmentation_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mdeberta_v3_fine_tuned_text_classification_slovene_data_augmentation_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
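+
+The snippet above assumes an existing DataFrame `df` with a `text` column and an active Spark session with Spark NLP attached. A self-contained sketch under those assumptions:
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+# Start (or attach to) a Spark session with Spark NLP on the classpath.
+spark = sparknlp.start()
+
+# The pretrained pipeline expects a DataFrame with a `text` column.
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("mdeberta_v3_fine_tuned_text_classification_slovene_data_augmentation_pipeline", lang="en")
+annotations = pipeline.transform(df)
+
+# The predicted label should land in the `class` column (based on the base model's output labels).
+annotations.select("class.result").show(truncate=False)
+```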
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_v3_fine_tuned_text_classification_slovene_data_augmentation_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|804.2 MB| + +## References + +https://huggingface.co/Sleoruiz/mdeberta-v3-fine-tuned-text-classification-SL-data-augmentation + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-mdebertav3_subjectivity_turkish_pipeline_tr.md b/docs/_posts/ahmedlone127/2024-09-05-mdebertav3_subjectivity_turkish_pipeline_tr.md new file mode 100644 index 00000000000000..e95c0de4eaf551 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-mdebertav3_subjectivity_turkish_pipeline_tr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Turkish mdebertav3_subjectivity_turkish_pipeline pipeline DeBertaForSequenceClassification from GroNLP +author: John Snow Labs +name: mdebertav3_subjectivity_turkish_pipeline +date: 2024-09-05 +tags: [tr, open_source, pipeline, onnx] +task: Text Classification +language: tr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdebertav3_subjectivity_turkish_pipeline` is a Turkish model originally trained by GroNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdebertav3_subjectivity_turkish_pipeline_tr_5.5.0_3.0_1725561768909.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdebertav3_subjectivity_turkish_pipeline_tr_5.5.0_3.0_1725561768909.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mdebertav3_subjectivity_turkish_pipeline", lang = "tr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mdebertav3_subjectivity_turkish_pipeline", lang = "tr") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdebertav3_subjectivity_turkish_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|tr| +|Size:|860.5 MB| + +## References + +https://huggingface.co/GroNLP/mdebertav3-subjectivity-turkish + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-mdebertav3_subjectivity_turkish_tr.md b/docs/_posts/ahmedlone127/2024-09-05-mdebertav3_subjectivity_turkish_tr.md new file mode 100644 index 00000000000000..44fba43b8c03e5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-mdebertav3_subjectivity_turkish_tr.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Turkish mdebertav3_subjectivity_turkish DeBertaForSequenceClassification from GroNLP +author: John Snow Labs +name: mdebertav3_subjectivity_turkish +date: 2024-09-05 +tags: [tr, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: tr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdebertav3_subjectivity_turkish` is a Turkish model originally trained by GroNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdebertav3_subjectivity_turkish_tr_5.5.0_3.0_1725561707892.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdebertav3_subjectivity_turkish_tr_5.5.0_3.0_1725561707892.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("mdebertav3_subjectivity_turkish","tr") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("mdebertav3_subjectivity_turkish", "tr")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdebertav3_subjectivity_turkish| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|tr| +|Size:|860.4 MB| + +## References + +https://huggingface.co/GroNLP/mdebertav3-subjectivity-turkish \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-mdt_ie_re_baseline_en.md b/docs/_posts/ahmedlone127/2024-09-05-mdt_ie_re_baseline_en.md new file mode 100644 index 00000000000000..2a90073597bec9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-mdt_ie_re_baseline_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mdt_ie_re_baseline XlmRoBertaForSequenceClassification from OSainz +author: John Snow Labs +name: mdt_ie_re_baseline +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdt_ie_re_baseline` is a English model originally trained by OSainz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdt_ie_re_baseline_en_5.5.0_3.0_1725529013613.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdt_ie_re_baseline_en_5.5.0_3.0_1725529013613.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("mdt_ie_re_baseline","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("mdt_ie_re_baseline", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdt_ie_re_baseline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|795.6 MB| + +## References + +https://huggingface.co/OSainz/mdt-ie-re-baseline \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-mdt_ie_re_baseline_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-mdt_ie_re_baseline_pipeline_en.md new file mode 100644 index 00000000000000..6da22843a93630 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-mdt_ie_re_baseline_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mdt_ie_re_baseline_pipeline pipeline XlmRoBertaForSequenceClassification from OSainz +author: John Snow Labs +name: mdt_ie_re_baseline_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdt_ie_re_baseline_pipeline` is a English model originally trained by OSainz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdt_ie_re_baseline_pipeline_en_5.5.0_3.0_1725529141862.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdt_ie_re_baseline_pipeline_en_5.5.0_3.0_1725529141862.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mdt_ie_re_baseline_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mdt_ie_re_baseline_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdt_ie_re_baseline_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|795.6 MB| + +## References + +https://huggingface.co/OSainz/mdt-ie-re-baseline + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-medical_enes_basque_en.md b/docs/_posts/ahmedlone127/2024-09-05-medical_enes_basque_en.md new file mode 100644 index 00000000000000..97dba665fc5b12 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-medical_enes_basque_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English medical_enes_basque MarianTransformer from anegda +author: John Snow Labs +name: medical_enes_basque +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`medical_enes_basque` is a English model originally trained by anegda. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/medical_enes_basque_en_5.5.0_3.0_1725545450172.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/medical_enes_basque_en_5.5.0_3.0_1725545450172.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("medical_enes_basque","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("medical_enes_basque","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
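+
+With the stages wired as above, the Marian output lands in the `translation` column. A short sketch for reading the translated sentences, assuming the `pipelineDF` DataFrame from the snippet above:
+
+```python
+# Sketch: one row per translated sentence (assumes `pipelineDF` from the example above).
+pipelineDF.selectExpr("explode(translation) as t") \
+    .select("t.result") \
+    .show(truncate=False)
+```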
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|medical_enes_basque| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|137.1 MB| + +## References + +https://huggingface.co/anegda/medical_enes-eu \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-medical_english_basque_en.md b/docs/_posts/ahmedlone127/2024-09-05-medical_english_basque_en.md new file mode 100644 index 00000000000000..2e081b8c220f84 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-medical_english_basque_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English medical_english_basque MarianTransformer from anegda +author: John Snow Labs +name: medical_english_basque +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`medical_english_basque` is a English model originally trained by anegda. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/medical_english_basque_en_5.5.0_3.0_1725545978140.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/medical_english_basque_en_5.5.0_3.0_1725545978140.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("medical_english_basque","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("medical_english_basque","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|medical_english_basque| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|137.0 MB| + +## References + +https://huggingface.co/anegda/medical_en-eu \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-medical_english_basque_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-medical_english_basque_pipeline_en.md new file mode 100644 index 00000000000000..328701dde90216 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-medical_english_basque_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English medical_english_basque_pipeline pipeline MarianTransformer from anegda +author: John Snow Labs +name: medical_english_basque_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`medical_english_basque_pipeline` is a English model originally trained by anegda. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/medical_english_basque_pipeline_en_5.5.0_3.0_1725546021813.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/medical_english_basque_pipeline_en_5.5.0_3.0_1725546021813.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("medical_english_basque_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("medical_english_basque_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|medical_english_basque_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|137.6 MB| + +## References + +https://huggingface.co/anegda/medical_en-eu + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-medidalroberta_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-medidalroberta_pipeline_en.md new file mode 100644 index 00000000000000..e554840f47ff23 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-medidalroberta_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English medidalroberta_pipeline pipeline XlmRoBertaForSequenceClassification from achDev +author: John Snow Labs +name: medidalroberta_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`medidalroberta_pipeline` is a English model originally trained by achDev. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/medidalroberta_pipeline_en_5.5.0_3.0_1725535983793.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/medidalroberta_pipeline_en_5.5.0_3.0_1725535983793.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("medidalroberta_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("medidalroberta_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|medidalroberta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|797.5 MB| + +## References + +https://huggingface.co/achDev/medidalRoberta + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-mentalberta_en.md b/docs/_posts/ahmedlone127/2024-09-05-mentalberta_en.md new file mode 100644 index 00000000000000..61a888095987f0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-mentalberta_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mentalberta RoBertaEmbeddings from dlb +author: John Snow Labs +name: mentalberta +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mentalberta` is a English model originally trained by dlb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mentalberta_en_5.5.0_3.0_1725572488262.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mentalberta_en_5.5.0_3.0_1725572488262.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("mentalberta","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("mentalberta","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
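+
+The `embeddings` column above holds one annotation per token, carrying the token text and its vector. A minimal sketch for inspecting those vectors, assuming the `pipelineDF` DataFrame from the snippet above:
+
+```python
+# Sketch: one row per token with its embedding vector (assumes `pipelineDF` from the example above).
+pipelineDF.selectExpr("explode(embeddings) as e") \
+    .select("e.result", "e.embeddings") \
+    .show(truncate=80)
+```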
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mentalberta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/dlb/MentalBERTa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-mentalberta_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-mentalberta_pipeline_en.md new file mode 100644 index 00000000000000..3e3942265822c3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-mentalberta_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mentalberta_pipeline pipeline RoBertaEmbeddings from dlb +author: John Snow Labs +name: mentalberta_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mentalberta_pipeline` is a English model originally trained by dlb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mentalberta_pipeline_en_5.5.0_3.0_1725572553382.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mentalberta_pipeline_en_5.5.0_3.0_1725572553382.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mentalberta_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mentalberta_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mentalberta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/dlb/MentalBERTa + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-mlm_acutal_bangla_hate_en.md b/docs/_posts/ahmedlone127/2024-09-05-mlm_acutal_bangla_hate_en.md new file mode 100644 index 00000000000000..9710473c12961a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-mlm_acutal_bangla_hate_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mlm_acutal_bangla_hate BertEmbeddings from BivasDas +author: John Snow Labs +name: mlm_acutal_bangla_hate +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mlm_acutal_bangla_hate` is a English model originally trained by BivasDas. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlm_acutal_bangla_hate_en_5.5.0_3.0_1725553002496.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlm_acutal_bangla_hate_en_5.5.0_3.0_1725553002496.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("mlm_acutal_bangla_hate","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("mlm_acutal_bangla_hate","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlm_acutal_bangla_hate| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|en| +|Size:|614.9 MB| + +## References + +https://huggingface.co/BivasDas/mlm-acutal-bangla-hate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-mlm_acutal_bangla_hate_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-mlm_acutal_bangla_hate_pipeline_en.md new file mode 100644 index 00000000000000..67b2e894c85342 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-mlm_acutal_bangla_hate_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mlm_acutal_bangla_hate_pipeline pipeline BertEmbeddings from BivasDas +author: John Snow Labs +name: mlm_acutal_bangla_hate_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mlm_acutal_bangla_hate_pipeline` is a English model originally trained by BivasDas. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlm_acutal_bangla_hate_pipeline_en_5.5.0_3.0_1725553034995.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlm_acutal_bangla_hate_pipeline_en_5.5.0_3.0_1725553034995.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mlm_acutal_bangla_hate_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mlm_acutal_bangla_hate_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlm_acutal_bangla_hate_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|614.9 MB| + +## References + +https://huggingface.co/BivasDas/mlm-acutal-bangla-hate + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-model11epochs_en.md b/docs/_posts/ahmedlone127/2024-09-05-model11epochs_en.md new file mode 100644 index 00000000000000..607ac14a240afe --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-model11epochs_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English model11epochs DistilBertForSequenceClassification from MartaTT +author: John Snow Labs +name: model11epochs +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`model11epochs` is a English model originally trained by MartaTT. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/model11epochs_en_5.5.0_3.0_1725507536965.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/model11epochs_en_5.5.0_3.0_1725507536965.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("model11epochs","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("model11epochs", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|model11epochs| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/MartaTT/model11epochs \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-model11epochs_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-model11epochs_pipeline_en.md new file mode 100644 index 00000000000000..73f500d2d9241a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-model11epochs_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English model11epochs_pipeline pipeline DistilBertForSequenceClassification from MartaTT +author: John Snow Labs +name: model11epochs_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`model11epochs_pipeline` is a English model originally trained by MartaTT. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/model11epochs_pipeline_en_5.5.0_3.0_1725507549205.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/model11epochs_pipeline_en_5.5.0_3.0_1725507549205.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("model11epochs_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("model11epochs_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|model11epochs_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/MartaTT/model11epochs + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-model_albert_5000_1_en.md b/docs/_posts/ahmedlone127/2024-09-05-model_albert_5000_1_en.md new file mode 100644 index 00000000000000..4907ab216695b4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-model_albert_5000_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English model_albert_5000_1 AlbertForSequenceClassification from KalaiselvanD +author: John Snow Labs +name: model_albert_5000_1 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`model_albert_5000_1` is a English model originally trained by KalaiselvanD. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/model_albert_5000_1_en_5.5.0_3.0_1725525218367.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/model_albert_5000_1_en_5.5.0_3.0_1725525218367.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = AlbertForSequenceClassification.pretrained("model_albert_5000_1","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = AlbertForSequenceClassification.pretrained("model_albert_5000_1", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|model_albert_5000_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/KalaiselvanD/model_albert_5000_1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-model_arebmann_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-model_arebmann_pipeline_en.md new file mode 100644 index 00000000000000..d6f1cfdf51f2a5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-model_arebmann_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English model_arebmann_pipeline pipeline DistilBertForTokenClassification from arebmann +author: John Snow Labs +name: model_arebmann_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`model_arebmann_pipeline` is a English model originally trained by arebmann. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/model_arebmann_pipeline_en_5.5.0_3.0_1725506139425.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/model_arebmann_pipeline_en_5.5.0_3.0_1725506139425.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("model_arebmann_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("model_arebmann_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|model_arebmann_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/arebmann/model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-monotransquest_hter_english_czech_pharmaceutical_en.md b/docs/_posts/ahmedlone127/2024-09-05-monotransquest_hter_english_czech_pharmaceutical_en.md new file mode 100644 index 00000000000000..63c6e73f3f7d9f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-monotransquest_hter_english_czech_pharmaceutical_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English monotransquest_hter_english_czech_pharmaceutical XlmRoBertaForSequenceClassification from TransQuest +author: John Snow Labs +name: monotransquest_hter_english_czech_pharmaceutical +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`monotransquest_hter_english_czech_pharmaceutical` is a English model originally trained by TransQuest. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/monotransquest_hter_english_czech_pharmaceutical_en_5.5.0_3.0_1725527270160.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/monotransquest_hter_english_czech_pharmaceutical_en_5.5.0_3.0_1725527270160.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("monotransquest_hter_english_czech_pharmaceutical","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("monotransquest_hter_english_czech_pharmaceutical", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|monotransquest_hter_english_czech_pharmaceutical| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|789.7 MB| + +## References + +https://huggingface.co/TransQuest/monotransquest-hter-en_cs-pharmaceutical \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-monotransquest_hter_english_czech_pharmaceutical_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-monotransquest_hter_english_czech_pharmaceutical_pipeline_en.md new file mode 100644 index 00000000000000..feacb20821f55c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-monotransquest_hter_english_czech_pharmaceutical_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English monotransquest_hter_english_czech_pharmaceutical_pipeline pipeline XlmRoBertaForSequenceClassification from TransQuest +author: John Snow Labs +name: monotransquest_hter_english_czech_pharmaceutical_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`monotransquest_hter_english_czech_pharmaceutical_pipeline` is a English model originally trained by TransQuest. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/monotransquest_hter_english_czech_pharmaceutical_pipeline_en_5.5.0_3.0_1725527411016.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/monotransquest_hter_english_czech_pharmaceutical_pipeline_en_5.5.0_3.0_1725527411016.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("monotransquest_hter_english_czech_pharmaceutical_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("monotransquest_hter_english_czech_pharmaceutical_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|monotransquest_hter_english_czech_pharmaceutical_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|789.7 MB| + +## References + +https://huggingface.co/TransQuest/monotransquest-hter-en_cs-pharmaceutical + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-movie_roberta_base_en.md b/docs/_posts/ahmedlone127/2024-09-05-movie_roberta_base_en.md new file mode 100644 index 00000000000000..1645b376f83df4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-movie_roberta_base_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English movie_roberta_base RoBertaEmbeddings from thatdramebaazguy +author: John Snow Labs +name: movie_roberta_base +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`movie_roberta_base` is a English model originally trained by thatdramebaazguy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/movie_roberta_base_en_5.5.0_3.0_1725566430704.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/movie_roberta_base_en_5.5.0_3.0_1725566430704.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("movie_roberta_base","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("movie_roberta_base","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|movie_roberta_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|466.0 MB| + +## References + +https://huggingface.co/thatdramebaazguy/movie-roberta-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-movie_roberta_base_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-movie_roberta_base_pipeline_en.md new file mode 100644 index 00000000000000..80bcd14c48b298 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-movie_roberta_base_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English movie_roberta_base_pipeline pipeline RoBertaEmbeddings from thatdramebaazguy +author: John Snow Labs +name: movie_roberta_base_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`movie_roberta_base_pipeline` is a English model originally trained by thatdramebaazguy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/movie_roberta_base_pipeline_en_5.5.0_3.0_1725566455015.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/movie_roberta_base_pipeline_en_5.5.0_3.0_1725566455015.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("movie_roberta_base_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("movie_roberta_base_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|movie_roberta_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|466.0 MB| + +## References + +https://huggingface.co/thatdramebaazguy/movie-roberta-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-mpnet_adaptation_mitigation_classifier_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-mpnet_adaptation_mitigation_classifier_pipeline_en.md new file mode 100644 index 00000000000000..3f8b58635225ac --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-mpnet_adaptation_mitigation_classifier_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mpnet_adaptation_mitigation_classifier_pipeline pipeline MPNetForSequenceClassification from ppsingh +author: John Snow Labs +name: mpnet_adaptation_mitigation_classifier_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mpnet_adaptation_mitigation_classifier_pipeline` is a English model originally trained by ppsingh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mpnet_adaptation_mitigation_classifier_pipeline_en_5.5.0_3.0_1725575411775.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mpnet_adaptation_mitigation_classifier_pipeline_en_5.5.0_3.0_1725575411775.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mpnet_adaptation_mitigation_classifier_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mpnet_adaptation_mitigation_classifier_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
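+
+For quick checks on a single string, pretrained pipelines also expose `annotate`; a small sketch (the sample sentence is only illustrative):
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("mpnet_adaptation_mitigation_classifier_pipeline", lang = "en")
+result = pipeline.annotate("The program funds new flood defenses along the coast.")
+print(result["class"])  # predicted label(s) for the input text
+```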
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mpnet_adaptation_mitigation_classifier_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/ppsingh/mpnet-adaptation_mitigation-classifier + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-mpnet_base_airlines_news_multi_label_en.md b/docs/_posts/ahmedlone127/2024-09-05-mpnet_base_airlines_news_multi_label_en.md new file mode 100644 index 00000000000000..6bc91177af42a5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-mpnet_base_airlines_news_multi_label_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mpnet_base_airlines_news_multi_label MPNetForSequenceClassification from dahe827 +author: John Snow Labs +name: mpnet_base_airlines_news_multi_label +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, mpnet] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mpnet_base_airlines_news_multi_label` is a English model originally trained by dahe827. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mpnet_base_airlines_news_multi_label_en_5.5.0_3.0_1725575237214.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mpnet_base_airlines_news_multi_label_en_5.5.0_3.0_1725575237214.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = MPNetForSequenceClassification.pretrained("mpnet_base_airlines_news_multi_label","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = MPNetForSequenceClassification.pretrained("mpnet_base_airlines_news_multi_label", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
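+
+After `transform`, the predictions are stored as annotations in the `class` column; one way to inspect them, following the column names used above:
+
+```python
+pipelineDF.select("text", "class.result").show(truncate=False)
+```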
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mpnet_base_airlines_news_multi_label| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|263.5 MB| + +## References + +https://huggingface.co/dahe827/mpnet-base-airlines-news-multi-label \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-mpnet_base_airlines_news_multi_label_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-mpnet_base_airlines_news_multi_label_pipeline_en.md new file mode 100644 index 00000000000000..73d59038dfc85e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-mpnet_base_airlines_news_multi_label_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mpnet_base_airlines_news_multi_label_pipeline pipeline MPNetForSequenceClassification from dahe827 +author: John Snow Labs +name: mpnet_base_airlines_news_multi_label_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mpnet_base_airlines_news_multi_label_pipeline` is a English model originally trained by dahe827. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mpnet_base_airlines_news_multi_label_pipeline_en_5.5.0_3.0_1725575311713.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mpnet_base_airlines_news_multi_label_pipeline_en_5.5.0_3.0_1725575311713.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mpnet_base_airlines_news_multi_label_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mpnet_base_airlines_news_multi_label_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mpnet_base_airlines_news_multi_label_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|263.5 MB| + +## References + +https://huggingface.co/dahe827/mpnet-base-airlines-news-multi-label + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-mpnet_base_edu_classifier_en.md b/docs/_posts/ahmedlone127/2024-09-05-mpnet_base_edu_classifier_en.md new file mode 100644 index 00000000000000..8957338e86374f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-mpnet_base_edu_classifier_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mpnet_base_edu_classifier MPNetForSequenceClassification from pszemraj +author: John Snow Labs +name: mpnet_base_edu_classifier +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, mpnet] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mpnet_base_edu_classifier` is a English model originally trained by pszemraj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mpnet_base_edu_classifier_en_5.5.0_3.0_1725574909750.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mpnet_base_edu_classifier_en_5.5.0_3.0_1725574909750.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = MPNetForSequenceClassification.pretrained("mpnet_base_edu_classifier","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = MPNetForSequenceClassification.pretrained("mpnet_base_edu_classifier", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mpnet_base_edu_classifier| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/pszemraj/mpnet-base-edu-classifier \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-mpnet_base_edu_classifier_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-mpnet_base_edu_classifier_pipeline_en.md new file mode 100644 index 00000000000000..0a89b4f6395a31 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-mpnet_base_edu_classifier_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mpnet_base_edu_classifier_pipeline pipeline MPNetForSequenceClassification from pszemraj +author: John Snow Labs +name: mpnet_base_edu_classifier_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mpnet_base_edu_classifier_pipeline` is a English model originally trained by pszemraj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mpnet_base_edu_classifier_pipeline_en_5.5.0_3.0_1725574934731.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mpnet_base_edu_classifier_pipeline_en_5.5.0_3.0_1725574934731.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mpnet_base_edu_classifier_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mpnet_base_edu_classifier_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mpnet_base_edu_classifier_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/pszemraj/mpnet-base-edu-classifier + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-mpnet_base_news_about_gold_en.md b/docs/_posts/ahmedlone127/2024-09-05-mpnet_base_news_about_gold_en.md new file mode 100644 index 00000000000000..4160ed3faf1a45 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-mpnet_base_news_about_gold_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mpnet_base_news_about_gold MPNetForSequenceClassification from DunnBC22 +author: John Snow Labs +name: mpnet_base_news_about_gold +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, mpnet] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mpnet_base_news_about_gold` is a English model originally trained by DunnBC22. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mpnet_base_news_about_gold_en_5.5.0_3.0_1725575461787.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mpnet_base_news_about_gold_en_5.5.0_3.0_1725575461787.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = MPNetForSequenceClassification.pretrained("mpnet_base_news_about_gold","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = MPNetForSequenceClassification.pretrained("mpnet_base_news_about_gold", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mpnet_base_news_about_gold| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|390.4 MB| + +## References + +https://huggingface.co/DunnBC22/mpnet-base-News_About_Gold \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-mpnet_base_news_about_gold_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-mpnet_base_news_about_gold_pipeline_en.md new file mode 100644 index 00000000000000..7cf303aa7c6af7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-mpnet_base_news_about_gold_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mpnet_base_news_about_gold_pipeline pipeline MPNetForSequenceClassification from DunnBC22 +author: John Snow Labs +name: mpnet_base_news_about_gold_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mpnet_base_news_about_gold_pipeline` is a English model originally trained by DunnBC22. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mpnet_base_news_about_gold_pipeline_en_5.5.0_3.0_1725575485589.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mpnet_base_news_about_gold_pipeline_en_5.5.0_3.0_1725575485589.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mpnet_base_news_about_gold_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mpnet_base_news_about_gold_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mpnet_base_news_about_gold_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|390.5 MB| + +## References + +https://huggingface.co/DunnBC22/mpnet-base-News_About_Gold + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-mrpc_microsoft_deberta_v3_large_seed_3_en.md b/docs/_posts/ahmedlone127/2024-09-05-mrpc_microsoft_deberta_v3_large_seed_3_en.md new file mode 100644 index 00000000000000..b52ec9c3744d4f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-mrpc_microsoft_deberta_v3_large_seed_3_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mrpc_microsoft_deberta_v3_large_seed_3 DeBertaForSequenceClassification from utahnlp +author: John Snow Labs +name: mrpc_microsoft_deberta_v3_large_seed_3 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mrpc_microsoft_deberta_v3_large_seed_3` is a English model originally trained by utahnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mrpc_microsoft_deberta_v3_large_seed_3_en_5.5.0_3.0_1725562654261.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mrpc_microsoft_deberta_v3_large_seed_3_en_5.5.0_3.0_1725562654261.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("mrpc_microsoft_deberta_v3_large_seed_3","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("mrpc_microsoft_deberta_v3_large_seed_3", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
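+
+The fitted `pipelineModel` can score whole batches in the same way; a sketch with a few additional rows (the sentences are placeholders):
+
+```python
+more_data = spark.createDataFrame(
+    [["The company said profits rose sharply."],
+     ["Profits increased, the company reported."]]
+).toDF("text")
+
+pipelineModel.transform(more_data).select("text", "class.result").show(truncate=False)
+```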
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mrpc_microsoft_deberta_v3_large_seed_3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/utahnlp/mrpc_microsoft_deberta-v3-large_seed-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-mrpc_microsoft_deberta_v3_large_seed_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-mrpc_microsoft_deberta_v3_large_seed_3_pipeline_en.md new file mode 100644 index 00000000000000..51986f02db6b04 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-mrpc_microsoft_deberta_v3_large_seed_3_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mrpc_microsoft_deberta_v3_large_seed_3_pipeline pipeline DeBertaForSequenceClassification from utahnlp +author: John Snow Labs +name: mrpc_microsoft_deberta_v3_large_seed_3_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mrpc_microsoft_deberta_v3_large_seed_3_pipeline` is a English model originally trained by utahnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mrpc_microsoft_deberta_v3_large_seed_3_pipeline_en_5.5.0_3.0_1725562797173.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mrpc_microsoft_deberta_v3_large_seed_3_pipeline_en_5.5.0_3.0_1725562797173.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mrpc_microsoft_deberta_v3_large_seed_3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mrpc_microsoft_deberta_v3_large_seed_3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mrpc_microsoft_deberta_v3_large_seed_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/utahnlp/mrpc_microsoft_deberta-v3-large_seed-3 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-mrr_qa_15k_until_2_08_finrtuned_on_21_7_model_en.md b/docs/_posts/ahmedlone127/2024-09-05-mrr_qa_15k_until_2_08_finrtuned_on_21_7_model_en.md new file mode 100644 index 00000000000000..71e41b65045c49 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-mrr_qa_15k_until_2_08_finrtuned_on_21_7_model_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English mrr_qa_15k_until_2_08_finrtuned_on_21_7_model RoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: mrr_qa_15k_until_2_08_finrtuned_on_21_7_model +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mrr_qa_15k_until_2_08_finrtuned_on_21_7_model` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mrr_qa_15k_until_2_08_finrtuned_on_21_7_model_en_5.5.0_3.0_1725576448707.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mrr_qa_15k_until_2_08_finrtuned_on_21_7_model_en_5.5.0_3.0_1725576448707.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = RoBertaForQuestionAnswering.pretrained("mrr_qa_15k_until_2_08_finrtuned_on_21_7_model","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = RoBertaForQuestionAnswering.pretrained("mrr_qa_15k_until_2_08_finrtuned_on_21_7_model", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
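+
+The extracted answer span is returned in the `answer` column; a short sketch of reading it back next to the question:
+
+```python
+pipelineDF.select("document_question.result", "answer.result").show(truncate=False)
+```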
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mrr_qa_15k_until_2_08_finrtuned_on_21_7_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|464.0 MB| + +## References + +https://huggingface.co/am-infoweb/MRR_QA_15K_UNTIL_2_08_FINRTUNED_ON_21_7_MODEL \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-mrr_qa_15k_until_2_08_finrtuned_on_21_7_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-mrr_qa_15k_until_2_08_finrtuned_on_21_7_model_pipeline_en.md new file mode 100644 index 00000000000000..ef06f1741df507 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-mrr_qa_15k_until_2_08_finrtuned_on_21_7_model_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English mrr_qa_15k_until_2_08_finrtuned_on_21_7_model_pipeline pipeline RoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: mrr_qa_15k_until_2_08_finrtuned_on_21_7_model_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mrr_qa_15k_until_2_08_finrtuned_on_21_7_model_pipeline` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mrr_qa_15k_until_2_08_finrtuned_on_21_7_model_pipeline_en_5.5.0_3.0_1725576472383.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mrr_qa_15k_until_2_08_finrtuned_on_21_7_model_pipeline_en_5.5.0_3.0_1725576472383.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mrr_qa_15k_until_2_08_finrtuned_on_21_7_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mrr_qa_15k_until_2_08_finrtuned_on_21_7_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
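+
+Here `df` is assumed to provide the question and context columns read by the pipeline's document assembler; a minimal sketch of building it (the column names `question` and `context` are an assumption):
+
+```python
+df = spark.createDataFrame(
+    [["What framework do I use?", "I use spark-nlp."]]
+).toDF("question", "context")
+# df can then be passed to pipeline.transform(df) as shown above
+```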
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mrr_qa_15k_until_2_08_finrtuned_on_21_7_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|464.1 MB| + +## References + +https://huggingface.co/am-infoweb/MRR_QA_15K_UNTIL_2_08_FINRTUNED_ON_21_7_MODEL + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-multi_balanced_model_en.md b/docs/_posts/ahmedlone127/2024-09-05-multi_balanced_model_en.md new file mode 100644 index 00000000000000..7ff88f02e9c7b8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-multi_balanced_model_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English multi_balanced_model DistilBertForTokenClassification from SiriusW +author: John Snow Labs +name: multi_balanced_model +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`multi_balanced_model` is a English model originally trained by SiriusW. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/multi_balanced_model_en_5.5.0_3.0_1725505938629.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/multi_balanced_model_en_5.5.0_3.0_1725505938629.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("multi_balanced_model","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("multi_balanced_model", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
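+
+Token-level predictions land in the `ner` column; a small sketch that pairs each token with its predicted tag:
+
+```python
+pipelineDF.selectExpr("explode(arrays_zip(token.result, ner.result)) as token_tag").show(truncate=False)
+```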
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|multi_balanced_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/SiriusW/multi_balanced_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-multilingual_sentiment_covid19_xx.md b/docs/_posts/ahmedlone127/2024-09-05-multilingual_sentiment_covid19_xx.md new file mode 100644 index 00000000000000..c5f152af254de3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-multilingual_sentiment_covid19_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual multilingual_sentiment_covid19 XlmRoBertaForSequenceClassification from clampert +author: John Snow Labs +name: multilingual_sentiment_covid19 +date: 2024-09-05 +tags: [xx, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`multilingual_sentiment_covid19` is a Multilingual model originally trained by clampert. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/multilingual_sentiment_covid19_xx_5.5.0_3.0_1725513576092.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/multilingual_sentiment_covid19_xx_5.5.0_3.0_1725513576092.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("multilingual_sentiment_covid19","xx") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("multilingual_sentiment_covid19", "xx")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
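+
+Since the model is multilingual, a single DataFrame can mix languages; a sketch with illustrative rows:
+
+```python
+texts = spark.createDataFrame(
+    [["Vaccination centres are finally reopening."],
+     ["Die Lage in den Krankenhäusern bleibt angespannt."]]
+).toDF("text")
+
+pipelineModel.transform(texts).select("text", "class.result").show(truncate=False)
+```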
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|multilingual_sentiment_covid19| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|xx| +|Size:|1.0 GB| + +## References + +https://huggingface.co/clampert/multilingual-sentiment-covid19 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-multilingual_toxic_text_detection_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-05-multilingual_toxic_text_detection_pipeline_xx.md new file mode 100644 index 00000000000000..8ebb7cc1733c16 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-multilingual_toxic_text_detection_pipeline_xx.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Multilingual multilingual_toxic_text_detection_pipeline pipeline XlmRoBertaForSequenceClassification from marianna13 +author: John Snow Labs +name: multilingual_toxic_text_detection_pipeline +date: 2024-09-05 +tags: [xx, open_source, pipeline, onnx] +task: Text Classification +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`multilingual_toxic_text_detection_pipeline` is a Multilingual model originally trained by marianna13. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/multilingual_toxic_text_detection_pipeline_xx_5.5.0_3.0_1725526534133.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/multilingual_toxic_text_detection_pipeline_xx_5.5.0_3.0_1725526534133.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("multilingual_toxic_text_detection_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("multilingual_toxic_text_detection_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|multilingual_toxic_text_detection_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|993.2 MB| + +## References + +https://huggingface.co/marianna13/multilingual-toxic-text-detection + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-multilingual_toxic_text_detection_xx.md b/docs/_posts/ahmedlone127/2024-09-05-multilingual_toxic_text_detection_xx.md new file mode 100644 index 00000000000000..6a0e9675c0e667 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-multilingual_toxic_text_detection_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual multilingual_toxic_text_detection XlmRoBertaForSequenceClassification from marianna13 +author: John Snow Labs +name: multilingual_toxic_text_detection +date: 2024-09-05 +tags: [xx, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`multilingual_toxic_text_detection` is a Multilingual model originally trained by marianna13. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/multilingual_toxic_text_detection_xx_5.5.0_3.0_1725526485769.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/multilingual_toxic_text_detection_xx_5.5.0_3.0_1725526485769.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("multilingual_toxic_text_detection","xx") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("multilingual_toxic_text_detection", "xx")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|multilingual_toxic_text_detection| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|xx| +|Size:|993.2 MB| + +## References + +https://huggingface.co/marianna13/multilingual-toxic-text-detection \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-muppet_roberta_base_en.md b/docs/_posts/ahmedlone127/2024-09-05-muppet_roberta_base_en.md new file mode 100644 index 00000000000000..81db1b6966707f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-muppet_roberta_base_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English muppet_roberta_base RoBertaEmbeddings from facebook +author: John Snow Labs +name: muppet_roberta_base +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`muppet_roberta_base` is a English model originally trained by facebook. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/muppet_roberta_base_en_5.5.0_3.0_1725565908930.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/muppet_roberta_base_en_5.5.0_3.0_1725565908930.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("muppet_roberta_base","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("muppet_roberta_base","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
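+
+Each token annotation carries its embedding vector; a short sketch of pulling the vectors out of the `embeddings` column after `transform`:
+
+```python
+pipelineDF.selectExpr("explode(embeddings.embeddings) as token_embedding").show(1, truncate=80)
+```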
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|muppet_roberta_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|298.5 MB| + +## References + +https://huggingface.co/facebook/muppet-roberta-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-muppet_roberta_base_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-muppet_roberta_base_pipeline_en.md new file mode 100644 index 00000000000000..8ed44df4ce8f91 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-muppet_roberta_base_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English muppet_roberta_base_pipeline pipeline RoBertaEmbeddings from facebook +author: John Snow Labs +name: muppet_roberta_base_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`muppet_roberta_base_pipeline` is a English model originally trained by facebook. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/muppet_roberta_base_pipeline_en_5.5.0_3.0_1725565999806.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/muppet_roberta_base_pipeline_en_5.5.0_3.0_1725565999806.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("muppet_roberta_base_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("muppet_roberta_base_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|muppet_roberta_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|298.5 MB| + +## References + +https://huggingface.co/facebook/muppet-roberta-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-naija_twitter_sentiment_afriberta_large_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-naija_twitter_sentiment_afriberta_large_pipeline_en.md new file mode 100644 index 00000000000000..d7745e08ed1e4d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-naija_twitter_sentiment_afriberta_large_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English naija_twitter_sentiment_afriberta_large_pipeline pipeline XlmRoBertaForSequenceClassification from Davlan +author: John Snow Labs +name: naija_twitter_sentiment_afriberta_large_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`naija_twitter_sentiment_afriberta_large_pipeline` is a English model originally trained by Davlan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/naija_twitter_sentiment_afriberta_large_pipeline_en_5.5.0_3.0_1725514078121.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/naija_twitter_sentiment_afriberta_large_pipeline_en_5.5.0_3.0_1725514078121.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("naija_twitter_sentiment_afriberta_large_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("naija_twitter_sentiment_afriberta_large_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|naija_twitter_sentiment_afriberta_large_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|470.5 MB| + +## References + +https://huggingface.co/Davlan/naija-twitter-sentiment-afriberta-large + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ner_bert_large_cased_portuguese_lenerbr_finetuned_ner_en.md b/docs/_posts/ahmedlone127/2024-09-05-ner_bert_large_cased_portuguese_lenerbr_finetuned_ner_en.md new file mode 100644 index 00000000000000..74f1ed2509855e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ner_bert_large_cased_portuguese_lenerbr_finetuned_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ner_bert_large_cased_portuguese_lenerbr_finetuned_ner BertForTokenClassification from bradoc +author: John Snow Labs +name: ner_bert_large_cased_portuguese_lenerbr_finetuned_ner +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_bert_large_cased_portuguese_lenerbr_finetuned_ner` is a English model originally trained by bradoc. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_bert_large_cased_portuguese_lenerbr_finetuned_ner_en_5.5.0_3.0_1725516299714.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_bert_large_cased_portuguese_lenerbr_finetuned_ner_en_5.5.0_3.0_1725516299714.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("ner_bert_large_cased_portuguese_lenerbr_finetuned_ner","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("ner_bert_large_cased_portuguese_lenerbr_finetuned_ner", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
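+
+To merge token-level tags into entity chunks, a `NerConverter` stage can be appended to the same pipeline; a sketch assuming the column names used above:
+
+```python
+from sparknlp.annotator import NerConverter
+
+nerConverter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier, nerConverter])
+pipeline.fit(data).transform(data).select("ner_chunk.result").show(truncate=False)
+```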
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_bert_large_cased_portuguese_lenerbr_finetuned_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/bradoc/ner-bert-large-cased-pt-lenerbr-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ner_cw_pipeline_testt_en.md b/docs/_posts/ahmedlone127/2024-09-05-ner_cw_pipeline_testt_en.md new file mode 100644 index 00000000000000..0b741454ffdbfa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ner_cw_pipeline_testt_en.md @@ -0,0 +1,66 @@ +--- +layout: model +title: English ner_cw_pipeline_testt pipeline DistilBertForTokenClassification from ArshiaKarimian +author: John Snow Labs +name: ner_cw_pipeline_testt +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_cw_pipeline_testt` is a English model originally trained by ArshiaKarimian. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_cw_pipeline_testt_en_5.5.0_3.0_1725518635043.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_cw_pipeline_testt_en_5.5.0_3.0_1725518635043.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ner_cw_pipeline_testt", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ner_cw_pipeline_testt", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_cw_pipeline_testt| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/ArshiaKarimian/NER_CW_PIPELINE_testt \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ner_demo_en.md b/docs/_posts/ahmedlone127/2024-09-05-ner_demo_en.md new file mode 100644 index 00000000000000..24af94512b7e07 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ner_demo_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ner_demo DistilBertForTokenClassification from tensorboy +author: John Snow Labs +name: ner_demo +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_demo` is a English model originally trained by tensorboy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_demo_en_5.5.0_3.0_1725506120119.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_demo_en_5.5.0_3.0_1725506120119.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("ner_demo","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("ner_demo", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_demo| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/tensorboy/ner_demo \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ner_fine_tuned_beto_pipeline_es.md b/docs/_posts/ahmedlone127/2024-09-05-ner_fine_tuned_beto_pipeline_es.md new file mode 100644 index 00000000000000..8c15262124189d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ner_fine_tuned_beto_pipeline_es.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Castilian, Spanish ner_fine_tuned_beto_pipeline pipeline BertForTokenClassification from NazaGara +author: John Snow Labs +name: ner_fine_tuned_beto_pipeline +date: 2024-09-05 +tags: [es, open_source, pipeline, onnx] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_fine_tuned_beto_pipeline` is a Castilian, Spanish model originally trained by NazaGara. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_fine_tuned_beto_pipeline_es_5.5.0_3.0_1725516591482.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_fine_tuned_beto_pipeline_es_5.5.0_3.0_1725516591482.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ner_fine_tuned_beto_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ner_fine_tuned_beto_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_fine_tuned_beto_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|409.5 MB| + +## References + +https://huggingface.co/NazaGara/NER-fine-tuned-BETO + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ner_meddocan_pipeline_es.md b/docs/_posts/ahmedlone127/2024-09-05-ner_meddocan_pipeline_es.md new file mode 100644 index 00000000000000..60e8b8de63ea89 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ner_meddocan_pipeline_es.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Castilian, Spanish ner_meddocan_pipeline pipeline RoBertaForTokenClassification from Dnidof +author: John Snow Labs +name: ner_meddocan_pipeline +date: 2024-09-05 +tags: [es, open_source, pipeline, onnx] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_meddocan_pipeline` is a Castilian, Spanish model originally trained by Dnidof. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_meddocan_pipeline_es_5.5.0_3.0_1725512966007.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_meddocan_pipeline_es_5.5.0_3.0_1725512966007.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ner_meddocan_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ner_meddocan_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_meddocan_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|436.3 MB| + +## References + +https://huggingface.co/Dnidof/NER-MEDDOCAN + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ner_test_bert_base_uncased_finetuned_500k_adamw_3_epoch_locations_en.md b/docs/_posts/ahmedlone127/2024-09-05-ner_test_bert_base_uncased_finetuned_500k_adamw_3_epoch_locations_en.md new file mode 100644 index 00000000000000..000a56ccfa805c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ner_test_bert_base_uncased_finetuned_500k_adamw_3_epoch_locations_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English ner_test_bert_base_uncased_finetuned_500k_adamw_3_epoch_locations BertForTokenClassification from poodledude +author: John Snow Labs +name: ner_test_bert_base_uncased_finetuned_500k_adamw_3_epoch_locations +date: 2024-09-05 +tags: [bert, en, open_source, token_classification, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_test_bert_base_uncased_finetuned_500k_adamw_3_epoch_locations` is a English model originally trained by poodledude. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_test_bert_base_uncased_finetuned_500k_adamw_3_epoch_locations_en_5.5.0_3.0_1725539092321.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_test_bert_base_uncased_finetuned_500k_adamw_3_epoch_locations_en_5.5.0_3.0_1725539092321.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+tokenClassifier = BertForTokenClassification.pretrained("ner_test_bert_base_uncased_finetuned_500k_adamw_3_epoch_locations","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification
+    .pretrained("ner_test_bert_base_uncased_finetuned_500k_adamw_3_epoch_locations", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_test_bert_base_uncased_finetuned_500k_adamw_3_epoch_locations| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|407.2 MB| + +## References + +References + +https://huggingface.co/poodledude/ner-test-bert-base-uncased-finetuned-500K-AdamW-3-epoch-locations \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ner_test_bert_base_uncased_finetuned_500k_adamw_3_epoch_locations_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-ner_test_bert_base_uncased_finetuned_500k_adamw_3_epoch_locations_pipeline_en.md new file mode 100644 index 00000000000000..2a486f27581333 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ner_test_bert_base_uncased_finetuned_500k_adamw_3_epoch_locations_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English ner_test_bert_base_uncased_finetuned_500k_adamw_3_epoch_locations_pipeline pipeline BertForTokenClassification from adambuttrick +author: John Snow Labs +name: ner_test_bert_base_uncased_finetuned_500k_adamw_3_epoch_locations_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_test_bert_base_uncased_finetuned_500k_adamw_3_epoch_locations_pipeline` is a English model originally trained by adambuttrick. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_test_bert_base_uncased_finetuned_500k_adamw_3_epoch_locations_pipeline_en_5.5.0_3.0_1725539112794.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_test_bert_base_uncased_finetuned_500k_adamw_3_epoch_locations_pipeline_en_5.5.0_3.0_1725539112794.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ner_test_bert_base_uncased_finetuned_500k_adamw_3_epoch_locations_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ner_test_bert_base_uncased_finetuned_500k_adamw_3_epoch_locations_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_test_bert_base_uncased_finetuned_500k_adamw_3_epoch_locations_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/adambuttrick/ner-test-bert-base-uncased-finetuned-500K-AdamW-3-epoch-locations + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ner_totalamount_en.md b/docs/_posts/ahmedlone127/2024-09-05-ner_totalamount_en.md new file mode 100644 index 00000000000000..0b12e017686988 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ner_totalamount_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ner_totalamount DistilBertForTokenClassification from Pablito47 +author: John Snow Labs +name: ner_totalamount +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_totalamount` is a English model originally trained by Pablito47. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_totalamount_en_5.5.0_3.0_1725495814315.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_totalamount_en_5.5.0_3.0_1725495814315.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("ner_totalamount","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("ner_totalamount", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_totalamount| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Pablito47/NER-TOTALAMOUNT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-nerd_nerd_random2_seed1_bernice_en.md b/docs/_posts/ahmedlone127/2024-09-05-nerd_nerd_random2_seed1_bernice_en.md new file mode 100644 index 00000000000000..5c50f6b93c10e7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-nerd_nerd_random2_seed1_bernice_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English nerd_nerd_random2_seed1_bernice XlmRoBertaForSequenceClassification from tweettemposhift +author: John Snow Labs +name: nerd_nerd_random2_seed1_bernice +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nerd_nerd_random2_seed1_bernice` is a English model originally trained by tweettemposhift. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nerd_nerd_random2_seed1_bernice_en_5.5.0_3.0_1725527568895.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nerd_nerd_random2_seed1_bernice_en_5.5.0_3.0_1725527568895.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("nerd_nerd_random2_seed1_bernice","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("nerd_nerd_random2_seed1_bernice", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
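+
+After `transform`, each document's predicted label is stored in the `class` output column as an annotation. The short sketch below shows one way to inspect the prediction and the per-label scores that classifiers usually expose through the annotation metadata; column names follow the snippet above, and the metadata layout is an assumption.
+
+```python
+# "class.result" holds the predicted label for each row
+pipelineDF.selectExpr("text", "class.result as prediction").show(truncate=False)
+
+# per-label confidence scores are typically available in the annotation metadata
+pipelineDF.selectExpr("explode(class.metadata) as scores").show(truncate=False)
+```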
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nerd_nerd_random2_seed1_bernice| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|832.0 MB| + +## References + +https://huggingface.co/tweettemposhift/nerd-nerd_random2_seed1-bernice \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-nerd_nerd_random2_seed1_bernice_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-nerd_nerd_random2_seed1_bernice_pipeline_en.md new file mode 100644 index 00000000000000..7eeba3d68bc1f3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-nerd_nerd_random2_seed1_bernice_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English nerd_nerd_random2_seed1_bernice_pipeline pipeline XlmRoBertaForSequenceClassification from tweettemposhift +author: John Snow Labs +name: nerd_nerd_random2_seed1_bernice_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nerd_nerd_random2_seed1_bernice_pipeline` is a English model originally trained by tweettemposhift. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nerd_nerd_random2_seed1_bernice_pipeline_en_5.5.0_3.0_1725527697033.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nerd_nerd_random2_seed1_bernice_pipeline_en_5.5.0_3.0_1725527697033.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("nerd_nerd_random2_seed1_bernice_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("nerd_nerd_random2_seed1_bernice_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nerd_nerd_random2_seed1_bernice_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|832.0 MB| + +## References + +https://huggingface.co/tweettemposhift/nerd-nerd_random2_seed1-bernice + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-nerd_nerd_random2_seed2_bernice_en.md b/docs/_posts/ahmedlone127/2024-09-05-nerd_nerd_random2_seed2_bernice_en.md new file mode 100644 index 00000000000000..24c3355e48e516 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-nerd_nerd_random2_seed2_bernice_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English nerd_nerd_random2_seed2_bernice XlmRoBertaForSequenceClassification from tweettemposhift +author: John Snow Labs +name: nerd_nerd_random2_seed2_bernice +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nerd_nerd_random2_seed2_bernice` is a English model originally trained by tweettemposhift. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nerd_nerd_random2_seed2_bernice_en_5.5.0_3.0_1725530347675.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nerd_nerd_random2_seed2_bernice_en_5.5.0_3.0_1725530347675.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("nerd_nerd_random2_seed2_bernice","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("nerd_nerd_random2_seed2_bernice", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nerd_nerd_random2_seed2_bernice| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|831.8 MB| + +## References + +https://huggingface.co/tweettemposhift/nerd-nerd_random2_seed2-bernice \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-nerd_nerd_random2_seed2_bernice_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-nerd_nerd_random2_seed2_bernice_pipeline_en.md new file mode 100644 index 00000000000000..24ee21366a54c8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-nerd_nerd_random2_seed2_bernice_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English nerd_nerd_random2_seed2_bernice_pipeline pipeline XlmRoBertaForSequenceClassification from tweettemposhift +author: John Snow Labs +name: nerd_nerd_random2_seed2_bernice_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nerd_nerd_random2_seed2_bernice_pipeline` is a English model originally trained by tweettemposhift. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nerd_nerd_random2_seed2_bernice_pipeline_en_5.5.0_3.0_1725530476557.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nerd_nerd_random2_seed2_bernice_pipeline_en_5.5.0_3.0_1725530476557.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("nerd_nerd_random2_seed2_bernice_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("nerd_nerd_random2_seed2_bernice_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nerd_nerd_random2_seed2_bernice_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|831.9 MB| + +## References + +https://huggingface.co/tweettemposhift/nerd-nerd_random2_seed2-bernice + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-nerkor_cars_onpp_hubert_hu.md b/docs/_posts/ahmedlone127/2024-09-05-nerkor_cars_onpp_hubert_hu.md new file mode 100644 index 00000000000000..f7f7dbaa6cdac8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-nerkor_cars_onpp_hubert_hu.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Hungarian nerkor_cars_onpp_hubert BertForTokenClassification from novakat +author: John Snow Labs +name: nerkor_cars_onpp_hubert +date: 2024-09-05 +tags: [hu, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: hu +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nerkor_cars_onpp_hubert` is a Hungarian model originally trained by novakat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nerkor_cars_onpp_hubert_hu_5.5.0_3.0_1725511531161.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nerkor_cars_onpp_hubert_hu_5.5.0_3.0_1725511531161.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("nerkor_cars_onpp_hubert","hu") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("nerkor_cars_onpp_hubert", "hu")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
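+
+Note that the template feeds an English placeholder sentence to a Hungarian NER model. For a more meaningful check, a Hungarian example can be substituted; the sentence below is purely illustrative and not part of the original card.
+
+```python
+# illustrative Hungarian input; replace with your own text
+data = spark.createDataFrame([["Kovács János tegnap Budapesten találkozott az OTP Bank vezetőjével."]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+# pair each token with its predicted tag
+result.selectExpr("explode(arrays_zip(token.result, ner.result)) as tagged").show(truncate=False)
+```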
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nerkor_cars_onpp_hubert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|hu| +|Size:|412.7 MB| + +## References + +https://huggingface.co/novakat/nerkor-cars-onpp-hubert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-neural_cherche_sparse_embed_en.md b/docs/_posts/ahmedlone127/2024-09-05-neural_cherche_sparse_embed_en.md new file mode 100644 index 00000000000000..56280495600fa0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-neural_cherche_sparse_embed_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English neural_cherche_sparse_embed BertEmbeddings from raphaelsty +author: John Snow Labs +name: neural_cherche_sparse_embed +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`neural_cherche_sparse_embed` is a English model originally trained by raphaelsty. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/neural_cherche_sparse_embed_en_5.5.0_3.0_1725552845014.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/neural_cherche_sparse_embed_en_5.5.0_3.0_1725552845014.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("neural_cherche_sparse_embed","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("neural_cherche_sparse_embed","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
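+
+The `embeddings` output column contains one annotation per token, with the vector stored in the annotation's `embeddings` field. Below is a minimal sketch of projecting those vectors out of the transformed DataFrame (Spark NLP also provides an `EmbeddingsFinisher` for this); it assumes the Python pipeline shown above.
+
+```python
+from pyspark.sql.functions import col, explode
+
+# one row per token: the token text alongside its embedding vector
+vectors = pipelineDF.select(explode(col("embeddings")).alias("emb")) \
+    .select(col("emb.result").alias("token"), col("emb.embeddings").alias("vector"))
+vectors.show(truncate=80)
+```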
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|neural_cherche_sparse_embed| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|en| +|Size:|407.4 MB| + +## References + +https://huggingface.co/raphaelsty/neural-cherche-sparse-embed \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-neural_cherche_sparse_embed_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-neural_cherche_sparse_embed_pipeline_en.md new file mode 100644 index 00000000000000..a258c2bb91db36 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-neural_cherche_sparse_embed_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English neural_cherche_sparse_embed_pipeline pipeline BertEmbeddings from raphaelsty +author: John Snow Labs +name: neural_cherche_sparse_embed_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`neural_cherche_sparse_embed_pipeline` is a English model originally trained by raphaelsty. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/neural_cherche_sparse_embed_pipeline_en_5.5.0_3.0_1725552870282.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/neural_cherche_sparse_embed_pipeline_en_5.5.0_3.0_1725552870282.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("neural_cherche_sparse_embed_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("neural_cherche_sparse_embed_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|neural_cherche_sparse_embed_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.4 MB| + +## References + +https://huggingface.co/raphaelsty/neural-cherche-sparse-embed + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-neuraspeech_whisperbase_fa.md b/docs/_posts/ahmedlone127/2024-09-05-neuraspeech_whisperbase_fa.md new file mode 100644 index 00000000000000..f0e389e993fce6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-neuraspeech_whisperbase_fa.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Persian neuraspeech_whisperbase WhisperForCTC from Neurai +author: John Snow Labs +name: neuraspeech_whisperbase +date: 2024-09-05 +tags: [fa, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: fa +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`neuraspeech_whisperbase` is a Persian model originally trained by Neurai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/neuraspeech_whisperbase_fa_5.5.0_3.0_1725546805375.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/neuraspeech_whisperbase_fa_5.5.0_3.0_1725546805375.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# `data` must be a DataFrame with an "audio_content" column of raw audio samples (array of floats)
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("neuraspeech_whisperbase","fa") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// `data` must be a DataFrame with an "audio_content" column of raw audio samples (array of floats)
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("neuraspeech_whisperbase", "fa")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
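+
+`WhisperForCTC` consumes a DataFrame with an `audio_content` column holding the raw audio samples as an array of floats. A hedged sketch of building such a DataFrame with librosa follows; librosa, the file name, and the 16 kHz sample rate are assumptions, not part of the original card.
+
+```python
+import librosa
+
+# Whisper checkpoints are typically trained on 16 kHz audio, so resample on load
+samples, _ = librosa.load("recording.wav", sr=16000)
+
+data = spark.createDataFrame([[[float(s) for s in samples]]]).toDF("audio_content")
+result = pipeline.fit(data).transform(data)
+result.select("text.result").show(truncate=False)
+```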
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|neuraspeech_whisperbase| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|fa| +|Size:|641.8 MB| + +## References + +https://huggingface.co/Neurai/NeuraSpeech_WhisperBase \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-neuraspeech_whisperbase_pipeline_fa.md b/docs/_posts/ahmedlone127/2024-09-05-neuraspeech_whisperbase_pipeline_fa.md new file mode 100644 index 00000000000000..e0822faa4917b1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-neuraspeech_whisperbase_pipeline_fa.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Persian neuraspeech_whisperbase_pipeline pipeline WhisperForCTC from Neurai +author: John Snow Labs +name: neuraspeech_whisperbase_pipeline +date: 2024-09-05 +tags: [fa, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: fa +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`neuraspeech_whisperbase_pipeline` is a Persian model originally trained by Neurai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/neuraspeech_whisperbase_pipeline_fa_5.5.0_3.0_1725546849865.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/neuraspeech_whisperbase_pipeline_fa_5.5.0_3.0_1725546849865.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("neuraspeech_whisperbase_pipeline", lang = "fa") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("neuraspeech_whisperbase_pipeline", lang = "fa") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|neuraspeech_whisperbase_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fa| +|Size:|641.8 MB| + +## References + +https://huggingface.co/Neurai/NeuraSpeech_WhisperBase + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-newsbertje_base_en.md b/docs/_posts/ahmedlone127/2024-09-05-newsbertje_base_en.md new file mode 100644 index 00000000000000..d75a6e17d530f3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-newsbertje_base_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English newsbertje_base BertEmbeddings from LoicDL +author: John Snow Labs +name: newsbertje_base +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`newsbertje_base` is a English model originally trained by LoicDL. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/newsbertje_base_en_5.5.0_3.0_1725534027166.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/newsbertje_base_en_5.5.0_3.0_1725534027166.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("newsbertje_base","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("newsbertje_base","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|newsbertje_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/LoicDL/NewsBERTje-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-newsbertje_base_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-newsbertje_base_pipeline_en.md new file mode 100644 index 00000000000000..85ad3b9465f866 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-newsbertje_base_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English newsbertje_base_pipeline pipeline BertEmbeddings from LoicDL +author: John Snow Labs +name: newsbertje_base_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`newsbertje_base_pipeline` is a English model originally trained by LoicDL. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/newsbertje_base_pipeline_en_5.5.0_3.0_1725534046805.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/newsbertje_base_pipeline_en_5.5.0_3.0_1725534046805.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("newsbertje_base_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("newsbertje_base_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|newsbertje_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/LoicDL/NewsBERTje-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-nli_conventional_fine_tuning_m4faisal_en.md b/docs/_posts/ahmedlone127/2024-09-05-nli_conventional_fine_tuning_m4faisal_en.md new file mode 100644 index 00000000000000..19b16f148fdfff --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-nli_conventional_fine_tuning_m4faisal_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English nli_conventional_fine_tuning_m4faisal AlbertForSequenceClassification from m4faisal +author: John Snow Labs +name: nli_conventional_fine_tuning_m4faisal +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nli_conventional_fine_tuning_m4faisal` is a English model originally trained by m4faisal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nli_conventional_fine_tuning_m4faisal_en_5.5.0_3.0_1725509974987.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nli_conventional_fine_tuning_m4faisal_en_5.5.0_3.0_1725509974987.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = AlbertForSequenceClassification.pretrained("nli_conventional_fine_tuning_m4faisal","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = AlbertForSequenceClassification.pretrained("nli_conventional_fine_tuning_m4faisal", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nli_conventional_fine_tuning_m4faisal| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m4faisal/NLI-Conventional-Fine-Tuning \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-nli_sentence_contradiction_classification_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-nli_sentence_contradiction_classification_pipeline_en.md new file mode 100644 index 00000000000000..dcac09e717b0d2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-nli_sentence_contradiction_classification_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English nli_sentence_contradiction_classification_pipeline pipeline MPNetForSequenceClassification from mansee +author: John Snow Labs +name: nli_sentence_contradiction_classification_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nli_sentence_contradiction_classification_pipeline` is a English model originally trained by mansee. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nli_sentence_contradiction_classification_pipeline_en_5.5.0_3.0_1725575617448.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nli_sentence_contradiction_classification_pipeline_en_5.5.0_3.0_1725575617448.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("nli_sentence_contradiction_classification_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("nli_sentence_contradiction_classification_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nli_sentence_contradiction_classification_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/mansee/nli-sentence-contradiction-classification + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-noise_memo_bert_3_02_en.md b/docs/_posts/ahmedlone127/2024-09-05-noise_memo_bert_3_02_en.md new file mode 100644 index 00000000000000..401595da465c5a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-noise_memo_bert_3_02_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English noise_memo_bert_3_02 XlmRoBertaForSequenceClassification from yemen2016 +author: John Snow Labs +name: noise_memo_bert_3_02 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`noise_memo_bert_3_02` is a English model originally trained by yemen2016. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/noise_memo_bert_3_02_en_5.5.0_3.0_1725526119959.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/noise_memo_bert_3_02_en_5.5.0_3.0_1725526119959.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("noise_memo_bert_3_02","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("noise_memo_bert_3_02", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|noise_memo_bert_3_02| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|466.6 MB| + +## References + +https://huggingface.co/yemen2016/Noise_MeMo_BERT-3_02 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-noise_memo_bert_3_02_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-noise_memo_bert_3_02_pipeline_en.md new file mode 100644 index 00000000000000..d56d2070a93258 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-noise_memo_bert_3_02_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English noise_memo_bert_3_02_pipeline pipeline XlmRoBertaForSequenceClassification from yemen2016 +author: John Snow Labs +name: noise_memo_bert_3_02_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`noise_memo_bert_3_02_pipeline` is a English model originally trained by yemen2016. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/noise_memo_bert_3_02_pipeline_en_5.5.0_3.0_1725526142381.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/noise_memo_bert_3_02_pipeline_en_5.5.0_3.0_1725526142381.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("noise_memo_bert_3_02_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("noise_memo_bert_3_02_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|noise_memo_bert_3_02_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|466.6 MB| + +## References + +https://huggingface.co/yemen2016/Noise_MeMo_BERT-3_02 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-norwegian_bokml_bert_large_no.md b/docs/_posts/ahmedlone127/2024-09-05-norwegian_bokml_bert_large_no.md new file mode 100644 index 00000000000000..11ce5abcd78bbf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-norwegian_bokml_bert_large_no.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Norwegian norwegian_bokml_bert_large BertEmbeddings from NbAiLab +author: John Snow Labs +name: norwegian_bokml_bert_large +date: 2024-09-05 +tags: ["no", open_source, onnx, embeddings, bert] +task: Embeddings +language: "no" +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`norwegian_bokml_bert_large` is a Norwegian model originally trained by NbAiLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/norwegian_bokml_bert_large_no_5.5.0_3.0_1725519640802.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/norwegian_bokml_bert_large_no_5.5.0_3.0_1725519640802.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("norwegian_bokml_bert_large","no") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("norwegian_bokml_bert_large","no") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|norwegian_bokml_bert_large| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|no| +|Size:|1.3 GB| + +## References + +https://huggingface.co/NbAiLab/nb-bert-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-norwegian_bokml_roberta_base_scandi_1e4_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-norwegian_bokml_roberta_base_scandi_1e4_pipeline_en.md new file mode 100644 index 00000000000000..7c74e07f8279b5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-norwegian_bokml_roberta_base_scandi_1e4_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English norwegian_bokml_roberta_base_scandi_1e4_pipeline pipeline XlmRoBertaEmbeddings from NbAiLab +author: John Snow Labs +name: norwegian_bokml_roberta_base_scandi_1e4_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`norwegian_bokml_roberta_base_scandi_1e4_pipeline` is a English model originally trained by NbAiLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/norwegian_bokml_roberta_base_scandi_1e4_pipeline_en_5.5.0_3.0_1725555704513.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/norwegian_bokml_roberta_base_scandi_1e4_pipeline_en_5.5.0_3.0_1725555704513.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("norwegian_bokml_roberta_base_scandi_1e4_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("norwegian_bokml_roberta_base_scandi_1e4_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
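+
+A minimal usage sketch, added here and not part of the generated card: it assumes the pipeline reads from a `text` column; `printSchema()` is used because the exact names of the annotation columns produced by the included stages are not listed in this card.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+df = spark.createDataFrame([["Dette er en test."]]).toDF("text")
+pipeline = PretrainedPipeline("norwegian_bokml_roberta_base_scandi_1e4_pipeline", lang = "en")
+result = pipeline.transform(df)
+
+# Reveals the annotation columns added by the DocumentAssembler, Tokenizer and embeddings stages
+result.printSchema()
+```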
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|norwegian_bokml_roberta_base_scandi_1e4_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/NbAiLab/nb-roberta-base-scandi-1e4 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-norwegian_bokml_whisper_small_verbatim_nbailab_no.md b/docs/_posts/ahmedlone127/2024-09-05-norwegian_bokml_whisper_small_verbatim_nbailab_no.md new file mode 100644 index 00000000000000..32bec3a10397c6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-norwegian_bokml_whisper_small_verbatim_nbailab_no.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Norwegian norwegian_bokml_whisper_small_verbatim_nbailab WhisperForCTC from NbAiLab +author: John Snow Labs +name: norwegian_bokml_whisper_small_verbatim_nbailab +date: 2024-09-05 +tags: ["no", open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: "no" +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`norwegian_bokml_whisper_small_verbatim_nbailab` is a Norwegian model originally trained by NbAiLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/norwegian_bokml_whisper_small_verbatim_nbailab_no_5.5.0_3.0_1725551545265.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/norwegian_bokml_whisper_small_verbatim_nbailab_no_5.5.0_3.0_1725551545265.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("norwegian_bokml_whisper_small_verbatim_nbailab","no") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+# data is expected to be a DataFrame with an "audio_content" column holding the raw audio as a float array
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+  .setInputCol("audio_content")
+  .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("norwegian_bokml_whisper_small_verbatim_nbailab", "no")
+  .setInputCols(Array("audio_assembler"))
+  .setOutputCol("text")
+
+// data is expected to be a DataFrame with an "audio_content" column holding the raw audio as a float array
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
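+
+The example above expects a DataFrame named `data` with an `audio_content` column; the sketch below is one hedged way to build it, assuming `librosa` is available and that the annotator accepts the raw waveform as a float array (the file name is a placeholder).
+
+```python
+import sparknlp
+import librosa  # assumption: any loader that yields a float array works
+
+spark = sparknlp.start()
+
+# Whisper checkpoints are commonly trained on 16 kHz mono audio
+waveform, _ = librosa.load("sample.wav", sr = 16000, mono = True)
+
+data = spark.createDataFrame([[waveform.tolist()]]).toDF("audio_content")
+```
+
+With `data` defined this way, the `fit` and `transform` calls in the example above run end to end.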
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|norwegian_bokml_whisper_small_verbatim_nbailab| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|no| +|Size:|1.7 GB| + +## References + +https://huggingface.co/NbAiLab/nb-whisper-small-verbatim \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-norwegian_bokml_whisper_small_verbatim_nbailab_pipeline_no.md b/docs/_posts/ahmedlone127/2024-09-05-norwegian_bokml_whisper_small_verbatim_nbailab_pipeline_no.md new file mode 100644 index 00000000000000..b00671de4e8459 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-norwegian_bokml_whisper_small_verbatim_nbailab_pipeline_no.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Norwegian norwegian_bokml_whisper_small_verbatim_nbailab_pipeline pipeline WhisperForCTC from NbAiLab +author: John Snow Labs +name: norwegian_bokml_whisper_small_verbatim_nbailab_pipeline +date: 2024-09-05 +tags: ["no", open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: "no" +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`norwegian_bokml_whisper_small_verbatim_nbailab_pipeline` is a Norwegian model originally trained by NbAiLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/norwegian_bokml_whisper_small_verbatim_nbailab_pipeline_no_5.5.0_3.0_1725551638648.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/norwegian_bokml_whisper_small_verbatim_nbailab_pipeline_no_5.5.0_3.0_1725551638648.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("norwegian_bokml_whisper_small_verbatim_nbailab_pipeline", lang = "no") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("norwegian_bokml_whisper_small_verbatim_nbailab_pipeline", lang = "no") +val annotations = pipeline.transform(df) + +``` +
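+
+A short, self-contained sketch of feeding the pipeline, added for illustration: the placeholder waveform, the `audio_content` input column, and the `text` output column are all assumptions; replace the placeholder with real 16 kHz mono audio.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+# Placeholder waveform: one second of 16 kHz silence, only to illustrate the expected input shape
+audio = [0.0] * 16000
+data = spark.createDataFrame([[audio]]).toDF("audio_content")
+
+pipeline = PretrainedPipeline("norwegian_bokml_whisper_small_verbatim_nbailab_pipeline", lang = "no")
+result = pipeline.transform(data)
+result.select("text.result").show(truncate = False)
+```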
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|norwegian_bokml_whisper_small_verbatim_nbailab_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|no| +|Size:|1.7 GB| + +## References + +https://huggingface.co/NbAiLab/nb-whisper-small-verbatim + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-norwegian_intent_classifier_model2_pipeline_no.md b/docs/_posts/ahmedlone127/2024-09-05-norwegian_intent_classifier_model2_pipeline_no.md new file mode 100644 index 00000000000000..a8c2c37e1f21bb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-norwegian_intent_classifier_model2_pipeline_no.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Norwegian norwegian_intent_classifier_model2_pipeline pipeline DistilBertForSequenceClassification from Mukalingam0813 +author: John Snow Labs +name: norwegian_intent_classifier_model2_pipeline +date: 2024-09-05 +tags: ["no", open_source, pipeline, onnx] +task: Text Classification +language: "no" +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`norwegian_intent_classifier_model2_pipeline` is a Norwegian model originally trained by Mukalingam0813. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/norwegian_intent_classifier_model2_pipeline_no_5.5.0_3.0_1725580642955.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/norwegian_intent_classifier_model2_pipeline_no_5.5.0_3.0_1725580642955.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("norwegian_intent_classifier_model2_pipeline", lang = "no") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("norwegian_intent_classifier_model2_pipeline", lang = "no") +val annotations = pipeline.transform(df) + +``` +
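+
+As an alternative to `transform`, the sketch below (an addition, not from the generated card) calls `annotate` on a single string; the Norwegian sentence is invented and the `class` output key is an assumption based on the standalone classifier cards in this series.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+sparknlp.start()
+
+pipeline = PretrainedPipeline("norwegian_intent_classifier_model2_pipeline", lang = "no")
+
+# annotate() runs the whole pipeline on one string and returns plain Python lists
+result = pipeline.annotate("Jeg vil gjerne bestille en flybillett til Oslo.")
+print(result["class"])
+```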
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|norwegian_intent_classifier_model2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|no| +|Size:|507.7 MB| + +## References + +https://huggingface.co/Mukalingam0813/Norwegian-intent-classifier-model2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-nuner_v0_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-nuner_v0_1_pipeline_en.md new file mode 100644 index 00000000000000..87cd2b0b9f8d37 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-nuner_v0_1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English nuner_v0_1_pipeline pipeline RoBertaForTokenClassification from numind +author: John Snow Labs +name: nuner_v0_1_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nuner_v0_1_pipeline` is a English model originally trained by numind. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nuner_v0_1_pipeline_en_5.5.0_3.0_1725502478657.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nuner_v0_1_pipeline_en_5.5.0_3.0_1725502478657.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("nuner_v0_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("nuner_v0_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
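+
+The sketch below, added for illustration, uses `fullAnnotate` to get character offsets along with the predicted tags; the `ner` output key mirrors the standalone token-classification cards and is an assumption for this pipeline.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+sparknlp.start()
+
+pipeline = PretrainedPipeline("nuner_v0_1_pipeline", lang = "en")
+annotations = pipeline.fullAnnotate("John Snow Labs is based in Delaware.")[0]
+
+# Each annotation carries the predicted tag plus begin/end character offsets
+for token in annotations["ner"]:
+    print(token.begin, token.end, token.result)
+```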
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nuner_v0_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|346.2 MB| + +## References + +https://huggingface.co/numind/NuNER-v0.1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-nuner_v1_orgs_en.md b/docs/_posts/ahmedlone127/2024-09-05-nuner_v1_orgs_en.md new file mode 100644 index 00000000000000..58b469bf98209a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-nuner_v1_orgs_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English nuner_v1_orgs RoBertaForTokenClassification from guishe +author: John Snow Labs +name: nuner_v1_orgs +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nuner_v1_orgs` is a English model originally trained by guishe. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nuner_v1_orgs_en_5.5.0_3.0_1725501654196.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nuner_v1_orgs_en_5.5.0_3.0_1725501654196.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("nuner_v1_orgs","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("nuner_v1_orgs", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
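+
+Token-level IOB tags are often easier to consume as entity chunks; the sketch below (an addition) appends a `NerConverter` to the Python example above, assuming its `document`, `token` and `ner` columns.
+
+```python
+from sparknlp.annotator import NerConverter
+
+converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("entities")
+
+# pipelineDF comes from the Python snippet above
+chunks = converter.transform(pipelineDF)
+chunks.select("entities.result").show(truncate = False)
+```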
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nuner_v1_orgs| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|462.3 MB| + +## References + +https://huggingface.co/guishe/nuner-v1_orgs \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-nuner_v2_fewnerd_fine_super_en.md b/docs/_posts/ahmedlone127/2024-09-05-nuner_v2_fewnerd_fine_super_en.md new file mode 100644 index 00000000000000..21ece6a6f7b723 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-nuner_v2_fewnerd_fine_super_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English nuner_v2_fewnerd_fine_super RoBertaForTokenClassification from guishe +author: John Snow Labs +name: nuner_v2_fewnerd_fine_super +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nuner_v2_fewnerd_fine_super` is a English model originally trained by guishe. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nuner_v2_fewnerd_fine_super_en_5.5.0_3.0_1725512672050.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nuner_v2_fewnerd_fine_super_en_5.5.0_3.0_1725512672050.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("nuner_v2_fewnerd_fine_super","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("nuner_v2_fewnerd_fine_super", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
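+
+To see which fine-grained FewNERD labels this checkpoint can emit, the short sketch below (not part of the generated card) calls `getClasses()` on the downloaded annotator.
+
+```python
+import sparknlp
+from sparknlp.annotator import RoBertaForTokenClassification
+
+sparknlp.start()
+
+model = RoBertaForTokenClassification.pretrained("nuner_v2_fewnerd_fine_super", "en")
+print(model.getClasses())  # the IOB tag set this checkpoint can emit
+```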
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nuner_v2_fewnerd_fine_super| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|466.0 MB| + +## References + +https://huggingface.co/guishe/nuner-v2_fewnerd_fine_super \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_4000instances_opus_leaningrate2e_05_batchsize8_11_action_1_en.md b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_4000instances_opus_leaningrate2e_05_batchsize8_11_action_1_en.md new file mode 100644 index 00000000000000..9f5400e10588c5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_4000instances_opus_leaningrate2e_05_batchsize8_11_action_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_4000instances_opus_leaningrate2e_05_batchsize8_11_action_1 MarianTransformer from meghazisofiane +author: John Snow Labs +name: opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_4000instances_opus_leaningrate2e_05_batchsize8_11_action_1 +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_4000instances_opus_leaningrate2e_05_batchsize8_11_action_1` is a English model originally trained by meghazisofiane. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_4000instances_opus_leaningrate2e_05_batchsize8_11_action_1_en_5.5.0_3.0_1725545818173.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_4000instances_opus_leaningrate2e_05_batchsize8_11_action_1_en_5.5.0_3.0_1725545818173.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_4000instances_opus_leaningrate2e_05_batchsize8_11_action_1","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+  .setInputCols(Array("document"))
+  .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_4000instances_opus_leaningrate2e_05_batchsize8_11_action_1","en")
+  .setInputCols(Array("sentence"))
+  .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
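+
+For quick single-sentence checks, a `LightPipeline` avoids building a DataFrame; the sketch below (an addition) wraps the fitted `pipelineModel` from the Python example above and assumes the `translation` output column set there.
+
+```python
+from sparknlp.base import LightPipeline
+
+# Wraps the fitted pipelineModel from the Python example above
+light = LightPipeline(pipelineModel)
+
+# Returns a dict keyed by output column; "translation" holds the decoded text
+print(light.annotate("I love spark-nlp")["translation"])
+```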
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_4000instances_opus_leaningrate2e_05_batchsize8_11_action_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|528.2 MB| + +## References + +https://huggingface.co/meghazisofiane/opus-mt-en-ar-evaluated-en-to-ar-4000instances-opus-leaningRate2e-05-batchSize8-11-action-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_4000instances_opus_leaningrate2e_05_batchsize8_11_action_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_4000instances_opus_leaningrate2e_05_batchsize8_11_action_1_pipeline_en.md new file mode 100644 index 00000000000000..dd2923e91e2537 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_4000instances_opus_leaningrate2e_05_batchsize8_11_action_1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_4000instances_opus_leaningrate2e_05_batchsize8_11_action_1_pipeline pipeline MarianTransformer from meghazisofiane +author: John Snow Labs +name: opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_4000instances_opus_leaningrate2e_05_batchsize8_11_action_1_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_4000instances_opus_leaningrate2e_05_batchsize8_11_action_1_pipeline` is a English model originally trained by meghazisofiane. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_4000instances_opus_leaningrate2e_05_batchsize8_11_action_1_pipeline_en_5.5.0_3.0_1725545852149.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_4000instances_opus_leaningrate2e_05_batchsize8_11_action_1_pipeline_en_5.5.0_3.0_1725545852149.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_4000instances_opus_leaningrate2e_05_batchsize8_11_action_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_4000instances_opus_leaningrate2e_05_batchsize8_11_action_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_4000instances_opus_leaningrate2e_05_batchsize8_11_action_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|528.7 MB| + +## References + +https://huggingface.co/meghazisofiane/opus-mt-en-ar-evaluated-en-to-ar-4000instances-opus-leaningRate2e-05-batchSize8-11-action-1 + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_chinese_twi_en.md b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_chinese_twi_en.md new file mode 100644 index 00000000000000..989c87ff629163 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_chinese_twi_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English opus_maltese_english_chinese_twi MarianTransformer from steve-tong +author: John Snow Labs +name: opus_maltese_english_chinese_twi +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_chinese_twi` is a English model originally trained by steve-tong. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_chinese_twi_en_5.5.0_3.0_1725546213293.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_chinese_twi_en_5.5.0_3.0_1725546213293.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_maltese_english_chinese_twi","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+  .setInputCols(Array("document"))
+  .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_maltese_english_chinese_twi","en")
+  .setInputCols(Array("sentence"))
+  .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_chinese_twi| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|542.3 MB| + +## References + +https://huggingface.co/steve-tong/opus-mt-en-zh-tw \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_chinese_twi_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_chinese_twi_pipeline_en.md new file mode 100644 index 00000000000000..d32235dce571fc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_chinese_twi_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_maltese_english_chinese_twi_pipeline pipeline MarianTransformer from steve-tong +author: John Snow Labs +name: opus_maltese_english_chinese_twi_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_chinese_twi_pipeline` is a English model originally trained by steve-tong. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_chinese_twi_pipeline_en_5.5.0_3.0_1725546241214.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_chinese_twi_pipeline_en_5.5.0_3.0_1725546241214.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_english_chinese_twi_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_english_chinese_twi_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_chinese_twi_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|542.8 MB| + +## References + +https://huggingface.co/steve-tong/opus-mt-en-zh-tw + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_french_finetuned_english_tonga_tonga_islands_french_sy23_en.md b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_french_finetuned_english_tonga_tonga_islands_french_sy23_en.md new file mode 100644 index 00000000000000..abccde1a6f547c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_french_finetuned_english_tonga_tonga_islands_french_sy23_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English opus_maltese_english_french_finetuned_english_tonga_tonga_islands_french_sy23 MarianTransformer from SY23 +author: John Snow Labs +name: opus_maltese_english_french_finetuned_english_tonga_tonga_islands_french_sy23 +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_french_finetuned_english_tonga_tonga_islands_french_sy23` is a English model originally trained by SY23. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_french_finetuned_english_tonga_tonga_islands_french_sy23_en_5.5.0_3.0_1725545258709.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_french_finetuned_english_tonga_tonga_islands_french_sy23_en_5.5.0_3.0_1725545258709.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_maltese_english_french_finetuned_english_tonga_tonga_islands_french_sy23","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+  .setInputCols(Array("document"))
+  .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_maltese_english_french_finetuned_english_tonga_tonga_islands_french_sy23","en")
+  .setInputCols(Array("sentence"))
+  .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_french_finetuned_english_tonga_tonga_islands_french_sy23| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.3 MB| + +## References + +https://huggingface.co/SY23/opus-mt-en-fr-finetuned-en-to-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_french_finetuned_english_tonga_tonga_islands_french_sy23_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_french_finetuned_english_tonga_tonga_islands_french_sy23_pipeline_en.md new file mode 100644 index 00000000000000..67394029b72623 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_french_finetuned_english_tonga_tonga_islands_french_sy23_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_maltese_english_french_finetuned_english_tonga_tonga_islands_french_sy23_pipeline pipeline MarianTransformer from SY23 +author: John Snow Labs +name: opus_maltese_english_french_finetuned_english_tonga_tonga_islands_french_sy23_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_french_finetuned_english_tonga_tonga_islands_french_sy23_pipeline` is a English model originally trained by SY23. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_french_finetuned_english_tonga_tonga_islands_french_sy23_pipeline_en_5.5.0_3.0_1725545286684.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_french_finetuned_english_tonga_tonga_islands_french_sy23_pipeline_en_5.5.0_3.0_1725545286684.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_english_french_finetuned_english_tonga_tonga_islands_french_sy23_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_english_french_finetuned_english_tonga_tonga_islands_french_sy23_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_french_finetuned_english_tonga_tonga_islands_french_sy23_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|508.9 MB| + +## References + +https://huggingface.co/SY23/opus-mt-en-fr-finetuned-en-to-fr + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_french_finetuned_must_c_en.md b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_french_finetuned_must_c_en.md new file mode 100644 index 00000000000000..e2200dba70cc36 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_french_finetuned_must_c_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English opus_maltese_english_french_finetuned_must_c MarianTransformer from enimai +author: John Snow Labs +name: opus_maltese_english_french_finetuned_must_c +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_french_finetuned_must_c` is a English model originally trained by enimai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_french_finetuned_must_c_en_5.5.0_3.0_1725545489614.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_french_finetuned_must_c_en_5.5.0_3.0_1725545489614.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_maltese_english_french_finetuned_must_c","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+  .setInputCols(Array("document"))
+  .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_maltese_english_french_finetuned_must_c","en")
+  .setInputCols(Array("sentence"))
+  .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_french_finetuned_must_c| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.3 MB| + +## References + +https://huggingface.co/enimai/OPUS-mt-en-fr-finetuned-MUST-C \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_french_finetuned_must_c_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_french_finetuned_must_c_pipeline_en.md new file mode 100644 index 00000000000000..11a578e04c2d2f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_french_finetuned_must_c_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_maltese_english_french_finetuned_must_c_pipeline pipeline MarianTransformer from enimai +author: John Snow Labs +name: opus_maltese_english_french_finetuned_must_c_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_french_finetuned_must_c_pipeline` is a English model originally trained by enimai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_french_finetuned_must_c_pipeline_en_5.5.0_3.0_1725545516450.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_french_finetuned_must_c_pipeline_en_5.5.0_3.0_1725545516450.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_english_french_finetuned_must_c_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_english_french_finetuned_must_c_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_french_finetuned_must_c_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|508.9 MB| + +## References + +https://huggingface.co/enimai/OPUS-mt-en-fr-finetuned-MUST-C + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ganda_tobius_en.md b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ganda_tobius_en.md new file mode 100644 index 00000000000000..51292ef6890ac3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ganda_tobius_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ganda_tobius MarianTransformer from Tobius +author: John Snow Labs +name: opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ganda_tobius +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ganda_tobius` is a English model originally trained by Tobius. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ganda_tobius_en_5.5.0_3.0_1725545951724.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ganda_tobius_en_5.5.0_3.0_1725545951724.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ganda_tobius","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+  .setInputCols(Array("document"))
+  .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ganda_tobius","en")
+  .setInputCols(Array("sentence"))
+  .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ganda_tobius| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|514.2 MB| + +## References + +https://huggingface.co/Tobius/opus-mt-en-lg-finetuned-en-to-lg \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ganda_tobius_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ganda_tobius_pipeline_en.md new file mode 100644 index 00000000000000..f5a979942610b0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ganda_tobius_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ganda_tobius_pipeline pipeline MarianTransformer from Tobius +author: John Snow Labs +name: opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ganda_tobius_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ganda_tobius_pipeline` is a English model originally trained by Tobius. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ganda_tobius_pipeline_en_5.5.0_3.0_1725545978141.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ganda_tobius_pipeline_en_5.5.0_3.0_1725545978141.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ganda_tobius_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ganda_tobius_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ganda_tobius_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|514.8 MB| + +## References + +https://huggingface.co/Tobius/opus-mt-en-lg-finetuned-en-to-lg + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_indonesian_ccmatrix_lr_5_best_bleu_en.md b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_indonesian_ccmatrix_lr_5_best_bleu_en.md new file mode 100644 index 00000000000000..0705e0b8c076a4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_indonesian_ccmatrix_lr_5_best_bleu_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English opus_maltese_english_indonesian_ccmatrix_lr_5_best_bleu MarianTransformer from yonathanstwn +author: John Snow Labs +name: opus_maltese_english_indonesian_ccmatrix_lr_5_best_bleu +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_indonesian_ccmatrix_lr_5_best_bleu` is a English model originally trained by yonathanstwn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_indonesian_ccmatrix_lr_5_best_bleu_en_5.5.0_3.0_1725546018437.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_indonesian_ccmatrix_lr_5_best_bleu_en_5.5.0_3.0_1725546018437.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_maltese_english_indonesian_ccmatrix_lr_5_best_bleu","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+  .setInputCols(Array("document"))
+  .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_maltese_english_indonesian_ccmatrix_lr_5_best_bleu","en")
+  .setInputCols(Array("sentence"))
+  .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_indonesian_ccmatrix_lr_5_best_bleu| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|481.7 MB| + +## References + +https://huggingface.co/yonathanstwn/opus-mt-en-id-ccmatrix-lr-5-best-bleu \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_indonesian_ccmatrix_lr_5_best_bleu_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_indonesian_ccmatrix_lr_5_best_bleu_pipeline_en.md new file mode 100644 index 00000000000000..2687993b8a1726 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_indonesian_ccmatrix_lr_5_best_bleu_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_maltese_english_indonesian_ccmatrix_lr_5_best_bleu_pipeline pipeline MarianTransformer from yonathanstwn +author: John Snow Labs +name: opus_maltese_english_indonesian_ccmatrix_lr_5_best_bleu_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_indonesian_ccmatrix_lr_5_best_bleu_pipeline` is a English model originally trained by yonathanstwn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_indonesian_ccmatrix_lr_5_best_bleu_pipeline_en_5.5.0_3.0_1725546043257.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_indonesian_ccmatrix_lr_5_best_bleu_pipeline_en_5.5.0_3.0_1725546043257.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_english_indonesian_ccmatrix_lr_5_best_bleu_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_english_indonesian_ccmatrix_lr_5_best_bleu_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_indonesian_ccmatrix_lr_5_best_bleu_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|482.3 MB| + +## References + +https://huggingface.co/yonathanstwn/opus-mt-en-id-ccmatrix-lr-5-best-bleu + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_mpiccardi_en.md b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_mpiccardi_en.md new file mode 100644 index 00000000000000..dcb8286501bcb7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_mpiccardi_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_mpiccardi MarianTransformer from mpiccardi +author: John Snow Labs +name: opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_mpiccardi +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_mpiccardi` is a English model originally trained by mpiccardi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_mpiccardi_en_5.5.0_3.0_1725544893527.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_mpiccardi_en_5.5.0_3.0_1725544893527.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_mpiccardi","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_mpiccardi","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_mpiccardi| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.5 MB| + +## References + +https://huggingface.co/mpiccardi/opus-mt-en-ro-finetuned-en-to-ro \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_mpiccardi_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_mpiccardi_pipeline_en.md new file mode 100644 index 00000000000000..313cfb3a513ca9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_mpiccardi_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_mpiccardi_pipeline pipeline MarianTransformer from mpiccardi +author: John Snow Labs +name: opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_mpiccardi_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_mpiccardi_pipeline` is a English model originally trained by mpiccardi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_mpiccardi_pipeline_en_5.5.0_3.0_1725544920426.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_mpiccardi_pipeline_en_5.5.0_3.0_1725544920426.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_mpiccardi_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_mpiccardi_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_mpiccardi_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|509.1 MB| + +## References + +https://huggingface.co/mpiccardi/opus-mt-en-ro-finetuned-en-to-ro + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_european_en.md b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_european_en.md new file mode 100644 index 00000000000000..21daf9f58155c6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_european_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_european MarianTransformer from himanshubeniwal +author: John Snow Labs +name: opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_european +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_european` is a English model originally trained by himanshubeniwal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_european_en_5.5.0_3.0_1725546162089.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_european_en_5.5.0_3.0_1725546162089.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_european","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_european","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_european| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.0 MB| + +## References + +https://huggingface.co/himanshubeniwal/opus-mt-en-ro-finetuned-ro-to-en-European \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_spanish_finetuned_english_tonga_tonga_islands_spanish_es.md b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_spanish_finetuned_english_tonga_tonga_islands_spanish_es.md new file mode 100644 index 00000000000000..6cacf0c119d928 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_spanish_finetuned_english_tonga_tonga_islands_spanish_es.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Castilian, Spanish opus_maltese_english_spanish_finetuned_english_tonga_tonga_islands_spanish MarianTransformer from edu-shok +author: John Snow Labs +name: opus_maltese_english_spanish_finetuned_english_tonga_tonga_islands_spanish +date: 2024-09-05 +tags: [es, open_source, onnx, translation, marian] +task: Translation +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_spanish_finetuned_english_tonga_tonga_islands_spanish` is a Castilian, Spanish model originally trained by edu-shok. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_spanish_finetuned_english_tonga_tonga_islands_spanish_es_5.5.0_3.0_1725545609172.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_spanish_finetuned_english_tonga_tonga_islands_spanish_es_5.5.0_3.0_1725545609172.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_maltese_english_spanish_finetuned_english_tonga_tonga_islands_spanish","es") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_maltese_english_spanish_finetuned_english_tonga_tonga_islands_spanish","es")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_spanish_finetuned_english_tonga_tonga_islands_spanish| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|es| +|Size:|540.1 MB| + +## References + +https://huggingface.co/edu-shok/opus-mt-en-es-finetuned-en-to-es \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_spanish_finetuned_english_tonga_tonga_islands_spanish_pipeline_es.md b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_spanish_finetuned_english_tonga_tonga_islands_spanish_pipeline_es.md new file mode 100644 index 00000000000000..bc8ac49ba7b974 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_spanish_finetuned_english_tonga_tonga_islands_spanish_pipeline_es.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Castilian, Spanish opus_maltese_english_spanish_finetuned_english_tonga_tonga_islands_spanish_pipeline pipeline MarianTransformer from edu-shok +author: John Snow Labs +name: opus_maltese_english_spanish_finetuned_english_tonga_tonga_islands_spanish_pipeline +date: 2024-09-05 +tags: [es, open_source, pipeline, onnx] +task: Translation +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_spanish_finetuned_english_tonga_tonga_islands_spanish_pipeline` is a Castilian, Spanish model originally trained by edu-shok. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_spanish_finetuned_english_tonga_tonga_islands_spanish_pipeline_es_5.5.0_3.0_1725545638069.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_spanish_finetuned_english_tonga_tonga_islands_spanish_pipeline_es_5.5.0_3.0_1725545638069.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_english_spanish_finetuned_english_tonga_tonga_islands_spanish_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_english_spanish_finetuned_english_tonga_tonga_islands_spanish_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_spanish_finetuned_english_tonga_tonga_islands_spanish_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|540.6 MB| + +## References + +https://huggingface.co/edu-shok/opus-mt-en-es-finetuned-en-to-es + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_spanish_finetuned_spanish_tonga_tonga_islands_sja_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_spanish_finetuned_spanish_tonga_tonga_islands_sja_pipeline_en.md new file mode 100644 index 00000000000000..68d20768c670ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_english_spanish_finetuned_spanish_tonga_tonga_islands_sja_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_maltese_english_spanish_finetuned_spanish_tonga_tonga_islands_sja_pipeline pipeline MarianTransformer from mekjr1 +author: John Snow Labs +name: opus_maltese_english_spanish_finetuned_spanish_tonga_tonga_islands_sja_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_spanish_finetuned_spanish_tonga_tonga_islands_sja_pipeline` is a English model originally trained by mekjr1. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_spanish_finetuned_spanish_tonga_tonga_islands_sja_pipeline_en_5.5.0_3.0_1725494912563.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_spanish_finetuned_spanish_tonga_tonga_islands_sja_pipeline_en_5.5.0_3.0_1725494912563.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_english_spanish_finetuned_spanish_tonga_tonga_islands_sja_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_english_spanish_finetuned_spanish_tonga_tonga_islands_sja_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_spanish_finetuned_spanish_tonga_tonga_islands_sja_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|540.5 MB| + +## References + +https://huggingface.co/mekjr1/opus-mt-en-es-finetuned-es-to-sja + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_japanese_english_finetuned_japanese_tonga_tonga_islands_english_xml_en.md b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_japanese_english_finetuned_japanese_tonga_tonga_islands_english_xml_en.md new file mode 100644 index 00000000000000..6ea9cea982d1f0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_japanese_english_finetuned_japanese_tonga_tonga_islands_english_xml_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English opus_maltese_japanese_english_finetuned_japanese_tonga_tonga_islands_english_xml MarianTransformer from Edomonndo +author: John Snow Labs +name: opus_maltese_japanese_english_finetuned_japanese_tonga_tonga_islands_english_xml +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_japanese_english_finetuned_japanese_tonga_tonga_islands_english_xml` is a English model originally trained by Edomonndo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_japanese_english_finetuned_japanese_tonga_tonga_islands_english_xml_en_5.5.0_3.0_1725545614387.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_japanese_english_finetuned_japanese_tonga_tonga_islands_english_xml_en_5.5.0_3.0_1725545614387.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_maltese_japanese_english_finetuned_japanese_tonga_tonga_islands_english_xml","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_maltese_japanese_english_finetuned_japanese_tonga_tonga_islands_english_xml","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_japanese_english_finetuned_japanese_tonga_tonga_islands_english_xml| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|514.7 MB| + +## References + +https://huggingface.co/Edomonndo/opus-mt-ja-en-finetuned-ja-to-en_xml \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_japanese_english_finetuned_japanese_tonga_tonga_islands_english_xml_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_japanese_english_finetuned_japanese_tonga_tonga_islands_english_xml_pipeline_en.md new file mode 100644 index 00000000000000..4a3a7beff1488b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_japanese_english_finetuned_japanese_tonga_tonga_islands_english_xml_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_maltese_japanese_english_finetuned_japanese_tonga_tonga_islands_english_xml_pipeline pipeline MarianTransformer from Edomonndo +author: John Snow Labs +name: opus_maltese_japanese_english_finetuned_japanese_tonga_tonga_islands_english_xml_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_japanese_english_finetuned_japanese_tonga_tonga_islands_english_xml_pipeline` is a English model originally trained by Edomonndo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_japanese_english_finetuned_japanese_tonga_tonga_islands_english_xml_pipeline_en_5.5.0_3.0_1725545645686.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_japanese_english_finetuned_japanese_tonga_tonga_islands_english_xml_pipeline_en_5.5.0_3.0_1725545645686.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_japanese_english_finetuned_japanese_tonga_tonga_islands_english_xml_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_japanese_english_finetuned_japanese_tonga_tonga_islands_english_xml_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_japanese_english_finetuned_japanese_tonga_tonga_islands_english_xml_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|515.3 MB| + +## References + +https://huggingface.co/Edomonndo/opus-mt-ja-en-finetuned-ja-to-en_xml + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_thai_english_finetuned_english_tonga_tonga_islands_thai_galucier_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_thai_english_finetuned_english_tonga_tonga_islands_thai_galucier_pipeline_en.md new file mode 100644 index 00000000000000..27abbb7ba8e7b9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-opus_maltese_thai_english_finetuned_english_tonga_tonga_islands_thai_galucier_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_maltese_thai_english_finetuned_english_tonga_tonga_islands_thai_galucier_pipeline pipeline MarianTransformer from Galucier +author: John Snow Labs +name: opus_maltese_thai_english_finetuned_english_tonga_tonga_islands_thai_galucier_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_thai_english_finetuned_english_tonga_tonga_islands_thai_galucier_pipeline` is a English model originally trained by Galucier. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_thai_english_finetuned_english_tonga_tonga_islands_thai_galucier_pipeline_en_5.5.0_3.0_1725494859049.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_thai_english_finetuned_english_tonga_tonga_islands_thai_galucier_pipeline_en_5.5.0_3.0_1725494859049.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_thai_english_finetuned_english_tonga_tonga_islands_thai_galucier_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_thai_english_finetuned_english_tonga_tonga_islands_thai_galucier_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_thai_english_finetuned_english_tonga_tonga_islands_thai_galucier_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|524.8 MB| + +## References + +https://huggingface.co/Galucier/opus-mt-th-en-finetuned-en-to-th + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-output_ben_epstein_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-output_ben_epstein_pipeline_en.md new file mode 100644 index 00000000000000..e19f8f8f1d4d7f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-output_ben_epstein_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English output_ben_epstein_pipeline pipeline DistilBertForTokenClassification from ben-epstein +author: John Snow Labs +name: output_ben_epstein_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`output_ben_epstein_pipeline` is a English model originally trained by ben-epstein. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/output_ben_epstein_pipeline_en_5.5.0_3.0_1725500394176.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/output_ben_epstein_pipeline_en_5.5.0_3.0_1725500394176.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("output_ben_epstein_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("output_ben_epstein_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|output_ben_epstein_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/ben-epstein/output + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-paiute_tonga_tonga_islands_english_en.md b/docs/_posts/ahmedlone127/2024-09-05-paiute_tonga_tonga_islands_english_en.md new file mode 100644 index 00000000000000..74503cfb721de2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-paiute_tonga_tonga_islands_english_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English paiute_tonga_tonga_islands_english MarianTransformer from jcole333 +author: John Snow Labs +name: paiute_tonga_tonga_islands_english +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`paiute_tonga_tonga_islands_english` is a English model originally trained by jcole333. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/paiute_tonga_tonga_islands_english_en_5.5.0_3.0_1725544723516.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/paiute_tonga_tonga_islands_english_en_5.5.0_3.0_1725544723516.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("paiute_tonga_tonga_islands_english","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("paiute_tonga_tonga_islands_english","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|paiute_tonga_tonga_islands_english| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|532.5 MB| + +## References + +https://huggingface.co/jcole333/paiute-to-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-patstat_citation_parser_en.md b/docs/_posts/ahmedlone127/2024-09-05-patstat_citation_parser_en.md new file mode 100644 index 00000000000000..893ecb8cc8cd3f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-patstat_citation_parser_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English patstat_citation_parser DistilBertForTokenClassification from nicolauduran45 +author: John Snow Labs +name: patstat_citation_parser +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`patstat_citation_parser` is a English model originally trained by nicolauduran45. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/patstat_citation_parser_en_5.5.0_3.0_1725500300250.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/patstat_citation_parser_en_5.5.0_3.0_1725500300250.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("patstat_citation_parser","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("patstat_citation_parser", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
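+
+To pair each token with its predicted entity tag, the `token` and `ner` result arrays can be zipped together. A minimal sketch, assuming the Python pipeline above has produced `pipelineDF`:
+
+```python
+from pyspark.sql import functions as F
+
+# Align tokens with their predicted NER labels
+# (assumes the `pipelineDF` DataFrame created in the example above)
+pipelineDF.select(F.explode(F.arrays_zip("token.result", "ner.result")).alias("token_tag")) \
+    .show(truncate=False)
+```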
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|patstat_citation_parser| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|505.5 MB| + +## References + +https://huggingface.co/nicolauduran45/patstat-citation-parser \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-patstat_citation_parser_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-patstat_citation_parser_pipeline_en.md new file mode 100644 index 00000000000000..d9f118213dc429 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-patstat_citation_parser_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English patstat_citation_parser_pipeline pipeline DistilBertForTokenClassification from nicolauduran45 +author: John Snow Labs +name: patstat_citation_parser_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`patstat_citation_parser_pipeline` is a English model originally trained by nicolauduran45. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/patstat_citation_parser_pipeline_en_5.5.0_3.0_1725500323646.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/patstat_citation_parser_pipeline_en_5.5.0_3.0_1725500323646.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("patstat_citation_parser_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("patstat_citation_parser_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|patstat_citation_parser_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|505.5 MB| + +## References + +https://huggingface.co/nicolauduran45/patstat-citation-parser + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-phowhisper_base_vinai_pipeline_vi.md b/docs/_posts/ahmedlone127/2024-09-05-phowhisper_base_vinai_pipeline_vi.md new file mode 100644 index 00000000000000..d71a2381a098d4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-phowhisper_base_vinai_pipeline_vi.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Vietnamese phowhisper_base_vinai_pipeline pipeline WhisperForCTC from vinai +author: John Snow Labs +name: phowhisper_base_vinai_pipeline +date: 2024-09-05 +tags: [vi, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: vi +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`phowhisper_base_vinai_pipeline` is a Vietnamese model originally trained by vinai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/phowhisper_base_vinai_pipeline_vi_5.5.0_3.0_1725547830589.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/phowhisper_base_vinai_pipeline_vi_5.5.0_3.0_1725547830589.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("phowhisper_base_vinai_pipeline", lang = "vi") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("phowhisper_base_vinai_pipeline", lang = "vi") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|phowhisper_base_vinai_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|vi| +|Size:|640.9 MB| + +## References + +https://huggingface.co/vinai/PhoWhisper-base + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-phowhisper_base_vinai_vi.md b/docs/_posts/ahmedlone127/2024-09-05-phowhisper_base_vinai_vi.md new file mode 100644 index 00000000000000..ebbec13eafdb82 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-phowhisper_base_vinai_vi.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Vietnamese phowhisper_base_vinai WhisperForCTC from vinai +author: John Snow Labs +name: phowhisper_base_vinai +date: 2024-09-05 +tags: [vi, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: vi +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`phowhisper_base_vinai` is a Vietnamese model originally trained by vinai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/phowhisper_base_vinai_vi_5.5.0_3.0_1725547791724.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/phowhisper_base_vinai_vi_5.5.0_3.0_1725547791724.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("phowhisper_base_vinai","vi") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("phowhisper_base_vinai", "vi")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
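+
+The `data` DataFrame is expected to hold the raw audio samples in an `audio_content` column. One possible way to build it, shown here only as an assumption for illustration (it is not part of the original example), is to load a 16 kHz mono WAV file with librosa and pass the samples as a list of floats:
+
+```python
+import librosa
+
+# Hypothetical helper: "sample.wav" is a placeholder path; the float samples
+# go into the "audio_content" column expected by AudioAssembler
+samples, _ = librosa.load("sample.wav", sr=16000, mono=True)
+data = spark.createDataFrame([[samples.tolist()]], ["audio_content"])
+```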
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|phowhisper_base_vinai| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|vi| +|Size:|640.8 MB| + +## References + +https://huggingface.co/vinai/PhoWhisper-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-pii_model_ankitcodes_en.md b/docs/_posts/ahmedlone127/2024-09-05-pii_model_ankitcodes_en.md new file mode 100644 index 00000000000000..d72bbbc6ac2324 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-pii_model_ankitcodes_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English pii_model_ankitcodes BertForTokenClassification from ankitcodes +author: John Snow Labs +name: pii_model_ankitcodes +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`pii_model_ankitcodes` is a English model originally trained by ankitcodes. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pii_model_ankitcodes_en_5.5.0_3.0_1725510931095.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pii_model_ankitcodes_en_5.5.0_3.0_1725510931095.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("pii_model_ankitcodes","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("pii_model_ankitcodes", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pii_model_ankitcodes| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|407.5 MB| + +## References + +https://huggingface.co/ankitcodes/pii_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-pll_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-pll_model_pipeline_en.md new file mode 100644 index 00000000000000..b477312d27d75a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-pll_model_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English pll_model_pipeline pipeline DistilBertForTokenClassification from Yaroslavbud +author: John Snow Labs +name: pll_model_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`pll_model_pipeline` is a English model originally trained by Yaroslavbud. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pll_model_pipeline_en_5.5.0_3.0_1725495814940.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pll_model_pipeline_en_5.5.0_3.0_1725495814940.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("pll_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("pll_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pll_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Yaroslavbud/PLL_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-poe_qa_mpnetbase_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-poe_qa_mpnetbase_pipeline_en.md new file mode 100644 index 00000000000000..5a5047e1978086 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-poe_qa_mpnetbase_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English poe_qa_mpnetbase_pipeline pipeline MPNetForSequenceClassification from facehugger92 +author: John Snow Labs +name: poe_qa_mpnetbase_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`poe_qa_mpnetbase_pipeline` is a English model originally trained by facehugger92. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/poe_qa_mpnetbase_pipeline_en_5.5.0_3.0_1725574936452.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/poe_qa_mpnetbase_pipeline_en_5.5.0_3.0_1725574936452.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("poe_qa_mpnetbase_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("poe_qa_mpnetbase_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|poe_qa_mpnetbase_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|390.6 MB| + +## References + +https://huggingface.co/facehugger92/POE_QA_mpnetbase + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-portuguese_capitalization_punctuation_restoration_sanivert_pipeline_pt.md b/docs/_posts/ahmedlone127/2024-09-05-portuguese_capitalization_punctuation_restoration_sanivert_pipeline_pt.md new file mode 100644 index 00000000000000..242249ac7a613d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-portuguese_capitalization_punctuation_restoration_sanivert_pipeline_pt.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Portuguese portuguese_capitalization_punctuation_restoration_sanivert_pipeline pipeline BertForTokenClassification from VOCALINLP +author: John Snow Labs +name: portuguese_capitalization_punctuation_restoration_sanivert_pipeline +date: 2024-09-05 +tags: [pt, open_source, pipeline, onnx] +task: Named Entity Recognition +language: pt +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`portuguese_capitalization_punctuation_restoration_sanivert_pipeline` is a Portuguese model originally trained by VOCALINLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/portuguese_capitalization_punctuation_restoration_sanivert_pipeline_pt_5.5.0_3.0_1725515665681.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/portuguese_capitalization_punctuation_restoration_sanivert_pipeline_pt_5.5.0_3.0_1725515665681.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("portuguese_capitalization_punctuation_restoration_sanivert_pipeline", lang = "pt") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("portuguese_capitalization_punctuation_restoration_sanivert_pipeline", lang = "pt") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|portuguese_capitalization_punctuation_restoration_sanivert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|pt| +|Size:|405.9 MB| + +## References + +https://huggingface.co/VOCALINLP/portuguese_capitalization_punctuation_restoration_sanivert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-portuguese_up_xlmr_fewshot_falsetrue_0_0_best_en.md b/docs/_posts/ahmedlone127/2024-09-05-portuguese_up_xlmr_fewshot_falsetrue_0_0_best_en.md new file mode 100644 index 00000000000000..2c69e9087ff8d4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-portuguese_up_xlmr_fewshot_falsetrue_0_0_best_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English portuguese_up_xlmr_fewshot_falsetrue_0_0_best XlmRoBertaForSequenceClassification from harish +author: John Snow Labs +name: portuguese_up_xlmr_fewshot_falsetrue_0_0_best +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`portuguese_up_xlmr_fewshot_falsetrue_0_0_best` is a English model originally trained by harish. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/portuguese_up_xlmr_fewshot_falsetrue_0_0_best_en_5.5.0_3.0_1725527150412.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/portuguese_up_xlmr_fewshot_falsetrue_0_0_best_en_5.5.0_3.0_1725527150412.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("portuguese_up_xlmr_fewshot_falsetrue_0_0_best","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("portuguese_up_xlmr_fewshot_falsetrue_0_0_best", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
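+
+The predicted label produced by the pipeline above is stored in the `class` annotation column. A short sketch for inspecting it, reusing `pipelineDF` from the example; the label set itself is not documented on the source model card.
+
+```python
+# Inspect the predicted label from the example above
+pipelineDF.select("text", "class.result").show(truncate=False)
+
+# One label per row; the possible values depend on the original fine-tuning data
+pipelineDF.selectExpr("explode(class.result) as predicted_label").show()
+```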
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|portuguese_up_xlmr_fewshot_falsetrue_0_0_best| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|783.3 MB| + +## References + +https://huggingface.co/harish/PT-UP-xlmR-FewShot-FalseTrue-0_0_BEST \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-portuguese_up_xlmr_fewshot_falsetrue_0_0_best_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-portuguese_up_xlmr_fewshot_falsetrue_0_0_best_pipeline_en.md new file mode 100644 index 00000000000000..34faff592ba07f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-portuguese_up_xlmr_fewshot_falsetrue_0_0_best_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English portuguese_up_xlmr_fewshot_falsetrue_0_0_best_pipeline pipeline XlmRoBertaForSequenceClassification from harish +author: John Snow Labs +name: portuguese_up_xlmr_fewshot_falsetrue_0_0_best_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`portuguese_up_xlmr_fewshot_falsetrue_0_0_best_pipeline` is a English model originally trained by harish. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/portuguese_up_xlmr_fewshot_falsetrue_0_0_best_pipeline_en_5.5.0_3.0_1725527290770.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/portuguese_up_xlmr_fewshot_falsetrue_0_0_best_pipeline_en_5.5.0_3.0_1725527290770.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("portuguese_up_xlmr_fewshot_falsetrue_0_0_best_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("portuguese_up_xlmr_fewshot_falsetrue_0_0_best_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|portuguese_up_xlmr_fewshot_falsetrue_0_0_best_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|783.3 MB| + +## References + +https://huggingface.co/harish/PT-UP-xlmR-FewShot-FalseTrue-0_0_BEST + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-predicting_misdirection_en.md b/docs/_posts/ahmedlone127/2024-09-05-predicting_misdirection_en.md new file mode 100644 index 00000000000000..cd4252c780bf6f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-predicting_misdirection_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English predicting_misdirection DistilBertForSequenceClassification from Eappelson +author: John Snow Labs +name: predicting_misdirection +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`predicting_misdirection` is a English model originally trained by Eappelson. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/predicting_misdirection_en_5.5.0_3.0_1725507785953.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/predicting_misdirection_en_5.5.0_3.0_1725507785953.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("predicting_misdirection","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("predicting_misdirection", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|predicting_misdirection| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Eappelson/predicting_misdirection \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-pretrained_xlm_portuguese_e5_select_en.md b/docs/_posts/ahmedlone127/2024-09-05-pretrained_xlm_portuguese_e5_select_en.md new file mode 100644 index 00000000000000..219046161c4d80 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-pretrained_xlm_portuguese_e5_select_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English pretrained_xlm_portuguese_e5_select XlmRoBertaEmbeddings from harish +author: John Snow Labs +name: pretrained_xlm_portuguese_e5_select +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`pretrained_xlm_portuguese_e5_select` is a English model originally trained by harish. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pretrained_xlm_portuguese_e5_select_en_5.5.0_3.0_1725555255085.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pretrained_xlm_portuguese_e5_select_en_5.5.0_3.0_1725555255085.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("pretrained_xlm_portuguese_e5_select","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("pretrained_xlm_portuguese_e5_select","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
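+
+The `embeddings` column produced above holds one annotation per token. A small sketch, reusing the column names from the example, for extracting the raw vectors:
+
+```python
+# Explode the token-level annotations and keep each token with its vector
+pipelineDF.selectExpr("explode(embeddings) as emb") \
+    .selectExpr("emb.result as token", "emb.embeddings as vector") \
+    .show(truncate=80)
+```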
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pretrained_xlm_portuguese_e5_select| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[xlm_roberta]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/harish/preTrained-xlm-pt-e5-select \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-pretrained_xlm_portuguese_e5_select_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-pretrained_xlm_portuguese_e5_select_pipeline_en.md new file mode 100644 index 00000000000000..49cb9e72e6d78c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-pretrained_xlm_portuguese_e5_select_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English pretrained_xlm_portuguese_e5_select_pipeline pipeline XlmRoBertaEmbeddings from harish +author: John Snow Labs +name: pretrained_xlm_portuguese_e5_select_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`pretrained_xlm_portuguese_e5_select_pipeline` is a English model originally trained by harish. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pretrained_xlm_portuguese_e5_select_pipeline_en_5.5.0_3.0_1725555311117.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pretrained_xlm_portuguese_e5_select_pipeline_en_5.5.0_3.0_1725555311117.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("pretrained_xlm_portuguese_e5_select_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("pretrained_xlm_portuguese_e5_select_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pretrained_xlm_portuguese_e5_select_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/harish/preTrained-xlm-pt-e5-select + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ptsdbert_large_en.md b/docs/_posts/ahmedlone127/2024-09-05-ptsdbert_large_en.md new file mode 100644 index 00000000000000..017b8747c75b6d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ptsdbert_large_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ptsdbert_large RoBertaEmbeddings from YWU99u +author: John Snow Labs +name: ptsdbert_large +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ptsdbert_large` is a English model originally trained by YWU99u. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ptsdbert_large_en_5.5.0_3.0_1725578086509.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ptsdbert_large_en_5.5.0_3.0_1725578086509.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("ptsdbert_large","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("ptsdbert_large","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
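+
+If one vector per document is preferred over token-level embeddings, a pooling stage can be appended to the pipeline above. The sketch below is an assumption-based extension using Spark NLP's `SentenceEmbeddings` annotator with average pooling; it is not part of the published model.
+
+```python
+from sparknlp.annotator import SentenceEmbeddings
+
+# Average-pool the token embeddings from the example above into one vector per document
+sentenceEmbeddings = SentenceEmbeddings() \
+    .setInputCols(["document", "embeddings"]) \
+    .setOutputCol("sentence_embeddings") \
+    .setPoolingStrategy("AVERAGE")
+
+pooledPipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings, sentenceEmbeddings])
+pooledPipeline.fit(data).transform(data) \
+    .selectExpr("explode(sentence_embeddings.embeddings) as doc_vector").show()
+```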
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ptsdbert_large| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/YWU99u/ptsdBERT_large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ptsdbert_large_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-ptsdbert_large_pipeline_en.md new file mode 100644 index 00000000000000..0e5ef07fc550ad --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ptsdbert_large_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English ptsdbert_large_pipeline pipeline RoBertaEmbeddings from YWU99u +author: John Snow Labs +name: ptsdbert_large_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ptsdbert_large_pipeline` is a English model originally trained by YWU99u. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ptsdbert_large_pipeline_en_5.5.0_3.0_1725578151969.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ptsdbert_large_pipeline_en_5.5.0_3.0_1725578151969.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ptsdbert_large_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ptsdbert_large_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ptsdbert_large_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/YWU99u/ptsdBERT_large + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-pubchem10m_smiles_bpe_120k_en.md b/docs/_posts/ahmedlone127/2024-09-05-pubchem10m_smiles_bpe_120k_en.md new file mode 100644 index 00000000000000..39ba729a7152a3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-pubchem10m_smiles_bpe_120k_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English pubchem10m_smiles_bpe_120k RoBertaEmbeddings from seyonec +author: John Snow Labs +name: pubchem10m_smiles_bpe_120k +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`pubchem10m_smiles_bpe_120k` is a English model originally trained by seyonec. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pubchem10m_smiles_bpe_120k_en_5.5.0_3.0_1725565819684.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pubchem10m_smiles_bpe_120k_en_5.5.0_3.0_1725565819684.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("pubchem10m_smiles_bpe_120k","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("pubchem10m_smiles_bpe_120k","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
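+
+This checkpoint appears to come from the ChemBERTa family trained on SMILES strings from PubChem, so a chemistry-style input is more representative than the generic sentence used above. The aspirin SMILES below is only an illustrative assumption.
+
+```python
+# Illustrative SMILES input (aspirin), reusing the pipeline defined above
+smiles_data = spark.createDataFrame([["CC(=O)OC1=CC=CC=C1C(=O)O"]]).toDF("text")
+pipeline.fit(smiles_data).transform(smiles_data) \
+    .selectExpr("explode(embeddings.embeddings) as token_vector").show()
+```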
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pubchem10m_smiles_bpe_120k| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|310.8 MB| + +## References + +https://huggingface.co/seyonec/PubChem10M_SMILES_BPE_120k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-pubchem10m_smiles_bpe_50k_en.md b/docs/_posts/ahmedlone127/2024-09-05-pubchem10m_smiles_bpe_50k_en.md new file mode 100644 index 00000000000000..d8533e8503782c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-pubchem10m_smiles_bpe_50k_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English pubchem10m_smiles_bpe_50k RoBertaEmbeddings from seyonec +author: John Snow Labs +name: pubchem10m_smiles_bpe_50k +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`pubchem10m_smiles_bpe_50k` is a English model originally trained by seyonec. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pubchem10m_smiles_bpe_50k_en_5.5.0_3.0_1725578715611.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pubchem10m_smiles_bpe_50k_en_5.5.0_3.0_1725578715611.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("pubchem10m_smiles_bpe_50k","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("pubchem10m_smiles_bpe_50k","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pubchem10m_smiles_bpe_50k| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|310.2 MB| + +## References + +https://huggingface.co/seyonec/PubChem10M_SMILES_BPE_50k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-puoberta_pipeline_tn.md b/docs/_posts/ahmedlone127/2024-09-05-puoberta_pipeline_tn.md new file mode 100644 index 00000000000000..05f24aaa2602a3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-puoberta_pipeline_tn.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Tswana puoberta_pipeline pipeline RoBertaEmbeddings from dsfsi +author: John Snow Labs +name: puoberta_pipeline +date: 2024-09-05 +tags: [tn, open_source, pipeline, onnx] +task: Embeddings +language: tn +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`puoberta_pipeline` is a Tswana model originally trained by dsfsi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/puoberta_pipeline_tn_5.5.0_3.0_1725566269367.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/puoberta_pipeline_tn_5.5.0_3.0_1725566269367.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("puoberta_pipeline", lang = "tn") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("puoberta_pipeline", lang = "tn") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|puoberta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|tn| +|Size:|311.7 MB| + +## References + +https://huggingface.co/dsfsi/PuoBERTa + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-puoberta_tn.md b/docs/_posts/ahmedlone127/2024-09-05-puoberta_tn.md new file mode 100644 index 00000000000000..a7b46f42d5585d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-puoberta_tn.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Tswana puoberta RoBertaEmbeddings from dsfsi +author: John Snow Labs +name: puoberta +date: 2024-09-05 +tags: [tn, open_source, onnx, embeddings, roberta] +task: Embeddings +language: tn +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`puoberta` is a Tswana model originally trained by dsfsi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/puoberta_tn_5.5.0_3.0_1725566252324.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/puoberta_tn_5.5.0_3.0_1725566252324.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("puoberta","tn") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("puoberta","tn") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|puoberta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|tn| +|Size:|311.7 MB| + +## References + +https://huggingface.co/dsfsi/PuoBERTa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-qa_redaction_nov1_16_en.md b/docs/_posts/ahmedlone127/2024-09-05-qa_redaction_nov1_16_en.md new file mode 100644 index 00000000000000..5284484b50096f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-qa_redaction_nov1_16_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English qa_redaction_nov1_16 XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: qa_redaction_nov1_16 +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_redaction_nov1_16` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_redaction_nov1_16_en_5.5.0_3.0_1725497138280.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_redaction_nov1_16_en_5.5.0_3.0_1725497138280.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_redaction_nov1_16","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_redaction_nov1_16", "en")
+  .setInputCols(Array("document_question","document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
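+
+After the pipeline above runs, the extracted span is available in the `answer` annotation column. A short sketch for reading it out alongside the question and context:
+
+```python
+# Show question, context and the extracted answer span from the example above
+pipelineDF.selectExpr(
+    "document_question.result as question",
+    "document_context.result as context",
+    "answer.result as answer"
+).show(truncate=False)
+```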
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_redaction_nov1_16| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|796.4 MB| + +## References + +https://huggingface.co/am-infoweb/QA_REDACTION_NOV1_16 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-qa_redaction_nov1_16_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-qa_redaction_nov1_16_pipeline_en.md new file mode 100644 index 00000000000000..8c4f1e6575bbaa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-qa_redaction_nov1_16_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English qa_redaction_nov1_16_pipeline pipeline XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: qa_redaction_nov1_16_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_redaction_nov1_16_pipeline` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_redaction_nov1_16_pipeline_en_5.5.0_3.0_1725497257255.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_redaction_nov1_16_pipeline_en_5.5.0_3.0_1725497257255.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("qa_redaction_nov1_16_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("qa_redaction_nov1_16_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
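+
+The pretrained pipeline above bundles a `MultiDocumentAssembler`, so `df` is expected to provide both a question and a context column. The sketch below is a hedged guess at that input schema; the column names `question` and `context` are assumptions, not confirmed by the pipeline metadata.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+pipeline = PretrainedPipeline("qa_redaction_nov1_16_pipeline", lang="en")
+
+# Assumed input schema: one question column and one context column
+df = spark.createDataFrame(
+    [["What framework do I use?", "I use spark-nlp."]]
+).toDF("question", "context")
+
+pipeline.transform(df).selectExpr("answer.result").show(truncate=False)
+```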
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_redaction_nov1_16_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|796.4 MB| + +## References + +https://huggingface.co/am-infoweb/QA_REDACTION_NOV1_16 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-qa_redaction_nov1_19_a2_en.md b/docs/_posts/ahmedlone127/2024-09-05-qa_redaction_nov1_19_a2_en.md new file mode 100644 index 00000000000000..aadc107e1d706c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-qa_redaction_nov1_19_a2_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English qa_redaction_nov1_19_a2 XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: qa_redaction_nov1_19_a2 +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_redaction_nov1_19_a2` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_redaction_nov1_19_a2_en_5.5.0_3.0_1725559247210.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_redaction_nov1_19_a2_en_5.5.0_3.0_1725559247210.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_redaction_nov1_19_a2","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_redaction_nov1_19_a2", "en")
+  .setInputCols(Array("document_question","document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_redaction_nov1_19_a2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|796.9 MB| + +## References + +https://huggingface.co/am-infoweb/QA_REDACTION_NOV1_19-a2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-qa_redaction_nov1_19_a2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-qa_redaction_nov1_19_a2_pipeline_en.md new file mode 100644 index 00000000000000..aa88ca73c12118 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-qa_redaction_nov1_19_a2_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English qa_redaction_nov1_19_a2_pipeline pipeline XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: qa_redaction_nov1_19_a2_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_redaction_nov1_19_a2_pipeline` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_redaction_nov1_19_a2_pipeline_en_5.5.0_3.0_1725559373443.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_redaction_nov1_19_a2_pipeline_en_5.5.0_3.0_1725559373443.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("qa_redaction_nov1_19_a2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("qa_redaction_nov1_19_a2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_redaction_nov1_19_a2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|796.9 MB| + +## References + +https://huggingface.co/am-infoweb/QA_REDACTION_NOV1_19-a2 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-qa_synth_02_oct_with_finetune_1_1_en.md b/docs/_posts/ahmedlone127/2024-09-05-qa_synth_02_oct_with_finetune_1_1_en.md new file mode 100644 index 00000000000000..98b44aa1ecd922 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-qa_synth_02_oct_with_finetune_1_1_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English qa_synth_02_oct_with_finetune_1_1 XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: qa_synth_02_oct_with_finetune_1_1 +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_synth_02_oct_with_finetune_1_1` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_synth_02_oct_with_finetune_1_1_en_5.5.0_3.0_1725497482772.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_synth_02_oct_with_finetune_1_1_en_5.5.0_3.0_1725497482772.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_synth_02_oct_with_finetune_1_1","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_synth_02_oct_with_finetune_1_1", "en")
+  .setInputCols(Array("document_question","document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_synth_02_oct_with_finetune_1_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|805.1 MB| + +## References + +https://huggingface.co/am-infoweb/QA_SYNTH_02_OCT_WITH_FINETUNE_1.1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-qa_synth_22_sept_with_finetune_1_0_en.md b/docs/_posts/ahmedlone127/2024-09-05-qa_synth_22_sept_with_finetune_1_0_en.md new file mode 100644 index 00000000000000..84583e6d388338 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-qa_synth_22_sept_with_finetune_1_0_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English qa_synth_22_sept_with_finetune_1_0 XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: qa_synth_22_sept_with_finetune_1_0 +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_synth_22_sept_with_finetune_1_0` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_synth_22_sept_with_finetune_1_0_en_5.5.0_3.0_1725570650815.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_synth_22_sept_with_finetune_1_0_en_5.5.0_3.0_1725570650815.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_synth_22_sept_with_finetune_1_0","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_synth_22_sept_with_finetune_1_0", "en")
+  .setInputCols(Array("document_question","document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_synth_22_sept_with_finetune_1_0| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|812.4 MB| + +## References + +https://huggingface.co/am-infoweb/QA_SYNTH_22_SEPT_WITH_FINETUNE_1.0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-qa_synth_22_sept_with_finetune_1_0_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-qa_synth_22_sept_with_finetune_1_0_pipeline_en.md new file mode 100644 index 00000000000000..2b6f010ab04844 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-qa_synth_22_sept_with_finetune_1_0_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English qa_synth_22_sept_with_finetune_1_0_pipeline pipeline XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: qa_synth_22_sept_with_finetune_1_0_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_synth_22_sept_with_finetune_1_0_pipeline` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_synth_22_sept_with_finetune_1_0_pipeline_en_5.5.0_3.0_1725570756551.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_synth_22_sept_with_finetune_1_0_pipeline_en_5.5.0_3.0_1725570756551.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("qa_synth_22_sept_with_finetune_1_0_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("qa_synth_22_sept_with_finetune_1_0_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_synth_22_sept_with_finetune_1_0_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|812.4 MB| + +## References + +https://huggingface.co/am-infoweb/QA_SYNTH_22_SEPT_WITH_FINETUNE_1.0 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-qa_synth_25_sept_with_finetune_1_0_en.md b/docs/_posts/ahmedlone127/2024-09-05-qa_synth_25_sept_with_finetune_1_0_en.md new file mode 100644 index 00000000000000..6a0631e3db000e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-qa_synth_25_sept_with_finetune_1_0_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English qa_synth_25_sept_with_finetune_1_0 XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: qa_synth_25_sept_with_finetune_1_0 +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_synth_25_sept_with_finetune_1_0` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_synth_25_sept_with_finetune_1_0_en_5.5.0_3.0_1725557731432.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_synth_25_sept_with_finetune_1_0_en_5.5.0_3.0_1725557731432.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_synth_25_sept_with_finetune_1_0","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_synth_25_sept_with_finetune_1_0", "en")
+  .setInputCols(Array("document_question","document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_synth_25_sept_with_finetune_1_0| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|803.0 MB| + +## References + +https://huggingface.co/am-infoweb/QA_SYNTH_25_SEPT_WITH_FINETUNE_1.0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-qa_synth_25_sept_with_finetune_1_0_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-qa_synth_25_sept_with_finetune_1_0_pipeline_en.md new file mode 100644 index 00000000000000..baf2a0c2d86b0e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-qa_synth_25_sept_with_finetune_1_0_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English qa_synth_25_sept_with_finetune_1_0_pipeline pipeline XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: qa_synth_25_sept_with_finetune_1_0_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_synth_25_sept_with_finetune_1_0_pipeline` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_synth_25_sept_with_finetune_1_0_pipeline_en_5.5.0_3.0_1725557850715.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_synth_25_sept_with_finetune_1_0_pipeline_en_5.5.0_3.0_1725557850715.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("qa_synth_25_sept_with_finetune_1_0_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("qa_synth_25_sept_with_finetune_1_0_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_synth_25_sept_with_finetune_1_0_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|803.0 MB| + +## References + +https://huggingface.co/am-infoweb/QA_SYNTH_25_SEPT_WITH_FINETUNE_1.0 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-qa_synth_26_sept_with_finetune_1_1_en.md b/docs/_posts/ahmedlone127/2024-09-05-qa_synth_26_sept_with_finetune_1_1_en.md new file mode 100644 index 00000000000000..38b8e7b5e2566e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-qa_synth_26_sept_with_finetune_1_1_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English qa_synth_26_sept_with_finetune_1_1 XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: qa_synth_26_sept_with_finetune_1_1 +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_synth_26_sept_with_finetune_1_1` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_synth_26_sept_with_finetune_1_1_en_5.5.0_3.0_1725566931946.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_synth_26_sept_with_finetune_1_1_en_5.5.0_3.0_1725566931946.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_synth_26_sept_with_finetune_1_1","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_synth_26_sept_with_finetune_1_1", "en")
+  .setInputCols(Array("document_question","document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_synth_26_sept_with_finetune_1_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|808.5 MB| + +## References + +https://huggingface.co/am-infoweb/QA_SYNTH_26_SEPT_WITH_FINETUNE_1.1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-qa_synth_26_sept_with_finetune_1_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-qa_synth_26_sept_with_finetune_1_1_pipeline_en.md new file mode 100644 index 00000000000000..6638785a1762c7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-qa_synth_26_sept_with_finetune_1_1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English qa_synth_26_sept_with_finetune_1_1_pipeline pipeline XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: qa_synth_26_sept_with_finetune_1_1_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_synth_26_sept_with_finetune_1_1_pipeline` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_synth_26_sept_with_finetune_1_1_pipeline_en_5.5.0_3.0_1725567046447.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_synth_26_sept_with_finetune_1_1_pipeline_en_5.5.0_3.0_1725567046447.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("qa_synth_26_sept_with_finetune_1_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("qa_synth_26_sept_with_finetune_1_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_synth_26_sept_with_finetune_1_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|808.5 MB| + +## References + +https://huggingface.co/am-infoweb/QA_SYNTH_26_SEPT_WITH_FINETUNE_1.1 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-qa_synth_27_sept_with_finetune_1_1_en.md b/docs/_posts/ahmedlone127/2024-09-05-qa_synth_27_sept_with_finetune_1_1_en.md new file mode 100644 index 00000000000000..2bf9b798fa4056 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-qa_synth_27_sept_with_finetune_1_1_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English qa_synth_27_sept_with_finetune_1_1 XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: qa_synth_27_sept_with_finetune_1_1 +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_synth_27_sept_with_finetune_1_1` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_synth_27_sept_with_finetune_1_1_en_5.5.0_3.0_1725567293491.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_synth_27_sept_with_finetune_1_1_en_5.5.0_3.0_1725567293491.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+import sparknlp
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_synth_27_sept_with_finetune_1_1","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_synth_27_sept_with_finetune_1_1", "en")
+  .setInputCols(Array("document_question","document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDS.toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
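+
+Several question/context pairs can be scored in a single pass by building a multi-row DataFrame; the rows below are made-up examples and only the column names matter.
+
+```python
+# Batch scoring sketch: one row per question/context pair.
+batch = spark.createDataFrame([
+    ("What framework do I use?", "I use spark-nlp."),
+    ("Where is the office?", "The office is in Berlin.")
+], ["question", "context"])
+
+pipelineModel.transform(batch).select("question", "answer.result").show(truncate=False)
+```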
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_synth_27_sept_with_finetune_1_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|805.3 MB| + +## References + +https://huggingface.co/am-infoweb/QA_SYNTH_27_SEPT_WITH_FINETUNE_1.1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-qa_synth_27_sept_with_finetune_1_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-qa_synth_27_sept_with_finetune_1_1_pipeline_en.md new file mode 100644 index 00000000000000..1477a96da759b4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-qa_synth_27_sept_with_finetune_1_1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English qa_synth_27_sept_with_finetune_1_1_pipeline pipeline XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: qa_synth_27_sept_with_finetune_1_1_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_synth_27_sept_with_finetune_1_1_pipeline` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_synth_27_sept_with_finetune_1_1_pipeline_en_5.5.0_3.0_1725567417342.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_synth_27_sept_with_finetune_1_1_pipeline_en_5.5.0_3.0_1725567417342.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("qa_synth_27_sept_with_finetune_1_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("qa_synth_27_sept_with_finetune_1_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
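+
+To confirm which annotators the downloaded pipeline contains (see Included Models below), the underlying Spark ML model can be inspected; this sketch assumes `PretrainedPipeline` exposes the fitted `PipelineModel` via a `.model` attribute.
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("qa_synth_27_sept_with_finetune_1_1_pipeline", lang = "en")
+
+# Assumed attribute: the fitted Spark ML PipelineModel behind the pretrained pipeline.
+for stage in pipeline.model.stages:
+    print(stage)
+```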
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_synth_27_sept_with_finetune_1_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|805.3 MB| + +## References + +https://huggingface.co/am-infoweb/QA_SYNTH_27_SEPT_WITH_FINETUNE_1.1 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-qa_synth_data_with_unanswerable_23_aug_xlm_fnetune_1_0_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-qa_synth_data_with_unanswerable_23_aug_xlm_fnetune_1_0_pipeline_en.md new file mode 100644 index 00000000000000..66657ac28e0ba3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-qa_synth_data_with_unanswerable_23_aug_xlm_fnetune_1_0_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English qa_synth_data_with_unanswerable_23_aug_xlm_fnetune_1_0_pipeline pipeline XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: qa_synth_data_with_unanswerable_23_aug_xlm_fnetune_1_0_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_synth_data_with_unanswerable_23_aug_xlm_fnetune_1_0_pipeline` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_synth_data_with_unanswerable_23_aug_xlm_fnetune_1_0_pipeline_en_5.5.0_3.0_1725571339177.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_synth_data_with_unanswerable_23_aug_xlm_fnetune_1_0_pipeline_en_5.5.0_3.0_1725571339177.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("qa_synth_data_with_unanswerable_23_aug_xlm_fnetune_1_0_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("qa_synth_data_with_unanswerable_23_aug_xlm_fnetune_1_0_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
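+
+After transforming, the annotated output is a regular Spark DataFrame and can be persisted with the usual writers. A sketch, assuming `df` carries the pipeline's expected input columns and using a placeholder output path:
+
+```python
+# Score a batch and persist the extracted answers; the path is a placeholder.
+result = pipeline.transform(df)
+result.select("answer.result").write.mode("overwrite").parquet("/tmp/qa_unanswerable_answers")
+```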
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_synth_data_with_unanswerable_23_aug_xlm_fnetune_1_0_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|803.6 MB| + +## References + +https://huggingface.co/am-infoweb/QA_SYNTH_DATA_WITH_UNANSWERABLE_23_AUG_xlm_FNETUNE_1.0 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-qa_synthetic_data_only_16_aug_en.md b/docs/_posts/ahmedlone127/2024-09-05-qa_synthetic_data_only_16_aug_en.md new file mode 100644 index 00000000000000..7ec6d4ee49e730 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-qa_synthetic_data_only_16_aug_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English qa_synthetic_data_only_16_aug XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: qa_synthetic_data_only_16_aug +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_synthetic_data_only_16_aug` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_synthetic_data_only_16_aug_en_5.5.0_3.0_1725556536710.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_synthetic_data_only_16_aug_en_5.5.0_3.0_1725556536710.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+import sparknlp
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_synthetic_data_only_16_aug","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_synthetic_data_only_16_aug", "en")
+  .setInputCols(Array("document_question","document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDS.toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
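+
+The fitted pipeline is a standard Spark ML `PipelineModel`, so it can be saved and reloaded for reuse without re-fitting; the path below is a placeholder.
+
+```python
+from pyspark.ml import PipelineModel
+
+# Persist the fitted pipeline and load it back later.
+pipelineModel.write().overwrite().save("/tmp/qa_synthetic_16_aug_pipeline")
+reloaded = PipelineModel.load("/tmp/qa_synthetic_16_aug_pipeline")
+reloaded.transform(data).select("answer.result").show(truncate=False)
+```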
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_synthetic_data_only_16_aug| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|796.9 MB| + +## References + +https://huggingface.co/am-infoweb/QA_SYNTHETIC_DATA_ONLY_16_AUG \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-qa_synthetic_data_only_16_aug_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-qa_synthetic_data_only_16_aug_pipeline_en.md new file mode 100644 index 00000000000000..860f6b7051058f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-qa_synthetic_data_only_16_aug_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English qa_synthetic_data_only_16_aug_pipeline pipeline XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: qa_synthetic_data_only_16_aug_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_synthetic_data_only_16_aug_pipeline` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_synthetic_data_only_16_aug_pipeline_en_5.5.0_3.0_1725556661964.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_synthetic_data_only_16_aug_pipeline_en_5.5.0_3.0_1725556661964.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("qa_synthetic_data_only_16_aug_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("qa_synthetic_data_only_16_aug_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_synthetic_data_only_16_aug_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|796.9 MB| + +## References + +https://huggingface.co/am-infoweb/QA_SYNTHETIC_DATA_ONLY_16_AUG + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_am_infoweb_en.md b/docs/_posts/ahmedlone127/2024-09-05-qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_am_infoweb_en.md new file mode 100644 index 00000000000000..a37b3d05b8d195 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_am_infoweb_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_am_infoweb XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_am_infoweb +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_am_infoweb` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_am_infoweb_en_5.5.0_3.0_1725573804059.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_am_infoweb_en_5.5.0_3.0_1725573804059.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+import sparknlp
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_am_infoweb","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_am_infoweb", "en")
+  .setInputCols(Array("document_question","document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDS.toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
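+
+Since the annotation `result` is an array, rows where no span was extracted can be spotted by checking its first element; this is a sketch and treats a null first element as "no answer found", which is an assumption rather than documented behavior.
+
+```python
+from pyspark.sql import functions as F
+
+# Pull the first extracted span per row; null means nothing was extracted.
+answers = pipelineDF.withColumn("answer_text", F.element_at(F.col("answer.result"), 1))
+answers.select("question", "answer_text").show(truncate=False)
+```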
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_am_infoweb| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|800.1 MB| + +## References + +https://huggingface.co/am-infoweb/QA_SYNTHETIC_DATA_ONLY_17_AUG_BASE_NEW_FINETUNED \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_am_infoweb_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_am_infoweb_pipeline_en.md new file mode 100644 index 00000000000000..bfbe77dbf85835 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_am_infoweb_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_am_infoweb_pipeline pipeline XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_am_infoweb_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_am_infoweb_pipeline` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_am_infoweb_pipeline_en_5.5.0_3.0_1725573922066.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_am_infoweb_pipeline_en_5.5.0_3.0_1725573922066.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_am_infoweb_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_am_infoweb_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_am_infoweb_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|800.2 MB| + +## References + +https://huggingface.co/am-infoweb/QA_SYNTHETIC_DATA_ONLY_17_AUG_BASE_NEW_FINETUNED + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_anuragsingh28_en.md b/docs/_posts/ahmedlone127/2024-09-05-qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_anuragsingh28_en.md new file mode 100644 index 00000000000000..2bda29bac172ad --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_anuragsingh28_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_anuragsingh28 XlmRoBertaForQuestionAnswering from anuragsingh28 +author: John Snow Labs +name: qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_anuragsingh28 +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_anuragsingh28` is a English model originally trained by anuragsingh28. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_anuragsingh28_en_5.5.0_3.0_1725496930559.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_anuragsingh28_en_5.5.0_3.0_1725496930559.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+import sparknlp
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_anuragsingh28","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_anuragsingh28", "en")
+  .setInputCols(Array("document_question","document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDS.toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_synthetic_data_only_17_aug_base_nepal_bhasa_finetuned_anuragsingh28| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|800.1 MB| + +## References + +https://huggingface.co/anuragsingh28/QA_SYNTHETIC_DATA_ONLY_17_AUG_BASE_NEW_FINETUNED \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-qa_synthetic_data_only_finetuned_v1_0_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-qa_synthetic_data_only_finetuned_v1_0_pipeline_en.md new file mode 100644 index 00000000000000..76f11abc478aca --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-qa_synthetic_data_only_finetuned_v1_0_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English qa_synthetic_data_only_finetuned_v1_0_pipeline pipeline XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: qa_synthetic_data_only_finetuned_v1_0_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_synthetic_data_only_finetuned_v1_0_pipeline` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_synthetic_data_only_finetuned_v1_0_pipeline_en_5.5.0_3.0_1725557160773.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_synthetic_data_only_finetuned_v1_0_pipeline_en_5.5.0_3.0_1725557160773.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("qa_synthetic_data_only_finetuned_v1_0_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("qa_synthetic_data_only_finetuned_v1_0_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_synthetic_data_only_finetuned_v1_0_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|803.6 MB| + +## References + +https://huggingface.co/am-infoweb/QA_SYNTHETIC_DATA_ONLY_Finetuned_v1.0 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-quality_estimation_huq_xlm_roberta_english_hungarian_en.md b/docs/_posts/ahmedlone127/2024-09-05-quality_estimation_huq_xlm_roberta_english_hungarian_en.md new file mode 100644 index 00000000000000..e8ee34c6e3dedd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-quality_estimation_huq_xlm_roberta_english_hungarian_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English quality_estimation_huq_xlm_roberta_english_hungarian XlmRoBertaForSequenceClassification from NYTK +author: John Snow Labs +name: quality_estimation_huq_xlm_roberta_english_hungarian +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`quality_estimation_huq_xlm_roberta_english_hungarian` is a English model originally trained by NYTK. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/quality_estimation_huq_xlm_roberta_english_hungarian_en_5.5.0_3.0_1725529480942.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/quality_estimation_huq_xlm_roberta_english_hungarian_en_5.5.0_3.0_1725529480942.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+import sparknlp
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("quality_estimation_huq_xlm_roberta_english_hungarian","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("quality_estimation_huq_xlm_roberta_english_hungarian", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
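+
+The predicted label for each row is carried in the `result` field of the `class` output column and can be read back directly:
+
+```python
+# Show the input text next to the predicted class label.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```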
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|quality_estimation_huq_xlm_roberta_english_hungarian| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|778.8 MB| + +## References + +https://huggingface.co/NYTK/quality-estimation-huq-xlm-roberta-en-hu \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-quality_estimation_huq_xlm_roberta_english_hungarian_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-quality_estimation_huq_xlm_roberta_english_hungarian_pipeline_en.md new file mode 100644 index 00000000000000..ada760467babd2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-quality_estimation_huq_xlm_roberta_english_hungarian_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English quality_estimation_huq_xlm_roberta_english_hungarian_pipeline pipeline XlmRoBertaForSequenceClassification from NYTK +author: John Snow Labs +name: quality_estimation_huq_xlm_roberta_english_hungarian_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`quality_estimation_huq_xlm_roberta_english_hungarian_pipeline` is a English model originally trained by NYTK. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/quality_estimation_huq_xlm_roberta_english_hungarian_pipeline_en_5.5.0_3.0_1725529625129.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/quality_estimation_huq_xlm_roberta_english_hungarian_pipeline_en_5.5.0_3.0_1725529625129.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("quality_estimation_huq_xlm_roberta_english_hungarian_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("quality_estimation_huq_xlm_roberta_english_hungarian_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|quality_estimation_huq_xlm_roberta_english_hungarian_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|778.8 MB| + +## References + +https://huggingface.co/NYTK/quality-estimation-huq-xlm-roberta-en-hu + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-question_answering_xlm_roberta_base_en.md b/docs/_posts/ahmedlone127/2024-09-05-question_answering_xlm_roberta_base_en.md new file mode 100644 index 00000000000000..d06c32665af235 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-question_answering_xlm_roberta_base_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English question_answering_xlm_roberta_base XlmRoBertaForQuestionAnswering from TunahanGokcimen +author: John Snow Labs +name: question_answering_xlm_roberta_base +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`question_answering_xlm_roberta_base` is a English model originally trained by TunahanGokcimen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/question_answering_xlm_roberta_base_en_5.5.0_3.0_1725567562049.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/question_answering_xlm_roberta_base_en_5.5.0_3.0_1725567562049.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+import sparknlp
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("question_answering_xlm_roberta_base","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("question_answering_xlm_roberta_base", "en")
+  .setInputCols(Array("document_question","document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDS.toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|question_answering_xlm_roberta_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|853.0 MB| + +## References + +https://huggingface.co/TunahanGokcimen/Question-Answering-xlm-roberta-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-recipebert_en.md b/docs/_posts/ahmedlone127/2024-09-05-recipebert_en.md new file mode 100644 index 00000000000000..3846e96dd7aaee --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-recipebert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English recipebert BertEmbeddings from alexdseo +author: John Snow Labs +name: recipebert +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`recipebert` is a English model originally trained by alexdseo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/recipebert_en_5.5.0_3.0_1725533679486.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/recipebert_en_5.5.0_3.0_1725533679486.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("recipebert","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("recipebert","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
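+
+Each token produced by the tokenizer receives one embedding. As a follow-up sketch, the token texts and vectors can be inspected side by side; the `embeddings` field inside the annotation is assumed to hold the float vectors.
+
+```python
+# Inspect tokens and their embedding vectors produced by the BertEmbeddings stage.
+pipelineDF.selectExpr("token.result as tokens", "embeddings.embeddings as vectors").show(truncate=True)
+```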
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|recipebert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/alexdseo/RecipeBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-recipebert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-recipebert_pipeline_en.md new file mode 100644 index 00000000000000..a13d7b0500e31a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-recipebert_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English recipebert_pipeline pipeline BertEmbeddings from alexdseo +author: John Snow Labs +name: recipebert_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`recipebert_pipeline` is a English model originally trained by alexdseo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/recipebert_pipeline_en_5.5.0_3.0_1725533698897.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/recipebert_pipeline_en_5.5.0_3.0_1725533698897.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("recipebert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("recipebert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|recipebert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/alexdseo/RecipeBERT + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-regr_4_en.md b/docs/_posts/ahmedlone127/2024-09-05-regr_4_en.md new file mode 100644 index 00000000000000..a10e29009095f9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-regr_4_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English regr_4 RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: regr_4 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`regr_4` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/regr_4_en_5.5.0_3.0_1725541968881.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/regr_4_en_5.5.0_3.0_1725541968881.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+import sparknlp
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("regr_4","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("regr_4", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
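+
+The transformed DataFrame keeps the Spark NLP annotation structure for every stage, which is easiest to understand from its schema; a quick inspection sketch:
+
+```python
+# Show the nested annotation schema and the predicted labels.
+pipelineDF.printSchema()
+pipelineDF.select("class.result").show(truncate=False)
+```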
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|regr_4| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/Regr_4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-regr_4_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-regr_4_pipeline_en.md new file mode 100644 index 00000000000000..57fc07b27dd37a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-regr_4_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English regr_4_pipeline pipeline RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: regr_4_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`regr_4_pipeline` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/regr_4_pipeline_en_5.5.0_3.0_1725541991907.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/regr_4_pipeline_en_5.5.0_3.0_1725541991907.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("regr_4_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("regr_4_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|regr_4_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/Regr_4 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-regression_albert_2_en.md b/docs/_posts/ahmedlone127/2024-09-05-regression_albert_2_en.md new file mode 100644 index 00000000000000..03866d290ea3ee --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-regression_albert_2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English regression_albert_2 AlbertForSequenceClassification from Svetlana0303 +author: John Snow Labs +name: regression_albert_2 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`regression_albert_2` is a English model originally trained by Svetlana0303. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/regression_albert_2_en_5.5.0_3.0_1725543410785.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/regression_albert_2_en_5.5.0_3.0_1725543410785.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+import sparknlp
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = AlbertForSequenceClassification.pretrained("regression_albert_2","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = AlbertForSequenceClassification.pretrained("regression_albert_2", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|regression_albert_2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/Svetlana0303/Regression_Albert_2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-regression_albert_2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-regression_albert_2_pipeline_en.md new file mode 100644 index 00000000000000..4c19f1442f845f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-regression_albert_2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English regression_albert_2_pipeline pipeline AlbertForSequenceClassification from Svetlana0303 +author: John Snow Labs +name: regression_albert_2_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`regression_albert_2_pipeline` is a English model originally trained by Svetlana0303. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/regression_albert_2_pipeline_en_5.5.0_3.0_1725543413204.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/regression_albert_2_pipeline_en_5.5.0_3.0_1725543413204.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("regression_albert_2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("regression_albert_2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|regression_albert_2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/Svetlana0303/Regression_Albert_2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-results_benuehlinger_en.md b/docs/_posts/ahmedlone127/2024-09-05-results_benuehlinger_en.md new file mode 100644 index 00000000000000..1c9d83029e13d8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-results_benuehlinger_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English results_benuehlinger DistilBertForSequenceClassification from Benuehlinger +author: John Snow Labs +name: results_benuehlinger +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`results_benuehlinger` is a English model originally trained by Benuehlinger. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/results_benuehlinger_en_5.5.0_3.0_1725580238248.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/results_benuehlinger_en_5.5.0_3.0_1725580238248.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+import sparknlp
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("results_benuehlinger","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("results_benuehlinger", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|results_benuehlinger| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|250.2 MB| + +## References + +https://huggingface.co/Benuehlinger/results \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-results_benuehlinger_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-results_benuehlinger_pipeline_en.md new file mode 100644 index 00000000000000..283a9a24f2527e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-results_benuehlinger_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English results_benuehlinger_pipeline pipeline DistilBertForSequenceClassification from Benuehlinger +author: John Snow Labs +name: results_benuehlinger_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`results_benuehlinger_pipeline` is a English model originally trained by Benuehlinger. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/results_benuehlinger_pipeline_en_5.5.0_3.0_1725580251509.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/results_benuehlinger_pipeline_en_5.5.0_3.0_1725580251509.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

# df is any Spark DataFrame with a "text" column to annotate
df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")

pipeline = PretrainedPipeline("results_benuehlinger_pipeline", lang = "en")
annotations = pipeline.transform(df)

```
```scala

// df is any Spark DataFrame with a "text" column to annotate
val df = Seq("I love spark-nlp").toDF("text")

val pipeline = new PretrainedPipeline("results_benuehlinger_pipeline", lang = "en")
val annotations = pipeline.transform(df)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|results_benuehlinger_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|250.3 MB| + +## References + +https://huggingface.co/Benuehlinger/results + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-reward_model_deberta_v3_unit_test_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-reward_model_deberta_v3_unit_test_pipeline_en.md new file mode 100644 index 00000000000000..ea700c1dbe2968 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-reward_model_deberta_v3_unit_test_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English reward_model_deberta_v3_unit_test_pipeline pipeline DeBertaForSequenceClassification from MaxJeblick +author: John Snow Labs +name: reward_model_deberta_v3_unit_test_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`reward_model_deberta_v3_unit_test_pipeline` is a English model originally trained by MaxJeblick. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/reward_model_deberta_v3_unit_test_pipeline_en_5.5.0_3.0_1725562396014.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/reward_model_deberta_v3_unit_test_pipeline_en_5.5.0_3.0_1725562396014.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

# df is any Spark DataFrame with a "text" column to annotate
df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")

pipeline = PretrainedPipeline("reward_model_deberta_v3_unit_test_pipeline", lang = "en")
annotations = pipeline.transform(df)

```
```scala

// df is any Spark DataFrame with a "text" column to annotate
val df = Seq("I love spark-nlp").toDF("text")

val pipeline = new PretrainedPipeline("reward_model_deberta_v3_unit_test_pipeline", lang = "en")
val annotations = pipeline.transform(df)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|reward_model_deberta_v3_unit_test_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|7.1 MB| + +## References + +https://huggingface.co/MaxJeblick/reward-model-deberta-v3-unit-test + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-rise_ner_distilbert_base_cased_system_b_v1_en.md b/docs/_posts/ahmedlone127/2024-09-05-rise_ner_distilbert_base_cased_system_b_v1_en.md new file mode 100644 index 00000000000000..49885ff3da7199 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-rise_ner_distilbert_base_cased_system_b_v1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English rise_ner_distilbert_base_cased_system_b_v1 DistilBertForTokenClassification from petersamoaa +author: John Snow Labs +name: rise_ner_distilbert_base_cased_system_b_v1 +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rise_ner_distilbert_base_cased_system_b_v1` is a English model originally trained by petersamoaa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rise_ner_distilbert_base_cased_system_b_v1_en_5.5.0_3.0_1725518637552.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rise_ner_distilbert_base_cased_system_b_v1_en_5.5.0_3.0_1725518637552.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

# assemble raw text into documents, tokenize, then tag each token with an entity label
documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

tokenClassifier = DistilBertForTokenClassification.pretrained("rise_ner_distilbert_base_cased_system_b_v1","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("ner")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

// assemble raw text into documents, tokenize, then tag each token with an entity label
val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols("document")
    .setOutputCol("token")

val tokenClassifier = DistilBertForTokenClassification.pretrained("rise_ner_distilbert_base_cased_system_b_v1", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("ner")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
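The `token` and `ner` columns produced above are arrays of annotations, so tokens and their predicted tags can be displayed side by side; a brief sketch using the column names from the snippet:

```python
# tokens and their predicted entity tags, one row per input text
pipelineDF.select("token.result", "ner.result").show(truncate=False)
```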
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rise_ner_distilbert_base_cased_system_b_v1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|243.9 MB| + +## References + +https://huggingface.co/petersamoaa/rise-ner-distilbert-base-cased-system-b-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-rise_ner_distilbert_base_cased_system_b_v1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-rise_ner_distilbert_base_cased_system_b_v1_pipeline_en.md new file mode 100644 index 00000000000000..f390995469cccf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-rise_ner_distilbert_base_cased_system_b_v1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English rise_ner_distilbert_base_cased_system_b_v1_pipeline pipeline DistilBertForTokenClassification from petersamoaa +author: John Snow Labs +name: rise_ner_distilbert_base_cased_system_b_v1_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rise_ner_distilbert_base_cased_system_b_v1_pipeline` is a English model originally trained by petersamoaa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rise_ner_distilbert_base_cased_system_b_v1_pipeline_en_5.5.0_3.0_1725518650102.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rise_ner_distilbert_base_cased_system_b_v1_pipeline_en_5.5.0_3.0_1725518650102.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

# df is any Spark DataFrame with a "text" column to annotate
df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")

pipeline = PretrainedPipeline("rise_ner_distilbert_base_cased_system_b_v1_pipeline", lang = "en")
annotations = pipeline.transform(df)

```
```scala

// df is any Spark DataFrame with a "text" column to annotate
val df = Seq("I love spark-nlp").toDF("text")

val pipeline = new PretrainedPipeline("rise_ner_distilbert_base_cased_system_b_v1_pipeline", lang = "en")
val annotations = pipeline.transform(df)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rise_ner_distilbert_base_cased_system_b_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.9 MB| + +## References + +https://huggingface.co/petersamoaa/rise-ner-distilbert-base-cased-system-b-v1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-rise_ner_reduced_en.md b/docs/_posts/ahmedlone127/2024-09-05-rise_ner_reduced_en.md new file mode 100644 index 00000000000000..99b49fdb80c56c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-rise_ner_reduced_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English rise_ner_reduced DistilBertForTokenClassification from mappelgren +author: John Snow Labs +name: rise_ner_reduced +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rise_ner_reduced` is a English model originally trained by mappelgren. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rise_ner_reduced_en_5.5.0_3.0_1725495539734.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rise_ner_reduced_en_5.5.0_3.0_1725495539734.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

# assemble raw text into documents, tokenize, then tag each token with an entity label
documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

tokenClassifier = DistilBertForTokenClassification.pretrained("rise_ner_reduced","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("ner")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

// assemble raw text into documents, tokenize, then tag each token with an entity label
val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols("document")
    .setOutputCol("token")

val tokenClassifier = DistilBertForTokenClassification.pretrained("rise_ner_reduced", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("ner")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rise_ner_reduced| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/mappelgren/RISE_NER_REDUCED \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-rise_ner_reduced_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-rise_ner_reduced_pipeline_en.md new file mode 100644 index 00000000000000..79bd0f1d2e9795 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-rise_ner_reduced_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English rise_ner_reduced_pipeline pipeline DistilBertForTokenClassification from mappelgren +author: John Snow Labs +name: rise_ner_reduced_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rise_ner_reduced_pipeline` is a English model originally trained by mappelgren. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rise_ner_reduced_pipeline_en_5.5.0_3.0_1725495557481.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rise_ner_reduced_pipeline_en_5.5.0_3.0_1725495557481.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

# df is any Spark DataFrame with a "text" column to annotate
df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")

pipeline = PretrainedPipeline("rise_ner_reduced_pipeline", lang = "en")
annotations = pipeline.transform(df)

```
```scala

// df is any Spark DataFrame with a "text" column to annotate
val df = Seq("I love spark-nlp").toDF("text")

val pipeline = new PretrainedPipeline("rise_ner_reduced_pipeline", lang = "en")
val annotations = pipeline.transform(df)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rise_ner_reduced_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.4 MB| + +## References + +https://huggingface.co/mappelgren/RISE_NER_REDUCED + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_base_1b_2_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_1b_2_en.md new file mode 100644 index 00000000000000..2d74e715fa10d0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_1b_2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_base_1b_2 RoBertaEmbeddings from nyu-mll +author: John Snow Labs +name: roberta_base_1b_2 +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_1b_2` is a English model originally trained by nyu-mll. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_1b_2_en_5.5.0_3.0_1725572013449.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_1b_2_en_5.5.0_3.0_1725572013449.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("roberta_base_1b_2","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("roberta_base_1b_2","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
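To work with the vectors themselves, explode the `embeddings` column produced above; a short sketch relying on Spark NLP's standard annotation schema (`result` holds the token text, `embeddings` the float vector):

```python
from pyspark.sql import functions as F

# one row per token: the token text and its embedding vector
vectors = pipelineDF.select(F.explode("embeddings").alias("emb")) \
    .select(F.col("emb.result").alias("token"),
            F.col("emb.embeddings").alias("vector"))
vectors.show(truncate=80)
```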
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_1b_2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|296.2 MB| + +## References + +https://huggingface.co/nyu-mll/roberta-base-1B-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_base_afacta_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_afacta_en.md new file mode 100644 index 00000000000000..141f32d8944b42 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_afacta_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_base_afacta RoBertaForSequenceClassification from JingweiNi +author: John Snow Labs +name: roberta_base_afacta +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_afacta` is a English model originally trained by JingweiNi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_afacta_en_5.5.0_3.0_1725542402826.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_afacta_en_5.5.0_3.0_1725542402826.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

# assemble raw text into documents, tokenize, then classify each document
documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

sequenceClassifier = RoBertaForSequenceClassification.pretrained("roberta_base_afacta","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("class")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

// assemble raw text into documents, tokenize, then classify each document
val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols(Array("document"))
    .setOutputCol("token")

val sequenceClassifier = RoBertaForSequenceClassification.pretrained("roberta_base_afacta", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("class")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_afacta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|434.8 MB| + +## References + +https://huggingface.co/JingweiNi/roberta-base-afacta \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_base_afacta_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_afacta_pipeline_en.md new file mode 100644 index 00000000000000..c23d1f8dd34053 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_afacta_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_base_afacta_pipeline pipeline RoBertaForSequenceClassification from JingweiNi +author: John Snow Labs +name: roberta_base_afacta_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_afacta_pipeline` is a English model originally trained by JingweiNi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_afacta_pipeline_en_5.5.0_3.0_1725542439470.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_afacta_pipeline_en_5.5.0_3.0_1725542439470.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

# df is any Spark DataFrame with a "text" column to annotate
df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")

pipeline = PretrainedPipeline("roberta_base_afacta_pipeline", lang = "en")
annotations = pipeline.transform(df)

```
```scala

// df is any Spark DataFrame with a "text" column to annotate
val df = Seq("I love spark-nlp").toDF("text")

val pipeline = new PretrainedPipeline("roberta_base_afacta_pipeline", lang = "en")
val annotations = pipeline.transform(df)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_afacta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|434.8 MB| + +## References + +https://huggingface.co/JingweiNi/roberta-base-afacta + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_base_bne_finetuned_tripadvisordomainadaptation_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_bne_finetuned_tripadvisordomainadaptation_en.md new file mode 100644 index 00000000000000..dd77f14371d468 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_bne_finetuned_tripadvisordomainadaptation_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_base_bne_finetuned_tripadvisordomainadaptation RoBertaEmbeddings from vg055 +author: John Snow Labs +name: roberta_base_bne_finetuned_tripadvisordomainadaptation +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_bne_finetuned_tripadvisordomainadaptation` is a English model originally trained by vg055. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_bne_finetuned_tripadvisordomainadaptation_en_5.5.0_3.0_1725577448044.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_bne_finetuned_tripadvisordomainadaptation_en_5.5.0_3.0_1725577448044.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("roberta_base_bne_finetuned_tripadvisordomainadaptation","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("roberta_base_bne_finetuned_tripadvisordomainadaptation","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_bne_finetuned_tripadvisordomainadaptation| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|463.7 MB| + +## References + +https://huggingface.co/vg055/roberta-base-bne-finetuned-TripAdvisorDomainAdaptation \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_base_bne_finetuned_tripadvisordomainadaptation_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_bne_finetuned_tripadvisordomainadaptation_pipeline_en.md new file mode 100644 index 00000000000000..be9657fccb9b10 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_bne_finetuned_tripadvisordomainadaptation_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_base_bne_finetuned_tripadvisordomainadaptation_pipeline pipeline RoBertaEmbeddings from vg055 +author: John Snow Labs +name: roberta_base_bne_finetuned_tripadvisordomainadaptation_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_bne_finetuned_tripadvisordomainadaptation_pipeline` is a English model originally trained by vg055. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_bne_finetuned_tripadvisordomainadaptation_pipeline_en_5.5.0_3.0_1725577471948.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_bne_finetuned_tripadvisordomainadaptation_pipeline_en_5.5.0_3.0_1725577471948.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

# df is any Spark DataFrame with a "text" column to annotate
df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")

pipeline = PretrainedPipeline("roberta_base_bne_finetuned_tripadvisordomainadaptation_pipeline", lang = "en")
annotations = pipeline.transform(df)

```
```scala

// df is any Spark DataFrame with a "text" column to annotate
val df = Seq("I love spark-nlp").toDF("text")

val pipeline = new PretrainedPipeline("roberta_base_bne_finetuned_tripadvisordomainadaptation_pipeline", lang = "en")
val annotations = pipeline.transform(df)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_bne_finetuned_tripadvisordomainadaptation_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|463.7 MB| + +## References + +https://huggingface.co/vg055/roberta-base-bne-finetuned-TripAdvisorDomainAdaptation + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_base_cheng98_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_cheng98_en.md new file mode 100644 index 00000000000000..f7f22799492fc1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_cheng98_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_base_cheng98 RoBertaEmbeddings from Cheng98 +author: John Snow Labs +name: roberta_base_cheng98 +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_cheng98` is a English model originally trained by Cheng98. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_cheng98_en_5.5.0_3.0_1725565991744.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_cheng98_en_5.5.0_3.0_1725565991744.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("roberta_base_cheng98","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("roberta_base_cheng98","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_cheng98| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|298.2 MB| + +## References + +https://huggingface.co/Cheng98/roberta-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_base_cheng98_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_cheng98_pipeline_en.md new file mode 100644 index 00000000000000..590b8eb6348b57 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_cheng98_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_base_cheng98_pipeline pipeline RoBertaEmbeddings from Cheng98 +author: John Snow Labs +name: roberta_base_cheng98_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_cheng98_pipeline` is a English model originally trained by Cheng98. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_cheng98_pipeline_en_5.5.0_3.0_1725566083686.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_cheng98_pipeline_en_5.5.0_3.0_1725566083686.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

# df is any Spark DataFrame with a "text" column to annotate
df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")

pipeline = PretrainedPipeline("roberta_base_cheng98_pipeline", lang = "en")
annotations = pipeline.transform(df)

```
```scala

// df is any Spark DataFrame with a "text" column to annotate
val df = Seq("I love spark-nlp").toDF("text")

val pipeline = new PretrainedPipeline("roberta_base_cheng98_pipeline", lang = "en")
val annotations = pipeline.transform(df)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_cheng98_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|298.2 MB| + +## References + +https://huggingface.co/Cheng98/roberta-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_base_epoch_52_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_epoch_52_pipeline_en.md new file mode 100644 index 00000000000000..aee17b0f7fc27e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_epoch_52_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_base_epoch_52_pipeline pipeline RoBertaEmbeddings from yanaiela +author: John Snow Labs +name: roberta_base_epoch_52_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_epoch_52_pipeline` is a English model originally trained by yanaiela. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_epoch_52_pipeline_en_5.5.0_3.0_1725577599754.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_epoch_52_pipeline_en_5.5.0_3.0_1725577599754.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

# df is any Spark DataFrame with a "text" column to annotate
df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")

pipeline = PretrainedPipeline("roberta_base_epoch_52_pipeline", lang = "en")
annotations = pipeline.transform(df)

```
```scala

// df is any Spark DataFrame with a "text" column to annotate
val df = Seq("I love spark-nlp").toDF("text")

val pipeline = new PretrainedPipeline("roberta_base_epoch_52_pipeline", lang = "en")
val annotations = pipeline.transform(df)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_epoch_52_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|297.3 MB| + +## References + +https://huggingface.co/yanaiela/roberta-base-epoch_52 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_base_epoch_66_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_epoch_66_en.md new file mode 100644 index 00000000000000..e3a6a30c2afbcb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_epoch_66_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_base_epoch_66 RoBertaEmbeddings from yanaiela +author: John Snow Labs +name: roberta_base_epoch_66 +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_epoch_66` is a English model originally trained by yanaiela. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_epoch_66_en_5.5.0_3.0_1725572007670.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_epoch_66_en_5.5.0_3.0_1725572007670.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("roberta_base_epoch_66","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("roberta_base_epoch_66","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_epoch_66| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|297.3 MB| + +## References + +https://huggingface.co/yanaiela/roberta-base-epoch_66 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_base_epoch_66_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_epoch_66_pipeline_en.md new file mode 100644 index 00000000000000..db306218fccccf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_epoch_66_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_base_epoch_66_pipeline pipeline RoBertaEmbeddings from yanaiela +author: John Snow Labs +name: roberta_base_epoch_66_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_epoch_66_pipeline` is a English model originally trained by yanaiela. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_epoch_66_pipeline_en_5.5.0_3.0_1725572095992.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_epoch_66_pipeline_en_5.5.0_3.0_1725572095992.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

# df is any Spark DataFrame with a "text" column to annotate
df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")

pipeline = PretrainedPipeline("roberta_base_epoch_66_pipeline", lang = "en")
annotations = pipeline.transform(df)

```
```scala

// df is any Spark DataFrame with a "text" column to annotate
val df = Seq("I love spark-nlp").toDF("text")

val pipeline = new PretrainedPipeline("roberta_base_epoch_66_pipeline", lang = "en")
val annotations = pipeline.transform(df)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_epoch_66_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|297.3 MB| + +## References + +https://huggingface.co/yanaiela/roberta-base-epoch_66 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_base_epoch_9_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_epoch_9_en.md new file mode 100644 index 00000000000000..666ae0c8f1acf9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_epoch_9_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_base_epoch_9 RoBertaEmbeddings from yanaiela +author: John Snow Labs +name: roberta_base_epoch_9 +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_epoch_9` is a English model originally trained by yanaiela. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_epoch_9_en_5.5.0_3.0_1725578745453.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_epoch_9_en_5.5.0_3.0_1725578745453.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("roberta_base_epoch_9","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("roberta_base_epoch_9","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_epoch_9| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|296.9 MB| + +## References + +https://huggingface.co/yanaiela/roberta-base-epoch_9 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_base_epoch_9_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_epoch_9_pipeline_en.md new file mode 100644 index 00000000000000..2d3def8ed1b4d7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_epoch_9_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_base_epoch_9_pipeline pipeline RoBertaEmbeddings from yanaiela +author: John Snow Labs +name: roberta_base_epoch_9_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_epoch_9_pipeline` is a English model originally trained by yanaiela. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_epoch_9_pipeline_en_5.5.0_3.0_1725578829978.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_epoch_9_pipeline_en_5.5.0_3.0_1725578829978.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

# df is any Spark DataFrame with a "text" column to annotate
df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")

pipeline = PretrainedPipeline("roberta_base_epoch_9_pipeline", lang = "en")
annotations = pipeline.transform(df)

```
```scala

// df is any Spark DataFrame with a "text" column to annotate
val df = Seq("I love spark-nlp").toDF("text")

val pipeline = new PretrainedPipeline("roberta_base_epoch_9_pipeline", lang = "en")
val annotations = pipeline.transform(df)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_epoch_9_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|296.9 MB| + +## References + +https://huggingface.co/yanaiela/roberta-base-epoch_9 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_base_exp_32_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_exp_32_pipeline_xx.md new file mode 100644 index 00000000000000..52473a9da07895 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_exp_32_pipeline_xx.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Multilingual roberta_base_exp_32_pipeline pipeline XlmRoBertaEmbeddings from pere +author: John Snow Labs +name: roberta_base_exp_32_pipeline +date: 2024-09-05 +tags: [xx, open_source, pipeline, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_exp_32_pipeline` is a Multilingual model originally trained by pere. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_exp_32_pipeline_xx_5.5.0_3.0_1725508171805.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_exp_32_pipeline_xx_5.5.0_3.0_1725508171805.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

# df is any Spark DataFrame with a "text" column to annotate
df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")

pipeline = PretrainedPipeline("roberta_base_exp_32_pipeline", lang = "xx")
annotations = pipeline.transform(df)

```
```scala

// df is any Spark DataFrame with a "text" column to annotate
val df = Seq("I love spark-nlp").toDF("text")

val pipeline = new PretrainedPipeline("roberta_base_exp_32_pipeline", lang = "xx")
val annotations = pipeline.transform(df)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_exp_32_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|1.0 GB| + +## References + +https://huggingface.co/pere/roberta-base-exp-32 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_base_finetuned_ner_sevixdd_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_finetuned_ner_sevixdd_pipeline_en.md new file mode 100644 index 00000000000000..c5dc1b7790b741 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_finetuned_ner_sevixdd_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_base_finetuned_ner_sevixdd_pipeline pipeline RoBertaForTokenClassification from Sevixdd +author: John Snow Labs +name: roberta_base_finetuned_ner_sevixdd_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_finetuned_ner_sevixdd_pipeline` is a English model originally trained by Sevixdd. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_finetuned_ner_sevixdd_pipeline_en_5.5.0_3.0_1725512436571.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_finetuned_ner_sevixdd_pipeline_en_5.5.0_3.0_1725512436571.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

# df is any Spark DataFrame with a "text" column to annotate
df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")

pipeline = PretrainedPipeline("roberta_base_finetuned_ner_sevixdd_pipeline", lang = "en")
annotations = pipeline.transform(df)

```
```scala

// df is any Spark DataFrame with a "text" column to annotate
val df = Seq("I love spark-nlp").toDF("text")

val pipeline = new PretrainedPipeline("roberta_base_finetuned_ner_sevixdd_pipeline", lang = "en")
val annotations = pipeline.transform(df)

```
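After the pipeline above has produced `annotations`, the entity tags can be inspected like any other Spark NLP output. A hedged sketch; the `ner` column name is an assumption based on the standalone token-classification cards and may differ in the packaged pipeline:

```python
# assumes the pipeline writes tokens to "token" and entity tags to "ner"
annotations.select("token.result", "ner.result").show(truncate=False)
```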
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_finetuned_ner_sevixdd_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|453.4 MB| + +## References + +https://huggingface.co/Sevixdd/roberta-base-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_base_finetuned_squad_hasan55_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_finetuned_squad_hasan55_en.md new file mode 100644 index 00000000000000..f1ab00f7f5cc15 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_finetuned_squad_hasan55_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English roberta_base_finetuned_squad_hasan55 RoBertaForQuestionAnswering from HASAN55 +author: John Snow Labs +name: roberta_base_finetuned_squad_hasan55 +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_finetuned_squad_hasan55` is a English model originally trained by HASAN55. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_finetuned_squad_hasan55_en_5.5.0_3.0_1725576123727.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_finetuned_squad_hasan55_en_5.5.0_3.0_1725576123727.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

# assemble the question/context pair, then extract the answer span
documentAssembler = MultiDocumentAssembler() \
    .setInputCols(["question", "context"]) \
    .setOutputCols(["document_question", "document_context"])

spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_base_finetuned_squad_hasan55","en") \
    .setInputCols(["document_question","document_context"]) \
    .setOutputCol("answer")

pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

// assemble the question/context pair, then extract the answer span
val documentAssembler = new MultiDocumentAssembler()
    .setInputCols(Array("question", "context"))
    .setOutputCols(Array("document_question", "document_context"))

val spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_base_finetuned_squad_hasan55", "en")
    .setInputCols(Array("document_question","document_context"))
    .setOutputCol("answer")

val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
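With `pipelineDF` built as above, the extracted answer spans sit in the `answer` column; a quick follow-up sketch:

```python
# question alongside the predicted answer span
pipelineDF.select("document_question.result", "answer.result").show(truncate=False)
```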
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_finetuned_squad_hasan55| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|456.8 MB| + +## References + +https://huggingface.co/HASAN55/roberta-base-finetuned-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_base_finetuned_squad_hasan55_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_finetuned_squad_hasan55_pipeline_en.md new file mode 100644 index 00000000000000..d7445ad6941357 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_finetuned_squad_hasan55_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English roberta_base_finetuned_squad_hasan55_pipeline pipeline RoBertaForQuestionAnswering from HASAN55 +author: John Snow Labs +name: roberta_base_finetuned_squad_hasan55_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_finetuned_squad_hasan55_pipeline` is a English model originally trained by HASAN55. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_finetuned_squad_hasan55_pipeline_en_5.5.0_3.0_1725576149350.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_finetuned_squad_hasan55_pipeline_en_5.5.0_3.0_1725576149350.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_finetuned_squad_hasan55_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_finetuned_squad_hasan55_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
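+
+The snippets above assume an input DataFrame `df` already exists. A minimal sketch of building one, assuming the bundled document assembler reads the same `question`/`context` columns as the standalone model example:
+
+```python
+# Hypothetical input; adjust the column names if the bundled assembler expects different ones.
+df = spark.createDataFrame(
+    [["What framework do I use?", "I use spark-nlp."]]
+).toDF("question", "context")
+
+annotations = pipeline.transform(df)
+```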
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_finetuned_squad_hasan55_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|456.8 MB| + +## References + +https://huggingface.co/HASAN55/roberta-base-finetuned-squad + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_base_finetuned_wallisian_manual_2ep_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_finetuned_wallisian_manual_2ep_en.md new file mode 100644 index 00000000000000..43b4189cbf3714 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_finetuned_wallisian_manual_2ep_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_base_finetuned_wallisian_manual_2ep RoBertaEmbeddings from btamm12 +author: John Snow Labs +name: roberta_base_finetuned_wallisian_manual_2ep +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_finetuned_wallisian_manual_2ep` is a English model originally trained by btamm12. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_finetuned_wallisian_manual_2ep_en_5.5.0_3.0_1725577669042.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_finetuned_wallisian_manual_2ep_en_5.5.0_3.0_1725577669042.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("roberta_base_finetuned_wallisian_manual_2ep","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("roberta_base_finetuned_wallisian_manual_2ep","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
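+
+To inspect the token vectors produced above, the `embeddings` column can be exploded; a short sketch using the standard Spark NLP annotation schema:
+
+```python
+# One annotation per token; `result` holds the token text, `embeddings` the vector.
+pipelineDF.selectExpr("explode(embeddings) as emb") \
+    .selectExpr("emb.result as token", "emb.embeddings as vector") \
+    .show(truncate=80)
+```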
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_finetuned_wallisian_manual_2ep| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|463.9 MB| + +## References + +https://huggingface.co/btamm12/roberta-base-finetuned-wls-manual-2ep \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_base_finetuned_wallisian_manual_2ep_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_finetuned_wallisian_manual_2ep_pipeline_en.md new file mode 100644 index 00000000000000..d5f2fda5c8d289 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_finetuned_wallisian_manual_2ep_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_base_finetuned_wallisian_manual_2ep_pipeline pipeline RoBertaEmbeddings from btamm12 +author: John Snow Labs +name: roberta_base_finetuned_wallisian_manual_2ep_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_finetuned_wallisian_manual_2ep_pipeline` is a English model originally trained by btamm12. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_finetuned_wallisian_manual_2ep_pipeline_en_5.5.0_3.0_1725577694845.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_finetuned_wallisian_manual_2ep_pipeline_en_5.5.0_3.0_1725577694845.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_finetuned_wallisian_manual_2ep_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_finetuned_wallisian_manual_2ep_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_finetuned_wallisian_manual_2ep_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|463.9 MB| + +## References + +https://huggingface.co/btamm12/roberta-base-finetuned-wls-manual-2ep + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_base_marathi_marh_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_marathi_marh_en.md new file mode 100644 index 00000000000000..8577b4006bcea2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_marathi_marh_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_base_marathi_marh RoBertaEmbeddings from flax-community +author: John Snow Labs +name: roberta_base_marathi_marh +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_marathi_marh` is a English model originally trained by flax-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_marathi_marh_en_5.5.0_3.0_1725566295262.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_marathi_marh_en_5.5.0_3.0_1725566295262.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("roberta_base_marathi_marh","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("roberta_base_marathi_marh","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_marathi_marh| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|464.5 MB| + +## References + +https://huggingface.co/flax-community/roberta-base-mr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_base_ner_conll2003_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_ner_conll2003_en.md new file mode 100644 index 00000000000000..eddfcced826317 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_ner_conll2003_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_base_ner_conll2003 RoBertaForTokenClassification from andi611 +author: John Snow Labs +name: roberta_base_ner_conll2003 +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_ner_conll2003` is a English model originally trained by andi611. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_ner_conll2003_en_5.5.0_3.0_1725512682232.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_ner_conll2003_en_5.5.0_3.0_1725512682232.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_base_ner_conll2003","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_base_ner_conll2003", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
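+
+To group the token-level tags into entity chunks, a `NerConverter` stage can be appended after the classifier; a sketch under the column names used above:
+
+```python
+from sparknlp.annotator import NerConverter
+
+# Merges IOB tags from the `ner` column into entity chunks.
+converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+converter.transform(pipelineDF) \
+    .selectExpr("explode(ner_chunk) as chunk") \
+    .selectExpr("chunk.result as text", "chunk.metadata['entity'] as entity") \
+    .show(truncate=False)
+```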
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_ner_conll2003| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|436.8 MB| + +## References + +https://huggingface.co/andi611/roberta-base-ner-conll2003 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_base_ner_conll2003_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_ner_conll2003_pipeline_en.md new file mode 100644 index 00000000000000..499bfcf164b8f9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_ner_conll2003_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_base_ner_conll2003_pipeline pipeline RoBertaForTokenClassification from andi611 +author: John Snow Labs +name: roberta_base_ner_conll2003_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_ner_conll2003_pipeline` is a English model originally trained by andi611. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_ner_conll2003_pipeline_en_5.5.0_3.0_1725512714506.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_ner_conll2003_pipeline_en_5.5.0_3.0_1725512714506.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_ner_conll2003_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_ner_conll2003_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_ner_conll2003_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|436.8 MB| + +## References + +https://huggingface.co/andi611/roberta-base-ner-conll2003 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_base_ner_updated_pipeline_mn.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_ner_updated_pipeline_mn.md new file mode 100644 index 00000000000000..76d84190e597e0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_ner_updated_pipeline_mn.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Mongolian roberta_base_ner_updated_pipeline pipeline RoBertaForTokenClassification from Bachi06 +author: John Snow Labs +name: roberta_base_ner_updated_pipeline +date: 2024-09-05 +tags: [mn, open_source, pipeline, onnx] +task: Named Entity Recognition +language: mn +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_ner_updated_pipeline` is a Mongolian model originally trained by Bachi06. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_ner_updated_pipeline_mn_5.5.0_3.0_1725512498992.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_ner_updated_pipeline_mn_5.5.0_3.0_1725512498992.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_ner_updated_pipeline", lang = "mn") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_ner_updated_pipeline", lang = "mn") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_ner_updated_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|mn| +|Size:|465.6 MB| + +## References + +https://huggingface.co/Bachi06/roberta-base-ner-updated + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_base_sentiment_bulgarian_bg.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_sentiment_bulgarian_bg.md new file mode 100644 index 00000000000000..504ae54939ab24 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_sentiment_bulgarian_bg.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Bulgarian roberta_base_sentiment_bulgarian XlmRoBertaForSequenceClassification from rmihaylov +author: John Snow Labs +name: roberta_base_sentiment_bulgarian +date: 2024-09-05 +tags: [bg, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: bg +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_sentiment_bulgarian` is a Bulgarian model originally trained by rmihaylov. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_sentiment_bulgarian_bg_5.5.0_3.0_1725526498700.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_sentiment_bulgarian_bg_5.5.0_3.0_1725526498700.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("roberta_base_sentiment_bulgarian","bg") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("roberta_base_sentiment_bulgarian", "bg")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
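+
+The predicted sentiment label ends up in the `class` column; a minimal sketch for reading it back, assuming the standard Spark NLP annotation schema:
+
+```python
+# Each row carries one category annotation; `result` holds the predicted label.
+pipelineDF.select("class.result").show(truncate=False)
+```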
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_sentiment_bulgarian| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|bg| +|Size:|361.4 MB| + +## References + +https://huggingface.co/rmihaylov/roberta-base-sentiment-bg \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_base_sentiment_bulgarian_pipeline_bg.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_sentiment_bulgarian_pipeline_bg.md new file mode 100644 index 00000000000000..8ad34d941c0a45 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_sentiment_bulgarian_pipeline_bg.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Bulgarian roberta_base_sentiment_bulgarian_pipeline pipeline XlmRoBertaForSequenceClassification from rmihaylov +author: John Snow Labs +name: roberta_base_sentiment_bulgarian_pipeline +date: 2024-09-05 +tags: [bg, open_source, pipeline, onnx] +task: Text Classification +language: bg +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_sentiment_bulgarian_pipeline` is a Bulgarian model originally trained by rmihaylov. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_sentiment_bulgarian_pipeline_bg_5.5.0_3.0_1725526522511.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_sentiment_bulgarian_pipeline_bg_5.5.0_3.0_1725526522511.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_sentiment_bulgarian_pipeline", lang = "bg") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_sentiment_bulgarian_pipeline", lang = "bg") +val annotations = pipeline.transform(df) + +``` +
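+
+Besides `transform`, `PretrainedPipeline` also exposes an `annotate` helper for single strings; a sketch (the Bulgarian sample sentence is made up for illustration):
+
+```python
+# annotate() runs the whole pipeline on one string and returns a dict keyed by output column names.
+result = pipeline.annotate("Много добър филм!")
+print(result)
+```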
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_sentiment_bulgarian_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|bg| +|Size:|361.5 MB| + +## References + +https://huggingface.co/rmihaylov/roberta-base-sentiment-bg + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_base_squad2_f_arnold_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_squad2_f_arnold_en.md new file mode 100644 index 00000000000000..e65bfa202db4d2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_squad2_f_arnold_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English roberta_base_squad2_f_arnold RoBertaForQuestionAnswering from f-arnold +author: John Snow Labs +name: roberta_base_squad2_f_arnold +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_squad2_f_arnold` is a English model originally trained by f-arnold. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_squad2_f_arnold_en_5.5.0_3.0_1725577097416.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_squad2_f_arnold_en_5.5.0_3.0_1725577097416.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_base_squad2_f_arnold","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_base_squad2_f_arnold", "en")
+  .setInputCols(Array("document_question", "document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_squad2_f_arnold| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|463.6 MB| + +## References + +https://huggingface.co/f-arnold/roberta-base-squad2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_base_squad2_f_arnold_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_squad2_f_arnold_pipeline_en.md new file mode 100644 index 00000000000000..a51b9e2786f970 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_squad2_f_arnold_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English roberta_base_squad2_f_arnold_pipeline pipeline RoBertaForQuestionAnswering from f-arnold +author: John Snow Labs +name: roberta_base_squad2_f_arnold_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_squad2_f_arnold_pipeline` is a English model originally trained by f-arnold. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_squad2_f_arnold_pipeline_en_5.5.0_3.0_1725577120005.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_squad2_f_arnold_pipeline_en_5.5.0_3.0_1725577120005.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_squad2_f_arnold_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_squad2_f_arnold_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_squad2_f_arnold_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|463.6 MB| + +## References + +https://huggingface.co/f-arnold/roberta-base-squad2 + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_base_squad2_graphcore_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_squad2_graphcore_en.md new file mode 100644 index 00000000000000..c11a122c47d53f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_squad2_graphcore_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English roberta_base_squad2_graphcore RoBertaForQuestionAnswering from Graphcore +author: John Snow Labs +name: roberta_base_squad2_graphcore +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_squad2_graphcore` is a English model originally trained by Graphcore. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_squad2_graphcore_en_5.5.0_3.0_1725576797562.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_squad2_graphcore_en_5.5.0_3.0_1725576797562.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_base_squad2_graphcore","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_base_squad2_graphcore", "en")
+  .setInputCols(Array("document_question", "document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_squad2_graphcore| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|298.4 MB| + +## References + +https://huggingface.co/Graphcore/roberta-base-squad2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_base_squad2_graphcore_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_squad2_graphcore_pipeline_en.md new file mode 100644 index 00000000000000..077087f35aef72 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_base_squad2_graphcore_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English roberta_base_squad2_graphcore_pipeline pipeline RoBertaForQuestionAnswering from Graphcore +author: John Snow Labs +name: roberta_base_squad2_graphcore_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_squad2_graphcore_pipeline` is a English model originally trained by Graphcore. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_squad2_graphcore_pipeline_en_5.5.0_3.0_1725576883315.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_squad2_graphcore_pipeline_en_5.5.0_3.0_1725576883315.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_squad2_graphcore_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_squad2_graphcore_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_squad2_graphcore_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|298.5 MB| + +## References + +https://huggingface.co/Graphcore/roberta-base-squad2 + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_conll_learning_rate2e5_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_conll_learning_rate2e5_en.md new file mode 100644 index 00000000000000..56e68f761b6434 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_conll_learning_rate2e5_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_conll_learning_rate2e5 RoBertaForTokenClassification from ICT2214Team7 +author: John Snow Labs +name: roberta_conll_learning_rate2e5 +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_conll_learning_rate2e5` is a English model originally trained by ICT2214Team7. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_conll_learning_rate2e5_en_5.5.0_3.0_1725512082556.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_conll_learning_rate2e5_en_5.5.0_3.0_1725512082556.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_conll_learning_rate2e5","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_conll_learning_rate2e5", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_conll_learning_rate2e5| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|306.5 MB| + +## References + +https://huggingface.co/ICT2214Team7/RoBERTa_conll_learning_rate2e5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_conll_learning_rate2e5_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_conll_learning_rate2e5_pipeline_en.md new file mode 100644 index 00000000000000..a1b146201a0b7f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_conll_learning_rate2e5_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_conll_learning_rate2e5_pipeline pipeline RoBertaForTokenClassification from ICT2214Team7 +author: John Snow Labs +name: roberta_conll_learning_rate2e5_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_conll_learning_rate2e5_pipeline` is a English model originally trained by ICT2214Team7. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_conll_learning_rate2e5_pipeline_en_5.5.0_3.0_1725512096888.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_conll_learning_rate2e5_pipeline_en_5.5.0_3.0_1725512096888.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_conll_learning_rate2e5_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_conll_learning_rate2e5_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_conll_learning_rate2e5_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|306.5 MB| + +## References + +https://huggingface.co/ICT2214Team7/RoBERTa_conll_learning_rate2e5 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_fake_news_detection_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_fake_news_detection_en.md new file mode 100644 index 00000000000000..31b4a374f8c285 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_fake_news_detection_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_fake_news_detection RoBertaEmbeddings from Ramansh +author: John Snow Labs +name: roberta_fake_news_detection +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_fake_news_detection` is a English model originally trained by Ramansh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_fake_news_detection_en_5.5.0_3.0_1725572985811.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_fake_news_detection_en_5.5.0_3.0_1725572985811.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("roberta_fake_news_detection","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("roberta_fake_news_detection","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_fake_news_detection| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|464.1 MB| + +## References + +https://huggingface.co/Ramansh/RoBERTa-fake-news-detection \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_fake_news_detection_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_fake_news_detection_pipeline_en.md new file mode 100644 index 00000000000000..578fc86013ec22 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_fake_news_detection_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_fake_news_detection_pipeline pipeline RoBertaEmbeddings from Ramansh +author: John Snow Labs +name: roberta_fake_news_detection_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_fake_news_detection_pipeline` is a English model originally trained by Ramansh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_fake_news_detection_pipeline_en_5.5.0_3.0_1725573009990.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_fake_news_detection_pipeline_en_5.5.0_3.0_1725573009990.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_fake_news_detection_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_fake_news_detection_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_fake_news_detection_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|464.2 MB| + +## References + +https://huggingface.co/Ramansh/RoBERTa-fake-news-detection + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_go_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_go_en.md new file mode 100644 index 00000000000000..87b48a48c2f0fe --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_go_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_go RoBertaEmbeddings from dbernsohn +author: John Snow Labs +name: roberta_go +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_go` is a English model originally trained by dbernsohn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_go_en_5.5.0_3.0_1725566515130.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_go_en_5.5.0_3.0_1725566515130.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("roberta_go","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("roberta_go","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_go| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|311.6 MB| + +## References + +https://huggingface.co/dbernsohn/roberta-go \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_go_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_go_pipeline_en.md new file mode 100644 index 00000000000000..ac198c342e8e0a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_go_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_go_pipeline pipeline RoBertaEmbeddings from dbernsohn +author: John Snow Labs +name: roberta_go_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_go_pipeline` is a English model originally trained by dbernsohn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_go_pipeline_en_5.5.0_3.0_1725566531236.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_go_pipeline_en_5.5.0_3.0_1725566531236.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_go_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_go_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_go_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|311.6 MB| + +## References + +https://huggingface.co/dbernsohn/roberta-go + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_large_1802_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_large_1802_en.md new file mode 100644 index 00000000000000..67a60926d23a20 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_large_1802_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_large_1802 RoBertaEmbeddings from JamesKim +author: John Snow Labs +name: roberta_large_1802 +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_large_1802` is a English model originally trained by JamesKim. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_large_1802_en_5.5.0_3.0_1725578402246.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_large_1802_en_5.5.0_3.0_1725578402246.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("roberta_large_1802","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("roberta_large_1802","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_large_1802| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/JamesKim/roberta-large_1802 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_large_depression_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_large_depression_en.md new file mode 100644 index 00000000000000..5f342bdd909731 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_large_depression_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_large_depression RoBertaForSequenceClassification from rafalposwiata +author: John Snow Labs +name: roberta_large_depression +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_large_depression` is a English model originally trained by rafalposwiata. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_large_depression_en_5.5.0_3.0_1725542687668.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_large_depression_en_5.5.0_3.0_1725542687668.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("roberta_large_depression","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("roberta_large_depression", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_large_depression| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/rafalposwiata/roberta-large-depression \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_large_depression_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_large_depression_pipeline_en.md new file mode 100644 index 00000000000000..75927d899fb975 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_large_depression_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_large_depression_pipeline pipeline RoBertaForSequenceClassification from rafalposwiata +author: John Snow Labs +name: roberta_large_depression_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_large_depression_pipeline` is a English model originally trained by rafalposwiata. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_large_depression_pipeline_en_5.5.0_3.0_1725542764268.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_large_depression_pipeline_en_5.5.0_3.0_1725542764268.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_large_depression_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_large_depression_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
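+
+The snippet above references a DataFrame `df` that is never constructed. A minimal sketch of preparing one and reading the prediction is shown below; the `text` column name follows the DocumentAssembler convention used in these cards, and the sample sentence is only a placeholder:
+
+```python
+# Hypothetical input row: the bundled DocumentAssembler reads the "text" column.
+df = spark.createDataFrame([["I have been feeling low and exhausted for weeks"]]).toDF("text")
+
+annotations = pipeline.transform(df)
+annotations.select("class.result").show(truncate=False)
+```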
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_large_depression_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/rafalposwiata/roberta-large-depression + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_large_mrqa_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_large_mrqa_en.md new file mode 100644 index 00000000000000..be45d50ee405aa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_large_mrqa_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English roberta_large_mrqa RoBertaForQuestionAnswering from VMware +author: John Snow Labs +name: roberta_large_mrqa +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_large_mrqa` is a English model originally trained by VMware. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_large_mrqa_en_5.5.0_3.0_1725576770809.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_large_mrqa_en_5.5.0_3.0_1725576770809.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_large_mrqa","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_large_mrqa", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
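+
+Once the example above has run, the extracted span sits in the `answer` output column of `pipelineDF`; a minimal way to inspect it is:
+
+```python
+# Each annotation in "answer" holds the answer text in `result` and a score
+# in `metadata`.
+pipelineDF.select("answer.result", "answer.metadata").show(truncate=False)
+```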
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_large_mrqa| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/VMware/roberta-large-mrqa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_large_mrqa_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_large_mrqa_pipeline_en.md new file mode 100644 index 00000000000000..d67df5a1f2ab8c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_large_mrqa_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English roberta_large_mrqa_pipeline pipeline RoBertaForQuestionAnswering from VMware +author: John Snow Labs +name: roberta_large_mrqa_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_large_mrqa_pipeline` is a English model originally trained by VMware. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_large_mrqa_pipeline_en_5.5.0_3.0_1725576837136.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_large_mrqa_pipeline_en_5.5.0_3.0_1725576837136.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_large_mrqa_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_large_mrqa_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
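+
+Question-answering pipelines expect two input columns rather than the single `text` column used elsewhere, so it can help to inspect the bundled stages before building `df`. A small sketch, assuming the standard `PretrainedPipeline` API where the underlying `PipelineModel` is exposed as `pipeline.model`:
+
+```python
+# List the annotators packed into the pretrained pipeline; the first stage is the
+# MultiDocumentAssembler whose input columns define the schema that `df` must have.
+for stage in pipeline.model.stages:
+    print(stage)
+```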
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_large_mrqa_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/VMware/roberta-large-mrqa + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_med_small_1m_2_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_med_small_1m_2_en.md new file mode 100644 index 00000000000000..48b0ae65ecb9d3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_med_small_1m_2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_med_small_1m_2 RoBertaEmbeddings from nyu-mll +author: John Snow Labs +name: roberta_med_small_1m_2 +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_med_small_1m_2` is a English model originally trained by nyu-mll. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_med_small_1m_2_en_5.5.0_3.0_1725571940548.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_med_small_1m_2_en_5.5.0_3.0_1725571940548.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("roberta_med_small_1m_2","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("roberta_med_small_1m_2","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
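+
+The `embeddings` column produced above holds one annotation per token, each carrying its vector in an `embeddings` field. A minimal way to flatten those vectors out of `pipelineDF` (from the Python snippet) is shown below; Spark NLP's `EmbeddingsFinisher` is the more structured alternative when Spark ML vectors are needed:
+
+```python
+# One row per token vector; each element is the raw float array for that token.
+pipelineDF.selectExpr("explode(embeddings.embeddings) as token_vector").show(1, truncate=80)
+```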
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_med_small_1m_2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|107.9 MB| + +## References + +https://huggingface.co/nyu-mll/roberta-med-small-1M-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_med_small_1m_2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_med_small_1m_2_pipeline_en.md new file mode 100644 index 00000000000000..174bc797cb8a07 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_med_small_1m_2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_med_small_1m_2_pipeline pipeline RoBertaEmbeddings from nyu-mll +author: John Snow Labs +name: roberta_med_small_1m_2_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_med_small_1m_2_pipeline` is a English model originally trained by nyu-mll. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_med_small_1m_2_pipeline_en_5.5.0_3.0_1725571972921.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_med_small_1m_2_pipeline_en_5.5.0_3.0_1725571972921.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_med_small_1m_2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_med_small_1m_2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_med_small_1m_2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|107.9 MB| + +## References + +https://huggingface.co/nyu-mll/roberta-med-small-1M-2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_mlm_for_protein_clustering_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_mlm_for_protein_clustering_pipeline_en.md new file mode 100644 index 00000000000000..b9a63d7a486877 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_mlm_for_protein_clustering_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_mlm_for_protein_clustering_pipeline pipeline RoBertaForSequenceClassification from shashwatsaini +author: John Snow Labs +name: roberta_mlm_for_protein_clustering_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_mlm_for_protein_clustering_pipeline` is a English model originally trained by shashwatsaini. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_mlm_for_protein_clustering_pipeline_en_5.5.0_3.0_1725541687702.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_mlm_for_protein_clustering_pipeline_en_5.5.0_3.0_1725541687702.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_mlm_for_protein_clustering_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_mlm_for_protein_clustering_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_mlm_for_protein_clustering_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|315.7 MB| + +## References + +https://huggingface.co/shashwatsaini/RoBERTa-MLM-For-Protein-Clustering + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_ner_roberta_base_biomedical_clinical_spanish_finetuned_ner_craft_augmentedtransfer_spanish_es.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_ner_roberta_base_biomedical_clinical_spanish_finetuned_ner_craft_augmentedtransfer_spanish_es.md new file mode 100644 index 00000000000000..ad38a7cae47ae1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_ner_roberta_base_biomedical_clinical_spanish_finetuned_ner_craft_augmentedtransfer_spanish_es.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Castilian, Spanish roberta_ner_roberta_base_biomedical_clinical_spanish_finetuned_ner_craft_augmentedtransfer_spanish RoBertaForTokenClassification from StivenLancheros +author: John Snow Labs +name: roberta_ner_roberta_base_biomedical_clinical_spanish_finetuned_ner_craft_augmentedtransfer_spanish +date: 2024-09-05 +tags: [es, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_ner_roberta_base_biomedical_clinical_spanish_finetuned_ner_craft_augmentedtransfer_spanish` is a Castilian, Spanish model originally trained by StivenLancheros. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_ner_roberta_base_biomedical_clinical_spanish_finetuned_ner_craft_augmentedtransfer_spanish_es_5.5.0_3.0_1725501654974.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_ner_roberta_base_biomedical_clinical_spanish_finetuned_ner_craft_augmentedtransfer_spanish_es_5.5.0_3.0_1725501654974.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_ner_roberta_base_biomedical_clinical_spanish_finetuned_ner_craft_augmentedtransfer_spanish","es") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_ner_roberta_base_biomedical_clinical_spanish_finetuned_ner_craft_augmentedtransfer_spanish", "es")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
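+
+The demo sentence in the snippets above is English, while this model targets Spanish clinical text. A more representative check is sketched below; the sentence is only an illustrative placeholder, and `pipelineModel` is assumed from the Python snippet above:
+
+```python
+# Hypothetical Spanish biomedical sentence; the fitted pipeline can transform new data directly.
+data = spark.createDataFrame([["El paciente presenta una mutación en el gen BRCA1."]]).toDF("text")
+
+result = pipelineModel.transform(data)
+result.select("token.result", "ner.result").show(truncate=False)
+```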
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_ner_roberta_base_biomedical_clinical_spanish_finetuned_ner_craft_augmentedtransfer_spanish| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|es| +|Size:|448.8 MB| + +## References + +https://huggingface.co/StivenLancheros/roberta-base-biomedical-clinical-es-finetuned-ner-CRAFT_AugmentedTransfer_ES \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_qa_01_dialdoc_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_qa_01_dialdoc_en.md new file mode 100644 index 00000000000000..8284cbed150ae0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_qa_01_dialdoc_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English RobertaForQuestionAnswering Cased model (from alistvt) +author: John Snow Labs +name: roberta_qa_01_dialdoc +date: 2024-09-05 +tags: [en, open_source, roberta, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RobertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `01-roberta-dialdoc` is a English model originally trained by `alistvt`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_qa_01_dialdoc_en_5.5.0_3.0_1725576501111.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_qa_01_dialdoc_en_5.5.0_3.0_1725576501111.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +Document_Assembler = MultiDocumentAssembler()\ + .setInputCols(["question", "context"])\ + .setOutputCols(["document_question", "document_context"]) + +Question_Answering = RoBertaForQuestionAnswering.pretrained("roberta_qa_01_dialdoc","en")\ + .setInputCols(["document_question", "document_context"])\ + .setOutputCol("answer")\ + .setCaseSensitive(True) + +pipeline = Pipeline(stages=[Document_Assembler, Question_Answering]) + +data = spark.createDataFrame([["What's my name?","My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(data).transform(data) +``` +```scala +val Document_Assembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val Question_Answering = RoBertaForQuestionAnswering.pretrained("roberta_qa_01_dialdoc","en") + .setInputCols(Array("document_question", "document_context")) + .setOutputCol("answer") + .setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(Document_Assembler, Question_Answering)) + +val data = Seq("What's my name?","My name is Clara and I live in Berkeley.").toDS.toDF("question", "context") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_qa_01_dialdoc| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|1.3 GB| + +## References + +References + +- https://huggingface.co/alistvt/01-roberta-dialdoc \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_qa_01_dialdoc_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_qa_01_dialdoc_pipeline_en.md new file mode 100644 index 00000000000000..96089608b4ae4b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_qa_01_dialdoc_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English roberta_qa_01_dialdoc_pipeline pipeline RoBertaForQuestionAnswering from alistvt +author: John Snow Labs +name: roberta_qa_01_dialdoc_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_qa_01_dialdoc_pipeline` is a English model originally trained by alistvt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_qa_01_dialdoc_pipeline_en_5.5.0_3.0_1725576568367.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_qa_01_dialdoc_pipeline_en_5.5.0_3.0_1725576568367.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_qa_01_dialdoc_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_qa_01_dialdoc_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_qa_01_dialdoc_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/alistvt/01-roberta-dialdoc + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_qa_IceBERT_QA_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_qa_IceBERT_QA_en.md new file mode 100644 index 00000000000000..7762bc27a37a75 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_qa_IceBERT_QA_en.md @@ -0,0 +1,106 @@ +--- +layout: model +title: English RobertaForQuestionAnswering (from vesteinn) +author: John Snow Labs +name: roberta_qa_IceBERT_QA +date: 2024-09-05 +tags: [en, open_source, question_answering, roberta, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `IceBERT-QA` is a English model originally trained by `vesteinn`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_qa_IceBERT_QA_en_5.5.0_3.0_1725577202080.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_qa_IceBERT_QA_en_5.5.0_3.0_1725577202080.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_qa_IceBERT_QA","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer") \ +.setCaseSensitive(True) + +pipeline = Pipeline().setStages([ +document_assembler, +spanClassifier +]) + +example = spark.createDataFrame([["What's my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(example).transform(example) +``` +```scala +val document = new MultiDocumentAssembler() +.setInputCols("question", "context") +.setOutputCols("document_question", "document_context") + +val spanClassifier = RoBertaForQuestionAnswering +.pretrained("roberta_qa_IceBERT_QA","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) +.setMaxSentenceLength(512) + +val pipeline = new Pipeline().setStages(Array(document, spanClassifier)) + +val example = Seq( +("Where was John Lenon born?", "John Lenon was born in London and lived in Paris. My name is Sarah and I live in London."), +("What's my name?", "My name is Clara and I live in Berkeley.")) +.toDF("question", "context") + +val result = pipeline.fit(example).transform(example) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.roberta.by_vesteinn").predict("""What's my name?|||"My name is Clara and I live in Berkeley.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_qa_IceBERT_QA| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|462.8 MB| + +## References + +References + +- https://huggingface.co/vesteinn/IceBERT-QA \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_qa_IceBERT_QA_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_qa_IceBERT_QA_pipeline_en.md new file mode 100644 index 00000000000000..63806a374da664 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_qa_IceBERT_QA_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English roberta_qa_IceBERT_QA_pipeline pipeline RoBertaForQuestionAnswering from vesteinn +author: John Snow Labs +name: roberta_qa_IceBERT_QA_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_qa_IceBERT_QA_pipeline` is a English model originally trained by vesteinn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_qa_IceBERT_QA_pipeline_en_5.5.0_3.0_1725577225713.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_qa_IceBERT_QA_pipeline_en_5.5.0_3.0_1725577225713.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_qa_IceBERT_QA_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_qa_IceBERT_QA_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_qa_IceBERT_QA_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|462.8 MB| + +## References + +https://huggingface.co/vesteinn/IceBERT-QA + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_qa_avioo1_roberta_base_squad2_finetuned_squad_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_qa_avioo1_roberta_base_squad2_finetuned_squad_pipeline_en.md new file mode 100644 index 00000000000000..04cf6c79731e30 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_qa_avioo1_roberta_base_squad2_finetuned_squad_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English roberta_qa_avioo1_roberta_base_squad2_finetuned_squad_pipeline pipeline RoBertaForQuestionAnswering from avioo1 +author: John Snow Labs +name: roberta_qa_avioo1_roberta_base_squad2_finetuned_squad_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_qa_avioo1_roberta_base_squad2_finetuned_squad_pipeline` is a English model originally trained by avioo1. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_qa_avioo1_roberta_base_squad2_finetuned_squad_pipeline_en_5.5.0_3.0_1725576568767.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_qa_avioo1_roberta_base_squad2_finetuned_squad_pipeline_en_5.5.0_3.0_1725576568767.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_qa_avioo1_roberta_base_squad2_finetuned_squad_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_qa_avioo1_roberta_base_squad2_finetuned_squad_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_qa_avioo1_roberta_base_squad2_finetuned_squad_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|463.6 MB| + +## References + +https://huggingface.co/avioo1/roberta-base-squad2-finetuned-squad + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_qa_base_filtered_cuad_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_qa_base_filtered_cuad_pipeline_en.md new file mode 100644 index 00000000000000..b4bf87a3f134d2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_qa_base_filtered_cuad_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English roberta_qa_base_filtered_cuad_pipeline pipeline RoBertaForQuestionAnswering from alex-apostolo +author: John Snow Labs +name: roberta_qa_base_filtered_cuad_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_qa_base_filtered_cuad_pipeline` is a English model originally trained by alex-apostolo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_qa_base_filtered_cuad_pipeline_en_5.5.0_3.0_1725576697739.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_qa_base_filtered_cuad_pipeline_en_5.5.0_3.0_1725576697739.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_qa_base_filtered_cuad_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_qa_base_filtered_cuad_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_qa_base_filtered_cuad_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|453.7 MB| + +## References + +https://huggingface.co/alex-apostolo/roberta-base-filtered-cuad + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_qa_movie_roberta_MITmovie_squad_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_qa_movie_roberta_MITmovie_squad_en.md new file mode 100644 index 00000000000000..2d10ee54a0af74 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_qa_movie_roberta_MITmovie_squad_en.md @@ -0,0 +1,106 @@ +--- +layout: model +title: English RobertaForQuestionAnswering (from thatdramebaazguy) +author: John Snow Labs +name: roberta_qa_movie_roberta_MITmovie_squad +date: 2024-09-05 +tags: [en, open_source, question_answering, roberta, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `movie-roberta-MITmovie-squad` is a English model originally trained by `thatdramebaazguy`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_qa_movie_roberta_MITmovie_squad_en_5.5.0_3.0_1725576091392.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_qa_movie_roberta_MITmovie_squad_en_5.5.0_3.0_1725576091392.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_qa_movie_roberta_MITmovie_squad","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer") \ +.setCaseSensitive(True) + +pipeline = Pipeline().setStages([ +document_assembler, +spanClassifier +]) + +example = spark.createDataFrame([["What's my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(example).transform(example) +``` +```scala +val document = new MultiDocumentAssembler() +.setInputCols("question", "context") +.setOutputCols("document_question", "document_context") + +val spanClassifier = RoBertaForQuestionAnswering +.pretrained("roberta_qa_movie_roberta_MITmovie_squad","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) +.setMaxSentenceLength(512) + +val pipeline = new Pipeline().setStages(Array(document, spanClassifier)) + +val example = Seq( +("Where was John Lenon born?", "John Lenon was born in London and lived in Paris. My name is Sarah and I live in London."), +("What's my name?", "My name is Clara and I live in Berkeley.")) +.toDF("question", "context") + +val result = pipeline.fit(example).transform(example) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.mitmovie_squad.roberta.by_thatdramebaazguy").predict("""What's my name?|||"My name is Clara and I live in Berkeley.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_qa_movie_roberta_MITmovie_squad| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|466.0 MB| + +## References + +References + +- https://huggingface.co/thatdramebaazguy/movie-roberta-MITmovie-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_qa_recipe_triplet_recipes_base_timestep_squadv2_epochs_3_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_qa_recipe_triplet_recipes_base_timestep_squadv2_epochs_3_en.md new file mode 100644 index 00000000000000..fcfb65ebb6b797 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_qa_recipe_triplet_recipes_base_timestep_squadv2_epochs_3_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English RobertaForQuestionAnswering Base Cased model (from AnonymousSub) +author: John Snow Labs +name: roberta_qa_recipe_triplet_recipes_base_timestep_squadv2_epochs_3 +date: 2024-09-05 +tags: [en, open_source, roberta, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RobertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `recipe_triplet_recipes-roberta-base_TIMESTEP_squadv2_epochs_3` is a English model originally trained by `AnonymousSub`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_qa_recipe_triplet_recipes_base_timestep_squadv2_epochs_3_en_5.5.0_3.0_1725576367398.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_qa_recipe_triplet_recipes_base_timestep_squadv2_epochs_3_en_5.5.0_3.0_1725576367398.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +Document_Assembler = MultiDocumentAssembler()\ + .setInputCols(["question", "context"])\ + .setOutputCols(["document_question", "document_context"]) + +Question_Answering = RoBertaForQuestionAnswering.pretrained("roberta_qa_recipe_triplet_recipes_base_timestep_squadv2_epochs_3","en")\ + .setInputCols(["document_question", "document_context"])\ + .setOutputCol("answer")\ + .setCaseSensitive(True) + +pipeline = Pipeline(stages=[Document_Assembler, Question_Answering]) + +data = spark.createDataFrame([["What's my name?","My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(data).transform(data) +``` +```scala +val Document_Assembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val Question_Answering = RoBertaForQuestionAnswering.pretrained("roberta_qa_recipe_triplet_recipes_base_timestep_squadv2_epochs_3","en") + .setInputCols(Array("document_question", "document_context")) + .setOutputCol("answer") + .setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(Document_Assembler, Question_Answering)) + +val data = Seq("What's my name?","My name is Clara and I live in Berkeley.").toDS.toDF("question", "context") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_qa_recipe_triplet_recipes_base_timestep_squadv2_epochs_3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|466.3 MB| + +## References + +References + +- https://huggingface.co/AnonymousSub/recipe_triplet_recipes-roberta-base_TIMESTEP_squadv2_epochs_3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_qa_recipe_triplet_recipes_base_timestep_squadv2_epochs_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_qa_recipe_triplet_recipes_base_timestep_squadv2_epochs_3_pipeline_en.md new file mode 100644 index 00000000000000..222bda996397db --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_qa_recipe_triplet_recipes_base_timestep_squadv2_epochs_3_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English roberta_qa_recipe_triplet_recipes_base_timestep_squadv2_epochs_3_pipeline pipeline RoBertaForQuestionAnswering from AnonymousSub +author: John Snow Labs +name: roberta_qa_recipe_triplet_recipes_base_timestep_squadv2_epochs_3_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_qa_recipe_triplet_recipes_base_timestep_squadv2_epochs_3_pipeline` is a English model originally trained by AnonymousSub. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_qa_recipe_triplet_recipes_base_timestep_squadv2_epochs_3_pipeline_en_5.5.0_3.0_1725576391815.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_qa_recipe_triplet_recipes_base_timestep_squadv2_epochs_3_pipeline_en_5.5.0_3.0_1725576391815.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_qa_recipe_triplet_recipes_base_timestep_squadv2_epochs_3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_qa_recipe_triplet_recipes_base_timestep_squadv2_epochs_3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_qa_recipe_triplet_recipes_base_timestep_squadv2_epochs_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|466.3 MB| + +## References + +https://huggingface.co/AnonymousSub/recipe_triplet_recipes-roberta-base_TIMESTEP_squadv2_epochs_3 + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_qa_roberta_paraphrasev3_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_qa_roberta_paraphrasev3_en.md new file mode 100644 index 00000000000000..3feb71a515d33b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_qa_roberta_paraphrasev3_en.md @@ -0,0 +1,106 @@ +--- +layout: model +title: English RobertaForQuestionAnswering (from comacrae) +author: John Snow Labs +name: roberta_qa_roberta_paraphrasev3 +date: 2024-09-05 +tags: [en, open_source, question_answering, roberta, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `roberta-paraphrasev3` is a English model originally trained by `comacrae`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_qa_roberta_paraphrasev3_en_5.5.0_3.0_1725576251082.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_qa_roberta_paraphrasev3_en_5.5.0_3.0_1725576251082.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_qa_roberta_paraphrasev3","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer") \ +.setCaseSensitive(True) + +pipeline = Pipeline().setStages([ +document_assembler, +spanClassifier +]) + +example = spark.createDataFrame([["What's my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(example).transform(example) +``` +```scala +val document = new MultiDocumentAssembler() +.setInputCols("question", "context") +.setOutputCols("document_question", "document_context") + +val spanClassifier = RoBertaForQuestionAnswering +.pretrained("roberta_qa_roberta_paraphrasev3","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) +.setMaxSentenceLength(512) + +val pipeline = new Pipeline().setStages(Array(document, spanClassifier)) + +val example = Seq( +("Where was John Lenon born?", "John Lenon was born in London and lived in Paris. My name is Sarah and I live in London."), +("What's my name?", "My name is Clara and I live in Berkeley.")) +.toDF("question", "context") + +val result = pipeline.fit(example).transform(example) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.roberta.paraphrasev3.by_comacrae").predict("""What's my name?|||"My name is Clara and I live in Berkeley.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_qa_roberta_paraphrasev3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|463.8 MB| + +## References + +References + +- https://huggingface.co/comacrae/roberta-paraphrasev3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_qa_roberta_paraphrasev3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_qa_roberta_paraphrasev3_pipeline_en.md new file mode 100644 index 00000000000000..1c309cbdac9183 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_qa_roberta_paraphrasev3_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English roberta_qa_roberta_paraphrasev3_pipeline pipeline RoBertaForQuestionAnswering from comacrae +author: John Snow Labs +name: roberta_qa_roberta_paraphrasev3_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_qa_roberta_paraphrasev3_pipeline` is a English model originally trained by comacrae. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_qa_roberta_paraphrasev3_pipeline_en_5.5.0_3.0_1725576274252.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_qa_roberta_paraphrasev3_pipeline_en_5.5.0_3.0_1725576274252.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_qa_roberta_paraphrasev3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_qa_roberta_paraphrasev3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_qa_roberta_paraphrasev3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|463.8 MB| + +## References + +https://huggingface.co/comacrae/roberta-paraphrasev3 + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_small_basque_pipeline_eu.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_small_basque_pipeline_eu.md new file mode 100644 index 00000000000000..6f6a6e77f659ef --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_small_basque_pipeline_eu.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Basque roberta_small_basque_pipeline pipeline RoBertaEmbeddings from ClassCat +author: John Snow Labs +name: roberta_small_basque_pipeline +date: 2024-09-05 +tags: [eu, open_source, pipeline, onnx] +task: Embeddings +language: eu +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_small_basque_pipeline` is a Basque model originally trained by ClassCat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_small_basque_pipeline_eu_5.5.0_3.0_1725573202700.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_small_basque_pipeline_eu_5.5.0_3.0_1725573202700.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_small_basque_pipeline", lang = "eu") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_small_basque_pipeline", lang = "eu") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_small_basque_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|eu| +|Size:|192.3 MB| + +## References + +https://huggingface.co/ClassCat/roberta-small-basque + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_stress_detection_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_stress_detection_pipeline_en.md new file mode 100644 index 00000000000000..6f066c3f543337 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_stress_detection_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_stress_detection_pipeline pipeline RoBertaForSequenceClassification from WakandianEngineer +author: John Snow Labs +name: roberta_stress_detection_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_stress_detection_pipeline` is a English model originally trained by WakandianEngineer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_stress_detection_pipeline_en_5.5.0_3.0_1725542501452.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_stress_detection_pipeline_en_5.5.0_3.0_1725542501452.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_stress_detection_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_stress_detection_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
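+
+Besides `transform` on a DataFrame, a `PretrainedPipeline` can annotate raw strings directly, which is convenient for spot-checking a classifier like this one. A minimal sketch, assuming the classifier's output column is named `class` as in the other cards (the sentence is only a placeholder):
+
+```python
+# annotate() returns a dict keyed by output column name.
+result = pipeline.annotate("My deadlines keep piling up and I cannot sleep at night")
+print(result["class"])
+```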
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_stress_detection_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|468.3 MB| + +## References + +https://huggingface.co/WakandianEngineer/RoBERTa-stress-detection + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_xlm_finetuned_amazon_conversations_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_xlm_finetuned_amazon_conversations_en.md new file mode 100644 index 00000000000000..a4a2b134349946 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_xlm_finetuned_amazon_conversations_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_xlm_finetuned_amazon_conversations RoBertaEmbeddings from afcarzero1 +author: John Snow Labs +name: roberta_xlm_finetuned_amazon_conversations +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_xlm_finetuned_amazon_conversations` is a English model originally trained by afcarzero1. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_xlm_finetuned_amazon_conversations_en_5.5.0_3.0_1725578437153.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_xlm_finetuned_amazon_conversations_en_5.5.0_3.0_1725578437153.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("roberta_xlm_finetuned_amazon_conversations","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("roberta_xlm_finetuned_amazon_conversations","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_xlm_finetuned_amazon_conversations| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/afcarzero1/roberta-xlm-finetuned-amazon_conversations \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberta_xlm_finetuned_amazon_conversations_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberta_xlm_finetuned_amazon_conversations_pipeline_en.md new file mode 100644 index 00000000000000..17bdf363872a77 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberta_xlm_finetuned_amazon_conversations_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_xlm_finetuned_amazon_conversations_pipeline pipeline RoBertaEmbeddings from afcarzero1 +author: John Snow Labs +name: roberta_xlm_finetuned_amazon_conversations_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_xlm_finetuned_amazon_conversations_pipeline` is a English model originally trained by afcarzero1. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_xlm_finetuned_amazon_conversations_pipeline_en_5.5.0_3.0_1725578506109.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_xlm_finetuned_amazon_conversations_pipeline_en_5.5.0_3.0_1725578506109.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_xlm_finetuned_amazon_conversations_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_xlm_finetuned_amazon_conversations_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_xlm_finetuned_amazon_conversations_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/afcarzero1/roberta-xlm-finetuned-amazon_conversations + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-robertacnnrnnfnntransformer_en.md b/docs/_posts/ahmedlone127/2024-09-05-robertacnnrnnfnntransformer_en.md new file mode 100644 index 00000000000000..2fdc1049a1e0e7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-robertacnnrnnfnntransformer_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English robertacnnrnnfnntransformer RoBertaEmbeddings from Mukundhan32 +author: John Snow Labs +name: robertacnnrnnfnntransformer +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`robertacnnrnnfnntransformer` is a English model originally trained by Mukundhan32. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/robertacnnrnnfnntransformer_en_5.5.0_3.0_1725577726413.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/robertacnnrnnfnntransformer_en_5.5.0_3.0_1725577726413.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("robertacnnrnnfnntransformer","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("robertacnnrnnfnntransformer","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|robertacnnrnnfnntransformer| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|446.9 MB| + +## References + +https://huggingface.co/Mukundhan32/RobertaCnnRnnFnnTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-robertacnnrnnfnntransformer_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-robertacnnrnnfnntransformer_pipeline_en.md new file mode 100644 index 00000000000000..efa933b5cc912d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-robertacnnrnnfnntransformer_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English robertacnnrnnfnntransformer_pipeline pipeline RoBertaEmbeddings from Mukundhan32 +author: John Snow Labs +name: robertacnnrnnfnntransformer_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`robertacnnrnnfnntransformer_pipeline` is a English model originally trained by Mukundhan32. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/robertacnnrnnfnntransformer_pipeline_en_5.5.0_3.0_1725577755367.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/robertacnnrnnfnntransformer_pipeline_en_5.5.0_3.0_1725577755367.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("robertacnnrnnfnntransformer_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("robertacnnrnnfnntransformer_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|robertacnnrnnfnntransformer_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|446.9 MB| + +## References + +https://huggingface.co/Mukundhan32/RobertaCnnRnnFnnTransformer + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberto_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberto_en.md new file mode 100644 index 00000000000000..9ec33773402c5d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberto_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberto RoBertaEmbeddings from ramonzaca +author: John Snow Labs +name: roberto +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberto` is a English model originally trained by ramonzaca. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberto_en_5.5.0_3.0_1725577971810.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberto_en_5.5.0_3.0_1725577971810.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("roberto","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("roberto","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberto| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|238.8 MB| + +## References + +https://huggingface.co/ramonzaca/roberto \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-roberto_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-roberto_pipeline_en.md new file mode 100644 index 00000000000000..9c6f68fde403c9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-roberto_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberto_pipeline pipeline RoBertaEmbeddings from ramonzaca +author: John Snow Labs +name: roberto_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberto_pipeline` is a English model originally trained by ramonzaca. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberto_pipeline_en_5.5.0_3.0_1725578053698.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberto_pipeline_en_5.5.0_3.0_1725578053698.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberto_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberto_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberto_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|238.8 MB| + +## References + +https://huggingface.co/ramonzaca/roberto + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-rockbook_finetuned_legalbert_en.md b/docs/_posts/ahmedlone127/2024-09-05-rockbook_finetuned_legalbert_en.md new file mode 100644 index 00000000000000..5555ba072d35ff --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-rockbook_finetuned_legalbert_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English rockbook_finetuned_legalbert BertForQuestionAnswering from 2020Austin +author: John Snow Labs +name: rockbook_finetuned_legalbert +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rockbook_finetuned_legalbert` is a English model originally trained by 2020Austin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rockbook_finetuned_legalbert_en_5.5.0_3.0_1725559826568.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rockbook_finetuned_legalbert_en_5.5.0_3.0_1725559826568.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = BertForQuestionAnswering.pretrained("rockbook_finetuned_legalbert","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = BertForQuestionAnswering.pretrained("rockbook_finetuned_legalbert", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
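+
+After running the example above, the predicted answer span can be read from the `answer` column. A minimal sketch, assuming the same `pipelineDF` as in the Python example:
+
+```python
+# "answer.result" holds the extracted answer text for each question/context pair
+pipelineDF.selectExpr("document_question.result as question", "answer.result as answer") \
+    .show(truncate=False)
+```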
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rockbook_finetuned_legalbert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|407.3 MB| + +## References + +https://huggingface.co/2020Austin/rockbook-finetuned-legalbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-rockbook_finetuned_legalbert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-rockbook_finetuned_legalbert_pipeline_en.md new file mode 100644 index 00000000000000..cf795aba2a61e1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-rockbook_finetuned_legalbert_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English rockbook_finetuned_legalbert_pipeline pipeline BertForQuestionAnswering from 2020Austin +author: John Snow Labs +name: rockbook_finetuned_legalbert_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rockbook_finetuned_legalbert_pipeline` is a English model originally trained by 2020Austin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rockbook_finetuned_legalbert_pipeline_en_5.5.0_3.0_1725559848593.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rockbook_finetuned_legalbert_pipeline_en_5.5.0_3.0_1725559848593.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("rockbook_finetuned_legalbert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("rockbook_finetuned_legalbert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rockbook_finetuned_legalbert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.3 MB| + +## References + +https://huggingface.co/2020Austin/rockbook-finetuned-legalbert + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-rpa_synth1_en.md b/docs/_posts/ahmedlone127/2024-09-05-rpa_synth1_en.md new file mode 100644 index 00000000000000..f0f4c7006b28b1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-rpa_synth1_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English rpa_synth1 XlmRoBertaForQuestionAnswering from prajwalJumde +author: John Snow Labs +name: rpa_synth1 +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rpa_synth1` is a English model originally trained by prajwalJumde. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rpa_synth1_en_5.5.0_3.0_1725574240323.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rpa_synth1_en_5.5.0_3.0_1725574240323.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("rpa_synth1","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("rpa_synth1", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rpa_synth1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|795.7 MB| + +## References + +https://huggingface.co/prajwalJumde/RPA-Synth1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-rpa_synth1_on_7_nov_en.md b/docs/_posts/ahmedlone127/2024-09-05-rpa_synth1_on_7_nov_en.md new file mode 100644 index 00000000000000..b1d9070d2b1d92 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-rpa_synth1_on_7_nov_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English rpa_synth1_on_7_nov XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: rpa_synth1_on_7_nov +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rpa_synth1_on_7_nov` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rpa_synth1_on_7_nov_en_5.5.0_3.0_1725571567025.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rpa_synth1_on_7_nov_en_5.5.0_3.0_1725571567025.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("rpa_synth1_on_7_nov","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("rpa_synth1_on_7_nov", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rpa_synth1_on_7_nov| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|796.3 MB| + +## References + +https://huggingface.co/am-infoweb/RPA_Synth1_ON_7_Nov \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-rpa_synth1_on_7_nov_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-rpa_synth1_on_7_nov_pipeline_en.md new file mode 100644 index 00000000000000..8345ccd8e73302 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-rpa_synth1_on_7_nov_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English rpa_synth1_on_7_nov_pipeline pipeline XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: rpa_synth1_on_7_nov_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rpa_synth1_on_7_nov_pipeline` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rpa_synth1_on_7_nov_pipeline_en_5.5.0_3.0_1725571690223.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rpa_synth1_on_7_nov_pipeline_en_5.5.0_3.0_1725571690223.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("rpa_synth1_on_7_nov_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("rpa_synth1_on_7_nov_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rpa_synth1_on_7_nov_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|796.3 MB| + +## References + +https://huggingface.co/am-infoweb/RPA_Synth1_ON_7_Nov + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-rpa_synth1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-rpa_synth1_pipeline_en.md new file mode 100644 index 00000000000000..57b9e0e36c97e6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-rpa_synth1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English rpa_synth1_pipeline pipeline XlmRoBertaForQuestionAnswering from prajwalJumde +author: John Snow Labs +name: rpa_synth1_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rpa_synth1_pipeline` is a English model originally trained by prajwalJumde. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rpa_synth1_pipeline_en_5.5.0_3.0_1725574363279.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rpa_synth1_pipeline_en_5.5.0_3.0_1725574363279.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("rpa_synth1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("rpa_synth1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rpa_synth1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|795.7 MB| + +## References + +https://huggingface.co/prajwalJumde/RPA-Synth1 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-rtmex23_pol4_cardif_en.md b/docs/_posts/ahmedlone127/2024-09-05-rtmex23_pol4_cardif_en.md new file mode 100644 index 00000000000000..e8c8cbcbc5ebea --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-rtmex23_pol4_cardif_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English rtmex23_pol4_cardif XlmRoBertaForSequenceClassification from javilonso +author: John Snow Labs +name: rtmex23_pol4_cardif +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rtmex23_pol4_cardif` is a English model originally trained by javilonso. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rtmex23_pol4_cardif_en_5.5.0_3.0_1725514565867.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rtmex23_pol4_cardif_en_5.5.0_3.0_1725514565867.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("rtmex23_pol4_cardif","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("rtmex23_pol4_cardif", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
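+
+Once the pipeline has been fitted and applied as above, the predicted label for each input row is available in the `class` column. A minimal sketch, assuming the Python example's `pipelineDF`:
+
+```python
+# "class.result" contains the predicted label for each document
+pipelineDF.select("text", "class.result").show(truncate=False)
+```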
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rtmex23_pol4_cardif| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/javilonso/rtmex23-pol4-cardif \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-rubert_base_cased_conversational_ner_v3_en.md b/docs/_posts/ahmedlone127/2024-09-05-rubert_base_cased_conversational_ner_v3_en.md new file mode 100644 index 00000000000000..e0afc4dbc36d19 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-rubert_base_cased_conversational_ner_v3_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English rubert_base_cased_conversational_ner_v3 BertForTokenClassification from Data-Lab +author: John Snow Labs +name: rubert_base_cased_conversational_ner_v3 +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rubert_base_cased_conversational_ner_v3` is a English model originally trained by Data-Lab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rubert_base_cased_conversational_ner_v3_en_5.5.0_3.0_1725511473905.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rubert_base_cased_conversational_ner_v3_en_5.5.0_3.0_1725511473905.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("rubert_base_cased_conversational_ner_v3","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("rubert_base_cased_conversational_ner_v3", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
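+
+For token classification, the tokens and their predicted tags come back as parallel arrays. A minimal sketch for inspecting them, assuming the Python example's `pipelineDF`:
+
+```python
+# "token.result" and "ner.result" are aligned element by element
+pipelineDF.select("token.result", "ner.result").show(truncate=False)
+```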
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rubert_base_cased_conversational_ner_v3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|662.2 MB| + +## References + +https://huggingface.co/Data-Lab/rubert-base-cased-conversational_ner-v3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-rulebert_v0_0_k0_it.md b/docs/_posts/ahmedlone127/2024-09-05-rulebert_v0_0_k0_it.md new file mode 100644 index 00000000000000..23b93f0283024f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-rulebert_v0_0_k0_it.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Italian rulebert_v0_0_k0 XlmRoBertaForSequenceClassification from ribesstefano +author: John Snow Labs +name: rulebert_v0_0_k0 +date: 2024-09-05 +tags: [it, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: it +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rulebert_v0_0_k0` is a Italian model originally trained by ribesstefano. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rulebert_v0_0_k0_it_5.5.0_3.0_1725529388132.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rulebert_v0_0_k0_it_5.5.0_3.0_1725529388132.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("rulebert_v0_0_k0","it") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("rulebert_v0_0_k0", "it")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rulebert_v0_0_k0| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|it| +|Size:|812.3 MB| + +## References + +https://huggingface.co/ribesstefano/RuleBert-v0.0-k0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-rulebert_v0_0_k0_pipeline_it.md b/docs/_posts/ahmedlone127/2024-09-05-rulebert_v0_0_k0_pipeline_it.md new file mode 100644 index 00000000000000..201c4da520f647 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-rulebert_v0_0_k0_pipeline_it.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Italian rulebert_v0_0_k0_pipeline pipeline XlmRoBertaForSequenceClassification from ribesstefano +author: John Snow Labs +name: rulebert_v0_0_k0_pipeline +date: 2024-09-05 +tags: [it, open_source, pipeline, onnx] +task: Text Classification +language: it +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rulebert_v0_0_k0_pipeline` is a Italian model originally trained by ribesstefano. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rulebert_v0_0_k0_pipeline_it_5.5.0_3.0_1725529517393.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rulebert_v0_0_k0_pipeline_it_5.5.0_3.0_1725529517393.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("rulebert_v0_0_k0_pipeline", lang = "it") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("rulebert_v0_0_k0_pipeline", lang = "it") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rulebert_v0_0_k0_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|it| +|Size:|812.3 MB| + +## References + +https://huggingface.co/ribesstefano/RuleBert-v0.0-k0 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-rupunct_big_pipeline_ru.md b/docs/_posts/ahmedlone127/2024-09-05-rupunct_big_pipeline_ru.md new file mode 100644 index 00000000000000..a5077812c560a3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-rupunct_big_pipeline_ru.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Russian rupunct_big_pipeline pipeline BertForTokenClassification from RUPunct +author: John Snow Labs +name: rupunct_big_pipeline +date: 2024-09-05 +tags: [ru, open_source, pipeline, onnx] +task: Named Entity Recognition +language: ru +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rupunct_big_pipeline` is a Russian model originally trained by RUPunct. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rupunct_big_pipeline_ru_5.5.0_3.0_1725539410649.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rupunct_big_pipeline_ru_5.5.0_3.0_1725539410649.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("rupunct_big_pipeline", lang = "ru") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("rupunct_big_pipeline", lang = "ru") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rupunct_big_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ru| +|Size:|667.1 MB| + +## References + +https://huggingface.co/RUPunct/RUPunct_big + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-salamathankstransformer_fil2en_v1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-salamathankstransformer_fil2en_v1_pipeline_en.md new file mode 100644 index 00000000000000..bb7656210ba5b9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-salamathankstransformer_fil2en_v1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English salamathankstransformer_fil2en_v1_pipeline pipeline MarianTransformer from SalamaThanks +author: John Snow Labs +name: salamathankstransformer_fil2en_v1_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`salamathankstransformer_fil2en_v1_pipeline` is a English model originally trained by SalamaThanks. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/salamathankstransformer_fil2en_v1_pipeline_en_5.5.0_3.0_1725494737155.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/salamathankstransformer_fil2en_v1_pipeline_en_5.5.0_3.0_1725494737155.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("salamathankstransformer_fil2en_v1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("salamathankstransformer_fil2en_v1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|salamathankstransformer_fil2en_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|497.1 MB| + +## References + +https://huggingface.co/SalamaThanks/SalamaThanksTransformer_fil2en_v1 + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-salamathankstransformer_fil2en_v2_en.md b/docs/_posts/ahmedlone127/2024-09-05-salamathankstransformer_fil2en_v2_en.md new file mode 100644 index 00000000000000..b920c86ac6433f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-salamathankstransformer_fil2en_v2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English salamathankstransformer_fil2en_v2 MarianTransformer from SalamaThanks +author: John Snow Labs +name: salamathankstransformer_fil2en_v2 +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`salamathankstransformer_fil2en_v2` is a English model originally trained by SalamaThanks. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/salamathankstransformer_fil2en_v2_en_5.5.0_3.0_1725545857251.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/salamathankstransformer_fil2en_v2_en_5.5.0_3.0_1725545857251.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("salamathankstransformer_fil2en_v2","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+  .setInputCols(Array("document"))
+  .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("salamathankstransformer_fil2en_v2","en")
+  .setInputCols(Array("sentence"))
+  .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
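+
+With the column names wired up as above, the translated sentences land in the `translation` column. A minimal sketch for reading them back, assuming the Python example's `pipelineDF`:
+
+```python
+# Each element of "translation.result" is one translated sentence
+pipelineDF.select("translation.result").show(truncate=False)
+```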
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|salamathankstransformer_fil2en_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|496.5 MB| + +## References + +https://huggingface.co/SalamaThanks/SalamaThanksTransformer_fil2en_v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-salamathankstransformer_fil2en_v2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-salamathankstransformer_fil2en_v2_pipeline_en.md new file mode 100644 index 00000000000000..8be36d908bdb28 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-salamathankstransformer_fil2en_v2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English salamathankstransformer_fil2en_v2_pipeline pipeline MarianTransformer from SalamaThanks +author: John Snow Labs +name: salamathankstransformer_fil2en_v2_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`salamathankstransformer_fil2en_v2_pipeline` is a English model originally trained by SalamaThanks. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/salamathankstransformer_fil2en_v2_pipeline_en_5.5.0_3.0_1725545883337.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/salamathankstransformer_fil2en_v2_pipeline_en_5.5.0_3.0_1725545883337.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("salamathankstransformer_fil2en_v2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("salamathankstransformer_fil2en_v2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|salamathankstransformer_fil2en_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|497.1 MB| + +## References + +https://huggingface.co/SalamaThanks/SalamaThanksTransformer_fil2en_v2 + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-samind_sentiment_en.md b/docs/_posts/ahmedlone127/2024-09-05-samind_sentiment_en.md new file mode 100644 index 00000000000000..63d33780791589 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-samind_sentiment_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English samind_sentiment XlmRoBertaForSequenceClassification from woranit +author: John Snow Labs +name: samind_sentiment +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`samind_sentiment` is a English model originally trained by woranit. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/samind_sentiment_en_5.5.0_3.0_1725537251002.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/samind_sentiment_en_5.5.0_3.0_1725537251002.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("samind_sentiment","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("samind_sentiment", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|samind_sentiment| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|826.3 MB| + +## References + +https://huggingface.co/woranit/samind-sentiment \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sanskrit_saskta_distilbert_en.md b/docs/_posts/ahmedlone127/2024-09-05-sanskrit_saskta_distilbert_en.md new file mode 100644 index 00000000000000..9c3e9d19727525 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sanskrit_saskta_distilbert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sanskrit_saskta_distilbert DistilBertForSequenceClassification from keefezowie +author: John Snow Labs +name: sanskrit_saskta_distilbert +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sanskrit_saskta_distilbert` is a English model originally trained by keefezowie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sanskrit_saskta_distilbert_en_5.5.0_3.0_1725580093994.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sanskrit_saskta_distilbert_en_5.5.0_3.0_1725580093994.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("sanskrit_saskta_distilbert","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("sanskrit_saskta_distilbert", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sanskrit_saskta_distilbert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|250.8 MB| + +## References + +https://huggingface.co/keefezowie/sa_distilBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sanskrit_saskta_distilbert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-sanskrit_saskta_distilbert_pipeline_en.md new file mode 100644 index 00000000000000..6097b8e221ed38 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sanskrit_saskta_distilbert_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English sanskrit_saskta_distilbert_pipeline pipeline DistilBertForSequenceClassification from keefezowie +author: John Snow Labs +name: sanskrit_saskta_distilbert_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sanskrit_saskta_distilbert_pipeline` is a English model originally trained by keefezowie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sanskrit_saskta_distilbert_pipeline_en_5.5.0_3.0_1725580106597.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sanskrit_saskta_distilbert_pipeline_en_5.5.0_3.0_1725580106597.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sanskrit_saskta_distilbert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sanskrit_saskta_distilbert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
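+
+For quick experiments on raw strings, the same `PretrainedPipeline` object can also be used with `annotate` instead of a DataFrame; a minimal sketch (the example sentence is an arbitrary placeholder, and the `class` key follows the classifier's output column):
+
+```python
+# Annotate a single string; the keys of the returned dict follow the pipeline's output columns.
+result = pipeline.annotate("This movie was surprisingly good.")
+print(result["class"])
+```
+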
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sanskrit_saskta_distilbert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|250.9 MB| + +## References + +https://huggingface.co/keefezowie/sa_distilBERT + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sayula_popoluca_tagger_3112_v3_en.md b/docs/_posts/ahmedlone127/2024-09-05-sayula_popoluca_tagger_3112_v3_en.md new file mode 100644 index 00000000000000..273fd9e45c6dcc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sayula_popoluca_tagger_3112_v3_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sayula_popoluca_tagger_3112_v3 BertForTokenClassification from Buseak +author: John Snow Labs +name: sayula_popoluca_tagger_3112_v3 +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sayula_popoluca_tagger_3112_v3` is a English model originally trained by Buseak. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sayula_popoluca_tagger_3112_v3_en_5.5.0_3.0_1725539153414.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sayula_popoluca_tagger_3112_v3_en_5.5.0_3.0_1725539153414.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("sayula_popoluca_tagger_3112_v3","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("sayula_popoluca_tagger_3112_v3", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sayula_popoluca_tagger_3112_v3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|412.3 MB| + +## References + +https://huggingface.co/Buseak/pos_tagger_3112_v3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sayula_popoluca_tagger_3112_v3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-sayula_popoluca_tagger_3112_v3_pipeline_en.md new file mode 100644 index 00000000000000..4a70f5970dc5a9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sayula_popoluca_tagger_3112_v3_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English sayula_popoluca_tagger_3112_v3_pipeline pipeline BertForTokenClassification from Buseak +author: John Snow Labs +name: sayula_popoluca_tagger_3112_v3_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sayula_popoluca_tagger_3112_v3_pipeline` is a English model originally trained by Buseak. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sayula_popoluca_tagger_3112_v3_pipeline_en_5.5.0_3.0_1725539173782.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sayula_popoluca_tagger_3112_v3_pipeline_en_5.5.0_3.0_1725539173782.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sayula_popoluca_tagger_3112_v3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sayula_popoluca_tagger_3112_v3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sayula_popoluca_tagger_3112_v3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|412.4 MB| + +## References + +https://huggingface.co/Buseak/pos_tagger_3112_v3 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sbert_punc_case_russian_pipeline_ru.md b/docs/_posts/ahmedlone127/2024-09-05-sbert_punc_case_russian_pipeline_ru.md new file mode 100644 index 00000000000000..1171cec80489b7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sbert_punc_case_russian_pipeline_ru.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Russian sbert_punc_case_russian_pipeline pipeline BertForTokenClassification from kontur-ai +author: John Snow Labs +name: sbert_punc_case_russian_pipeline +date: 2024-09-05 +tags: [ru, open_source, pipeline, onnx] +task: Named Entity Recognition +language: ru +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sbert_punc_case_russian_pipeline` is a Russian model originally trained by kontur-ai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sbert_punc_case_russian_pipeline_ru_5.5.0_3.0_1725539819992.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sbert_punc_case_russian_pipeline_ru_5.5.0_3.0_1725539819992.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sbert_punc_case_russian_pipeline", lang = "ru") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sbert_punc_case_russian_pipeline", lang = "ru") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sbert_punc_case_russian_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ru| +|Size:|1.0 GB| + +## References + +https://huggingface.co/kontur-ai/sbert_punc_case_ru + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sbert_punc_case_russian_ru.md b/docs/_posts/ahmedlone127/2024-09-05-sbert_punc_case_russian_ru.md new file mode 100644 index 00000000000000..2c6384916e712f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sbert_punc_case_russian_ru.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Russian sbert_punc_case_russian BertForTokenClassification from kontur-ai +author: John Snow Labs +name: sbert_punc_case_russian +date: 2024-09-05 +tags: [ru, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: ru +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sbert_punc_case_russian` is a Russian model originally trained by kontur-ai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sbert_punc_case_russian_ru_5.5.0_3.0_1725539532660.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sbert_punc_case_russian_ru_5.5.0_3.0_1725539532660.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("sbert_punc_case_russian","ru") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("sbert_punc_case_russian", "ru") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
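+
+To see which tag was assigned to each token, the position-aligned `token` and `ner` arrays can be selected side by side; a minimal sketch, assuming the `pipelineDF` from the example above:
+
+```python
+# Tokens and predicted tags are parallel arrays, so entry i of one lines up with entry i of the other.
+pipelineDF.select("token.result", "ner.result").show(truncate=False)
+```
+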
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sbert_punc_case_russian| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|ru| +|Size:|1.0 GB| + +## References + +https://huggingface.co/kontur-ai/sbert_punc_case_ru \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-scenario_non_kd_from_scratch_data_tyqiangz_multilingual_sentiments_malay_model_x_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-05-scenario_non_kd_from_scratch_data_tyqiangz_multilingual_sentiments_malay_model_x_pipeline_xx.md new file mode 100644 index 00000000000000..e19fbef913e31e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-scenario_non_kd_from_scratch_data_tyqiangz_multilingual_sentiments_malay_model_x_pipeline_xx.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Multilingual scenario_non_kd_from_scratch_data_tyqiangz_multilingual_sentiments_malay_model_x_pipeline pipeline XlmRoBertaForSequenceClassification from haryoaw +author: John Snow Labs +name: scenario_non_kd_from_scratch_data_tyqiangz_multilingual_sentiments_malay_model_x_pipeline +date: 2024-09-05 +tags: [xx, open_source, pipeline, onnx] +task: Text Classification +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`scenario_non_kd_from_scratch_data_tyqiangz_multilingual_sentiments_malay_model_x_pipeline` is a Multilingual model originally trained by haryoaw. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scenario_non_kd_from_scratch_data_tyqiangz_multilingual_sentiments_malay_model_x_pipeline_xx_5.5.0_3.0_1725526718547.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scenario_non_kd_from_scratch_data_tyqiangz_multilingual_sentiments_malay_model_x_pipeline_xx_5.5.0_3.0_1725526718547.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("scenario_non_kd_from_scratch_data_tyqiangz_multilingual_sentiments_malay_model_x_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("scenario_non_kd_from_scratch_data_tyqiangz_multilingual_sentiments_malay_model_x_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scenario_non_kd_from_scratch_data_tyqiangz_multilingual_sentiments_malay_model_x_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|883.9 MB| + +## References + +https://huggingface.co/haryoaw/scenario-non-kd-from-scratch-data-tyqiangz-multilingual-sentiments-malay-model-x + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-scenario_non_kd_from_scratch_data_tyqiangz_multilingual_sentiments_malay_model_x_xx.md b/docs/_posts/ahmedlone127/2024-09-05-scenario_non_kd_from_scratch_data_tyqiangz_multilingual_sentiments_malay_model_x_xx.md new file mode 100644 index 00000000000000..632e92307bc493 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-scenario_non_kd_from_scratch_data_tyqiangz_multilingual_sentiments_malay_model_x_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual scenario_non_kd_from_scratch_data_tyqiangz_multilingual_sentiments_malay_model_x XlmRoBertaForSequenceClassification from haryoaw +author: John Snow Labs +name: scenario_non_kd_from_scratch_data_tyqiangz_multilingual_sentiments_malay_model_x +date: 2024-09-05 +tags: [xx, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`scenario_non_kd_from_scratch_data_tyqiangz_multilingual_sentiments_malay_model_x` is a Multilingual model originally trained by haryoaw. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scenario_non_kd_from_scratch_data_tyqiangz_multilingual_sentiments_malay_model_x_xx_5.5.0_3.0_1725526673254.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scenario_non_kd_from_scratch_data_tyqiangz_multilingual_sentiments_malay_model_x_xx_5.5.0_3.0_1725526673254.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("scenario_non_kd_from_scratch_data_tyqiangz_multilingual_sentiments_malay_model_x","xx") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("scenario_non_kd_from_scratch_data_tyqiangz_multilingual_sentiments_malay_model_x", "xx") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scenario_non_kd_from_scratch_data_tyqiangz_multilingual_sentiments_malay_model_x| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|xx| +|Size:|883.9 MB| + +## References + +https://huggingface.co/haryoaw/scenario-non-kd-from-scratch-data-tyqiangz-multilingual-sentiments-malay-model-x \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-scenario_non_kd_po_copy_cdf_cl_d2_data_cl_cardiff_cl_only_gamma_en.md b/docs/_posts/ahmedlone127/2024-09-05-scenario_non_kd_po_copy_cdf_cl_d2_data_cl_cardiff_cl_only_gamma_en.md new file mode 100644 index 00000000000000..9a94727570667b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-scenario_non_kd_po_copy_cdf_cl_d2_data_cl_cardiff_cl_only_gamma_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English scenario_non_kd_po_copy_cdf_cl_d2_data_cl_cardiff_cl_only_gamma XlmRoBertaForSequenceClassification from haryoaw +author: John Snow Labs +name: scenario_non_kd_po_copy_cdf_cl_d2_data_cl_cardiff_cl_only_gamma +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`scenario_non_kd_po_copy_cdf_cl_d2_data_cl_cardiff_cl_only_gamma` is a English model originally trained by haryoaw. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scenario_non_kd_po_copy_cdf_cl_d2_data_cl_cardiff_cl_only_gamma_en_5.5.0_3.0_1725528987664.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scenario_non_kd_po_copy_cdf_cl_d2_data_cl_cardiff_cl_only_gamma_en_5.5.0_3.0_1725528987664.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("scenario_non_kd_po_copy_cdf_cl_d2_data_cl_cardiff_cl_only_gamma","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("scenario_non_kd_po_copy_cdf_cl_d2_data_cl_cardiff_cl_only_gamma", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scenario_non_kd_po_copy_cdf_cl_d2_data_cl_cardiff_cl_only_gamma| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|690.1 MB| + +## References + +https://huggingface.co/haryoaw/scenario-NON-KD-PO-COPY-CDF-CL-D2_data-cl-cardiff_cl_only_gamma \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-scenario_non_kd_po_copy_cdf_cl_d2_data_cl_cardiff_cl_only_gamma_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-scenario_non_kd_po_copy_cdf_cl_d2_data_cl_cardiff_cl_only_gamma_pipeline_en.md new file mode 100644 index 00000000000000..e000bab75e00c3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-scenario_non_kd_po_copy_cdf_cl_d2_data_cl_cardiff_cl_only_gamma_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English scenario_non_kd_po_copy_cdf_cl_d2_data_cl_cardiff_cl_only_gamma_pipeline pipeline XlmRoBertaForSequenceClassification from haryoaw +author: John Snow Labs +name: scenario_non_kd_po_copy_cdf_cl_d2_data_cl_cardiff_cl_only_gamma_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`scenario_non_kd_po_copy_cdf_cl_d2_data_cl_cardiff_cl_only_gamma_pipeline` is a English model originally trained by haryoaw. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scenario_non_kd_po_copy_cdf_cl_d2_data_cl_cardiff_cl_only_gamma_pipeline_en_5.5.0_3.0_1725529063944.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scenario_non_kd_po_copy_cdf_cl_d2_data_cl_cardiff_cl_only_gamma_pipeline_en_5.5.0_3.0_1725529063944.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("scenario_non_kd_po_copy_cdf_cl_d2_data_cl_cardiff_cl_only_gamma_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("scenario_non_kd_po_copy_cdf_cl_d2_data_cl_cardiff_cl_only_gamma_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scenario_non_kd_po_copy_cdf_cl_d2_data_cl_cardiff_cl_only_gamma_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|690.1 MB| + +## References + +https://huggingface.co/haryoaw/scenario-NON-KD-PO-COPY-CDF-CL-D2_data-cl-cardiff_cl_only_gamma + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-scenario_non_kd_pr_copy_cdf_cl_d2_data_cl_cardiff_cl_only_alpha_en.md b/docs/_posts/ahmedlone127/2024-09-05-scenario_non_kd_pr_copy_cdf_cl_d2_data_cl_cardiff_cl_only_alpha_en.md new file mode 100644 index 00000000000000..2ae24130e74b23 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-scenario_non_kd_pr_copy_cdf_cl_d2_data_cl_cardiff_cl_only_alpha_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English scenario_non_kd_pr_copy_cdf_cl_d2_data_cl_cardiff_cl_only_alpha XlmRoBertaForSequenceClassification from haryoaw +author: John Snow Labs +name: scenario_non_kd_pr_copy_cdf_cl_d2_data_cl_cardiff_cl_only_alpha +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`scenario_non_kd_pr_copy_cdf_cl_d2_data_cl_cardiff_cl_only_alpha` is a English model originally trained by haryoaw. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scenario_non_kd_pr_copy_cdf_cl_d2_data_cl_cardiff_cl_only_alpha_en_5.5.0_3.0_1725529772000.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scenario_non_kd_pr_copy_cdf_cl_d2_data_cl_cardiff_cl_only_alpha_en_5.5.0_3.0_1725529772000.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("scenario_non_kd_pr_copy_cdf_cl_d2_data_cl_cardiff_cl_only_alpha","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("scenario_non_kd_pr_copy_cdf_cl_d2_data_cl_cardiff_cl_only_alpha", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scenario_non_kd_pr_copy_cdf_cl_d2_data_cl_cardiff_cl_only_alpha| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|689.1 MB| + +## References + +https://huggingface.co/haryoaw/scenario-NON-KD-PR-COPY-CDF-CL-D2_data-cl-cardiff_cl_only_alpha \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-scenario_non_kd_pr_copy_cdf_cl_d2_data_cl_cardiff_cl_only_alpha_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-scenario_non_kd_pr_copy_cdf_cl_d2_data_cl_cardiff_cl_only_alpha_pipeline_en.md new file mode 100644 index 00000000000000..451d614ad8a21d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-scenario_non_kd_pr_copy_cdf_cl_d2_data_cl_cardiff_cl_only_alpha_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English scenario_non_kd_pr_copy_cdf_cl_d2_data_cl_cardiff_cl_only_alpha_pipeline pipeline XlmRoBertaForSequenceClassification from haryoaw +author: John Snow Labs +name: scenario_non_kd_pr_copy_cdf_cl_d2_data_cl_cardiff_cl_only_alpha_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`scenario_non_kd_pr_copy_cdf_cl_d2_data_cl_cardiff_cl_only_alpha_pipeline` is a English model originally trained by haryoaw. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scenario_non_kd_pr_copy_cdf_cl_d2_data_cl_cardiff_cl_only_alpha_pipeline_en_5.5.0_3.0_1725529848032.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scenario_non_kd_pr_copy_cdf_cl_d2_data_cl_cardiff_cl_only_alpha_pipeline_en_5.5.0_3.0_1725529848032.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("scenario_non_kd_pr_copy_cdf_cl_d2_data_cl_cardiff_cl_only_alpha_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("scenario_non_kd_pr_copy_cdf_cl_d2_data_cl_cardiff_cl_only_alpha_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scenario_non_kd_pr_copy_cdf_cl_d2_data_cl_cardiff_cl_only_alpha_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|689.1 MB| + +## References + +https://huggingface.co/haryoaw/scenario-NON-KD-PR-COPY-CDF-CL-D2_data-cl-cardiff_cl_only_alpha + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_b_en.md b/docs/_posts/ahmedlone127/2024-09-05-scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_b_en.md new file mode 100644 index 00000000000000..f074ecec8c4475 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_b_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_b XlmRoBertaForSequenceClassification from haryoaw +author: John Snow Labs +name: scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_b +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_b` is a English model originally trained by haryoaw. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_b_en_5.5.0_3.0_1725527364805.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_b_en_5.5.0_3.0_1725527364805.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_b","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_b", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_b| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|884.2 MB| + +## References + +https://huggingface.co/haryoaw/scenario-NON-KD-SCR-D2_data-AmazonScience_massive_all_1_1_b \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_b_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_b_pipeline_en.md new file mode 100644 index 00000000000000..a233df5505ba02 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_b_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_b_pipeline pipeline XlmRoBertaForSequenceClassification from haryoaw +author: John Snow Labs +name: scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_b_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_b_pipeline` is a English model originally trained by haryoaw. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_b_pipeline_en_5.5.0_3.0_1725527408483.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_b_pipeline_en_5.5.0_3.0_1725527408483.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_b_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_b_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_b_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|884.3 MB| + +## References + +https://huggingface.co/haryoaw/scenario-NON-KD-SCR-D2_data-AmazonScience_massive_all_1_1_b + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-scenario_tcr_data_cl_cardiff_cl_only29297_en.md b/docs/_posts/ahmedlone127/2024-09-05-scenario_tcr_data_cl_cardiff_cl_only29297_en.md new file mode 100644 index 00000000000000..db48d212268233 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-scenario_tcr_data_cl_cardiff_cl_only29297_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English scenario_tcr_data_cl_cardiff_cl_only29297 XlmRoBertaForSequenceClassification from haryoaw +author: John Snow Labs +name: scenario_tcr_data_cl_cardiff_cl_only29297 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`scenario_tcr_data_cl_cardiff_cl_only29297` is a English model originally trained by haryoaw. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scenario_tcr_data_cl_cardiff_cl_only29297_en_5.5.0_3.0_1725530226770.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scenario_tcr_data_cl_cardiff_cl_only29297_en_5.5.0_3.0_1725530226770.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("scenario_tcr_data_cl_cardiff_cl_only29297","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("scenario_tcr_data_cl_cardiff_cl_only29297", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scenario_tcr_data_cl_cardiff_cl_only29297| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|849.0 MB| + +## References + +https://huggingface.co/haryoaw/scenario-TCR_data-cl-cardiff_cl_only29297 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-scenario_tcr_data_cl_cardiff_cl_only29297_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-scenario_tcr_data_cl_cardiff_cl_only29297_pipeline_en.md new file mode 100644 index 00000000000000..f8b635493b7ea6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-scenario_tcr_data_cl_cardiff_cl_only29297_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English scenario_tcr_data_cl_cardiff_cl_only29297_pipeline pipeline XlmRoBertaForSequenceClassification from haryoaw +author: John Snow Labs +name: scenario_tcr_data_cl_cardiff_cl_only29297_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`scenario_tcr_data_cl_cardiff_cl_only29297_pipeline` is a English model originally trained by haryoaw. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scenario_tcr_data_cl_cardiff_cl_only29297_pipeline_en_5.5.0_3.0_1725530312214.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scenario_tcr_data_cl_cardiff_cl_only29297_pipeline_en_5.5.0_3.0_1725530312214.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("scenario_tcr_data_cl_cardiff_cl_only29297_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("scenario_tcr_data_cl_cardiff_cl_only29297_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scenario_tcr_data_cl_cardiff_cl_only29297_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|849.1 MB| + +## References + +https://huggingface.co/haryoaw/scenario-TCR_data-cl-cardiff_cl_only29297 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-scibert_finetuned_ades_sonatafyai_en.md b/docs/_posts/ahmedlone127/2024-09-05-scibert_finetuned_ades_sonatafyai_en.md new file mode 100644 index 00000000000000..d7ce1c18c06060 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-scibert_finetuned_ades_sonatafyai_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English scibert_finetuned_ades_sonatafyai BertForTokenClassification from Sonatafyai +author: John Snow Labs +name: scibert_finetuned_ades_sonatafyai +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`scibert_finetuned_ades_sonatafyai` is a English model originally trained by Sonatafyai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scibert_finetuned_ades_sonatafyai_en_5.5.0_3.0_1725511713452.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scibert_finetuned_ades_sonatafyai_en_5.5.0_3.0_1725511713452.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("scibert_finetuned_ades_sonatafyai","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("scibert_finetuned_ades_sonatafyai", "en") + .setInputCols(Array("document","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scibert_finetuned_ades_sonatafyai| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|410.0 MB| + +## References + +https://huggingface.co/Sonatafyai/scibert-finetuned_ADEs_SonatafyAI \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-scoris_maltese_lithuanian_english_lt.md b/docs/_posts/ahmedlone127/2024-09-05-scoris_maltese_lithuanian_english_lt.md new file mode 100644 index 00000000000000..c5d8d3f8e71447 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-scoris_maltese_lithuanian_english_lt.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Lithuanian scoris_maltese_lithuanian_english MarianTransformer from scoris +author: John Snow Labs +name: scoris_maltese_lithuanian_english +date: 2024-09-05 +tags: [lt, open_source, onnx, translation, marian] +task: Translation +language: lt +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`scoris_maltese_lithuanian_english` is a Lithuanian model originally trained by scoris. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scoris_maltese_lithuanian_english_lt_5.5.0_3.0_1725545786494.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scoris_maltese_lithuanian_english_lt_5.5.0_3.0_1725545786494.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +marian = MarianTransformer.pretrained("scoris_maltese_lithuanian_english","lt") \ + .setInputCols(["sentence"]) \ + .setOutputCol("translation") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val marian = MarianTransformer.pretrained("scoris_maltese_lithuanian_english","lt") + .setInputCols(Array("sentence")) + .setOutputCol("translation") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
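+
+After the pipeline runs, the translated sentences land in the `translation` output column; a minimal sketch of reading them back, assuming the `pipelineDF` from the example above:
+
+```python
+# One translated string per detected sentence in the input text.
+pipelineDF.select("text", "translation.result").show(truncate=False)
+```
+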
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scoris_maltese_lithuanian_english| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|lt| +|Size:|1.3 GB| + +## References + +https://huggingface.co/scoris/scoris-mt-lt-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-scoris_maltese_lithuanian_english_pipeline_lt.md b/docs/_posts/ahmedlone127/2024-09-05-scoris_maltese_lithuanian_english_pipeline_lt.md new file mode 100644 index 00000000000000..38c75d80dd69bd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-scoris_maltese_lithuanian_english_pipeline_lt.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Lithuanian scoris_maltese_lithuanian_english_pipeline pipeline MarianTransformer from scoris +author: John Snow Labs +name: scoris_maltese_lithuanian_english_pipeline +date: 2024-09-05 +tags: [lt, open_source, pipeline, onnx] +task: Translation +language: lt +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`scoris_maltese_lithuanian_english_pipeline` is a Lithuanian model originally trained by scoris. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scoris_maltese_lithuanian_english_pipeline_lt_5.5.0_3.0_1725545857263.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scoris_maltese_lithuanian_english_pipeline_lt_5.5.0_3.0_1725545857263.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("scoris_maltese_lithuanian_english_pipeline", lang = "lt") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("scoris_maltese_lithuanian_english_pipeline", lang = "lt") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scoris_maltese_lithuanian_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|lt| +|Size:|1.3 GB| + +## References + +https://huggingface.co/scoris/scoris-mt-lt-en + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-secdisclosure_28l_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-secdisclosure_28l_pipeline_en.md new file mode 100644 index 00000000000000..68c6dee4e8f0bf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-secdisclosure_28l_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English secdisclosure_28l_pipeline pipeline RoBertaForSequenceClassification from EGAPE +author: John Snow Labs +name: secdisclosure_28l_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`secdisclosure_28l_pipeline` is a English model originally trained by EGAPE. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/secdisclosure_28l_pipeline_en_5.5.0_3.0_1725541687698.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/secdisclosure_28l_pipeline_en_5.5.0_3.0_1725541687698.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("secdisclosure_28l_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("secdisclosure_28l_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
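+
+When per-label confidence scores are needed, `fullAnnotate` keeps the full Annotation objects instead of plain strings; a minimal sketch (the example sentence is an arbitrary placeholder, and the `class` key is assumed to follow the classifier's output column):
+
+```python
+# fullAnnotate returns one dict per input; each annotation keeps its metadata (e.g. scores).
+full = pipeline.fullAnnotate("The company disclosed a data breach affecting customers.")[0]
+for annotation in full["class"]:
+    print(annotation.result, annotation.metadata)
+```
+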
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|secdisclosure_28l_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|309.7 MB| + +## References + +https://huggingface.co/EGAPE/secdisclosure-28l + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_arbertv2_ar.md b/docs/_posts/ahmedlone127/2024-09-05-sent_arbertv2_ar.md new file mode 100644 index 00000000000000..77fab6ef09bed1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_arbertv2_ar.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Arabic sent_arbertv2 BertSentenceEmbeddings from UBC-NLP +author: John Snow Labs +name: sent_arbertv2 +date: 2024-09-05 +tags: [ar, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: ar +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_arbertv2` is a Arabic model originally trained by UBC-NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_arbertv2_ar_5.5.0_3.0_1725520723883.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_arbertv2_ar_5.5.0_3.0_1725520723883.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_arbertv2","ar") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_arbertv2","ar") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
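+The Python snippet above (and the analogous snippets in the other embeddings cards below) assumes a running Spark NLP session and the usual imports; a minimal setup sketch, with nothing model-specific in it and assuming a standard Spark NLP installation, could be:
+
+```python
+import sparknlp
+from pyspark.ml import Pipeline
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, BertSentenceEmbeddings
+
+# Start a Spark session with the Spark NLP jar loaded
+spark = sparknlp.start()
+```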
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_arbertv2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|607.1 MB| + +## References + +https://huggingface.co/UBC-NLP/ARBERTv2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_awesome_align_with_corsican_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-05-sent_awesome_align_with_corsican_pipeline_xx.md new file mode 100644 index 00000000000000..d026df3664886e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_awesome_align_with_corsican_pipeline_xx.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Multilingual sent_awesome_align_with_corsican_pipeline pipeline BertSentenceEmbeddings from aneuraz +author: John Snow Labs +name: sent_awesome_align_with_corsican_pipeline +date: 2024-09-05 +tags: [xx, open_source, pipeline, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_awesome_align_with_corsican_pipeline` is a Multilingual model originally trained by aneuraz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_awesome_align_with_corsican_pipeline_xx_5.5.0_3.0_1725521570204.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_awesome_align_with_corsican_pipeline_xx_5.5.0_3.0_1725521570204.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_awesome_align_with_corsican_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_awesome_align_with_corsican_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_awesome_align_with_corsican_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|665.6 MB| + +## References + +https://huggingface.co/aneuraz/awesome-align-with-co + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_awesome_align_with_corsican_xx.md b/docs/_posts/ahmedlone127/2024-09-05-sent_awesome_align_with_corsican_xx.md new file mode 100644 index 00000000000000..6bde2f11b274b5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_awesome_align_with_corsican_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual sent_awesome_align_with_corsican BertSentenceEmbeddings from aneuraz +author: John Snow Labs +name: sent_awesome_align_with_corsican +date: 2024-09-05 +tags: [xx, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_awesome_align_with_corsican` is a Multilingual model originally trained by aneuraz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_awesome_align_with_corsican_xx_5.5.0_3.0_1725521539224.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_awesome_align_with_corsican_xx_5.5.0_3.0_1725521539224.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_awesome_align_with_corsican","xx") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_awesome_align_with_corsican","xx") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_awesome_align_with_corsican| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|665.0 MB| + +## References + +https://huggingface.co/aneuraz/awesome-align-with-co \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_berel_finetuned_dss_maskedlm_en.md b/docs/_posts/ahmedlone127/2024-09-05-sent_berel_finetuned_dss_maskedlm_en.md new file mode 100644 index 00000000000000..0e56cdf67fc437 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_berel_finetuned_dss_maskedlm_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_berel_finetuned_dss_maskedlm BertSentenceEmbeddings from yonatanlou +author: John Snow Labs +name: sent_berel_finetuned_dss_maskedlm +date: 2024-09-05 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_berel_finetuned_dss_maskedlm` is a English model originally trained by yonatanlou. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_berel_finetuned_dss_maskedlm_en_5.5.0_3.0_1725521224988.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_berel_finetuned_dss_maskedlm_en_5.5.0_3.0_1725521224988.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_berel_finetuned_dss_maskedlm","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_berel_finetuned_dss_maskedlm","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_berel_finetuned_dss_maskedlm| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|690.1 MB| + +## References + +https://huggingface.co/yonatanlou/BEREL-finetuned-DSS-maskedLM \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_bert_base_qarib_pipeline_ar.md b/docs/_posts/ahmedlone127/2024-09-05-sent_bert_base_qarib_pipeline_ar.md new file mode 100644 index 00000000000000..11a3f6d1752948 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_bert_base_qarib_pipeline_ar.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Arabic sent_bert_base_qarib_pipeline pipeline BertSentenceEmbeddings from qarib +author: John Snow Labs +name: sent_bert_base_qarib_pipeline +date: 2024-09-05 +tags: [ar, open_source, pipeline, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_base_qarib_pipeline` is a Arabic model originally trained by qarib. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_base_qarib_pipeline_ar_5.5.0_3.0_1725521444059.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_base_qarib_pipeline_ar_5.5.0_3.0_1725521444059.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_bert_base_qarib_pipeline", lang = "ar") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_bert_base_qarib_pipeline", lang = "ar") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_base_qarib_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ar| +|Size:|504.6 MB| + +## References + +https://huggingface.co/qarib/bert-base-qarib + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_MetaKD_v1_pt.md b/docs/_posts/ahmedlone127/2024-09-05-sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_MetaKD_v1_pt.md new file mode 100644 index 00000000000000..59a96ed7841fe1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_MetaKD_v1_pt.md @@ -0,0 +1,80 @@ +--- +layout: model +title: Portuguese Legal BERT Sentence Embedding Large Cased model +author: John Snow Labs +name: sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_MetaKD_v1 +date: 2024-09-05 +tags: [bert, pt, embeddings, sentence, open_source, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Legal BERT Sentence Embedding model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert-large-portuguese-cased-legal-mlm-gpl-nli-sts-MetaKD-v1` is a Portuguese model originally trained by `stjiris`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_MetaKD_v1_pt_5.5.0_3.0_1725521033090.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_MetaKD_v1_pt_5.5.0_3.0_1725521033090.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# document_assembler, sentence_detector and the input DataFrame `data` are assumed
+# to be defined upstream (see the sketch after this section)
+sent_embeddings = BertSentenceEmbeddings.pretrained("sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_MetaKD_v1", "pt") \
+    .setInputCols("sentence") \
+    .setOutputCol("bert_sentence")
+
+nlp_pipeline = Pipeline(stages=[document_assembler, sentence_detector, sent_embeddings])
+result = nlp_pipeline.fit(data).transform(data)
+```
+```scala
+// document_assembler, sentence_detector and the input DataFrame `data` are assumed
+// to be defined upstream
+val sent_embeddings = BertSentenceEmbeddings.pretrained("sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_MetaKD_v1", "pt")
+    .setInputCols("sentence")
+    .setOutputCol("bert_sentence")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, sentence_detector, sent_embeddings))
+val result = pipeline.fit(data).transform(data)
+```
+</div><div class="h3-box" markdown="1">
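+The snippet above refers to `document_assembler`, `sentence_detector` and `data` without defining them; a minimal sketch of those pieces, assuming the same conventions used by the other cards in this batch (a `text` input column and the multilingual `sentence_detector_dl` model; the example sentence is illustrative), could be:
+
+```python
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+# Any Spark DataFrame with a "text" column works as input
+data = spark.createDataFrame([["Eu amo o Spark NLP."]]).toDF("text")
+```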
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_MetaKD_v1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|1.2 GB| + +## References + +References + +- https://huggingface.co/stjiris/bert-large-portuguese-cased-legal-mlm-gpl-nli-sts-MetaKD-v1 +- https://github.com/rufimelo99/metadata-knowledge-distillation +- https://github.com/rufimelo99 +- https://rufimelo99.github.io/SemanticSearchSystemForSTJ/ +- https://www.SBERT.net +- https://www.inesc-id.pt/projects/PR07005/ \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_v1_pipeline_pt.md b/docs/_posts/ahmedlone127/2024-09-05-sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_v1_pipeline_pt.md new file mode 100644 index 00000000000000..4c6606b4797d79 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_v1_pipeline_pt.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Portuguese sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_v1_pipeline pipeline BertSentenceEmbeddings from stjiris +author: John Snow Labs +name: sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_v1_pipeline +date: 2024-09-05 +tags: [pt, open_source, pipeline, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_v1_pipeline` is a Portuguese model originally trained by stjiris. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_v1_pipeline_pt_5.5.0_3.0_1725521743595.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_v1_pipeline_pt_5.5.0_3.0_1725521743595.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_v1_pipeline", lang = "pt") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_v1_pipeline", lang = "pt") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|pt| +|Size:|1.2 GB| + +## References + +https://huggingface.co/stjiris/bert-large-portuguese-cased-legal-mlm-gpl-nli-sts-v1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_bert_tagalog_base_uncased_pipeline_tl.md b/docs/_posts/ahmedlone127/2024-09-05-sent_bert_tagalog_base_uncased_pipeline_tl.md new file mode 100644 index 00000000000000..0ada905cb51df3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_bert_tagalog_base_uncased_pipeline_tl.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Tagalog sent_bert_tagalog_base_uncased_pipeline pipeline BertSentenceEmbeddings from jcblaise +author: John Snow Labs +name: sent_bert_tagalog_base_uncased_pipeline +date: 2024-09-05 +tags: [tl, open_source, pipeline, onnx] +task: Embeddings +language: tl +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_tagalog_base_uncased_pipeline` is a Tagalog model originally trained by jcblaise. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_tagalog_base_uncased_pipeline_tl_5.5.0_3.0_1725534854046.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_tagalog_base_uncased_pipeline_tl_5.5.0_3.0_1725534854046.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_bert_tagalog_base_uncased_pipeline", lang = "tl") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_bert_tagalog_base_uncased_pipeline", lang = "tl") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_tagalog_base_uncased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|tl| +|Size:|407.5 MB| + +## References + +https://huggingface.co/jcblaise/bert-tagalog-base-uncased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_bert_tagalog_base_uncased_tl.md b/docs/_posts/ahmedlone127/2024-09-05-sent_bert_tagalog_base_uncased_tl.md new file mode 100644 index 00000000000000..aaf3a997d204f5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_bert_tagalog_base_uncased_tl.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Tagalog sent_bert_tagalog_base_uncased BertSentenceEmbeddings from jcblaise +author: John Snow Labs +name: sent_bert_tagalog_base_uncased +date: 2024-09-05 +tags: [tl, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: tl +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_tagalog_base_uncased` is a Tagalog model originally trained by jcblaise. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_tagalog_base_uncased_tl_5.5.0_3.0_1725534834612.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_tagalog_base_uncased_tl_5.5.0_3.0_1725534834612.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_bert_tagalog_base_uncased","tl") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_bert_tagalog_base_uncased","tl") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_tagalog_base_uncased| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|tl| +|Size:|406.9 MB| + +## References + +https://huggingface.co/jcblaise/bert-tagalog-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_bioformer_16l_en.md b/docs/_posts/ahmedlone127/2024-09-05-sent_bioformer_16l_en.md new file mode 100644 index 00000000000000..797aa1d482cc14 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_bioformer_16l_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_bioformer_16l BertSentenceEmbeddings from bioformers +author: John Snow Labs +name: sent_bioformer_16l +date: 2024-09-05 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bioformer_16l` is a English model originally trained by bioformers. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bioformer_16l_en_5.5.0_3.0_1725521190057.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bioformer_16l_en_5.5.0_3.0_1725521190057.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_bioformer_16l","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_bioformer_16l","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bioformer_16l| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|155.3 MB| + +## References + +https://huggingface.co/bioformers/bioformer-16L \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_bioformer_16l_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-sent_bioformer_16l_pipeline_en.md new file mode 100644 index 00000000000000..1244bc3453efe6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_bioformer_16l_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_bioformer_16l_pipeline pipeline BertSentenceEmbeddings from bioformers +author: John Snow Labs +name: sent_bioformer_16l_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bioformer_16l_pipeline` is a English model originally trained by bioformers. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bioformer_16l_pipeline_en_5.5.0_3.0_1725521197895.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bioformer_16l_pipeline_en_5.5.0_3.0_1725521197895.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_bioformer_16l_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_bioformer_16l_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bioformer_16l_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|155.8 MB| + +## References + +https://huggingface.co/bioformers/bioformer-16L + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_checkpoint_21200_en.md b/docs/_posts/ahmedlone127/2024-09-05-sent_checkpoint_21200_en.md new file mode 100644 index 00000000000000..8a3b399158bcda --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_checkpoint_21200_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_checkpoint_21200 XlmRoBertaSentenceEmbeddings from yemen2016 +author: John Snow Labs +name: sent_checkpoint_21200 +date: 2024-09-05 +tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_checkpoint_21200` is a English model originally trained by yemen2016. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_checkpoint_21200_en_5.5.0_3.0_1725504276418.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_checkpoint_21200_en_5.5.0_3.0_1725504276418.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_checkpoint_21200","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_checkpoint_21200","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_checkpoint_21200| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/yemen2016/checkpoint-21200 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_chemical_bert_uncased_en.md b/docs/_posts/ahmedlone127/2024-09-05-sent_chemical_bert_uncased_en.md new file mode 100644 index 00000000000000..a212d28e6716d8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_chemical_bert_uncased_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_chemical_bert_uncased BertSentenceEmbeddings from recobo +author: John Snow Labs +name: sent_chemical_bert_uncased +date: 2024-09-05 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_chemical_bert_uncased` is a English model originally trained by recobo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_chemical_bert_uncased_en_5.5.0_3.0_1725534949642.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_chemical_bert_uncased_en_5.5.0_3.0_1725534949642.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_chemical_bert_uncased","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_chemical_bert_uncased","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_chemical_bert_uncased| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.1 MB| + +## References + +https://huggingface.co/recobo/chemical-bert-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_chemical_bert_uncased_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-sent_chemical_bert_uncased_pipeline_en.md new file mode 100644 index 00000000000000..bc87ac1c6c6362 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_chemical_bert_uncased_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_chemical_bert_uncased_pipeline pipeline BertSentenceEmbeddings from recobo +author: John Snow Labs +name: sent_chemical_bert_uncased_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_chemical_bert_uncased_pipeline` is a English model originally trained by recobo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_chemical_bert_uncased_pipeline_en_5.5.0_3.0_1725534969364.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_chemical_bert_uncased_pipeline_en_5.5.0_3.0_1725534969364.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_chemical_bert_uncased_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_chemical_bert_uncased_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_chemical_bert_uncased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.6 MB| + +## References + +https://huggingface.co/recobo/chemical-bert-uncased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_corsican_condenser_marco_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-sent_corsican_condenser_marco_pipeline_en.md new file mode 100644 index 00000000000000..e7343dcb76b62c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_corsican_condenser_marco_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_corsican_condenser_marco_pipeline pipeline BertSentenceEmbeddings from Luyu +author: John Snow Labs +name: sent_corsican_condenser_marco_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_corsican_condenser_marco_pipeline` is a English model originally trained by Luyu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_corsican_condenser_marco_pipeline_en_5.5.0_3.0_1725520834173.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_corsican_condenser_marco_pipeline_en_5.5.0_3.0_1725520834173.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_corsican_condenser_marco_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_corsican_condenser_marco_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_corsican_condenser_marco_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.9 MB| + +## References + +https://huggingface.co/Luyu/co-condenser-marco + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_darijabert_arabizi_ar.md b/docs/_posts/ahmedlone127/2024-09-05-sent_darijabert_arabizi_ar.md new file mode 100644 index 00000000000000..db50988cf5326b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_darijabert_arabizi_ar.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Arabic sent_darijabert_arabizi BertSentenceEmbeddings from SI2M-Lab +author: John Snow Labs +name: sent_darijabert_arabizi +date: 2024-09-05 +tags: [ar, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: ar +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_darijabert_arabizi` is a Arabic model originally trained by SI2M-Lab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_darijabert_arabizi_ar_5.5.0_3.0_1725520993549.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_darijabert_arabizi_ar_5.5.0_3.0_1725520993549.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_darijabert_arabizi","ar") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_darijabert_arabizi","ar") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_darijabert_arabizi| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|634.9 MB| + +## References + +https://huggingface.co/SI2M-Lab/DarijaBERT-arabizi \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_darijabert_arabizi_pipeline_ar.md b/docs/_posts/ahmedlone127/2024-09-05-sent_darijabert_arabizi_pipeline_ar.md new file mode 100644 index 00000000000000..e6fd26c3351dfa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_darijabert_arabizi_pipeline_ar.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Arabic sent_darijabert_arabizi_pipeline pipeline BertSentenceEmbeddings from SI2M-Lab +author: John Snow Labs +name: sent_darijabert_arabizi_pipeline +date: 2024-09-05 +tags: [ar, open_source, pipeline, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_darijabert_arabizi_pipeline` is a Arabic model originally trained by SI2M-Lab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_darijabert_arabizi_pipeline_ar_5.5.0_3.0_1725521023778.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_darijabert_arabizi_pipeline_ar_5.5.0_3.0_1725521023778.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_darijabert_arabizi_pipeline", lang = "ar") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_darijabert_arabizi_pipeline", lang = "ar") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_darijabert_arabizi_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ar| +|Size:|635.4 MB| + +## References + +https://huggingface.co/SI2M-Lab/DarijaBERT-arabizi + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_drbert_4gb_cp_pubmedbert_fr.md b/docs/_posts/ahmedlone127/2024-09-05-sent_drbert_4gb_cp_pubmedbert_fr.md new file mode 100644 index 00000000000000..498127c1c8134a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_drbert_4gb_cp_pubmedbert_fr.md @@ -0,0 +1,94 @@ +--- +layout: model +title: French sent_drbert_4gb_cp_pubmedbert BertSentenceEmbeddings from Dr-BERT +author: John Snow Labs +name: sent_drbert_4gb_cp_pubmedbert +date: 2024-09-05 +tags: [fr, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: fr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_drbert_4gb_cp_pubmedbert` is a French model originally trained by Dr-BERT. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_drbert_4gb_cp_pubmedbert_fr_5.5.0_3.0_1725535196822.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_drbert_4gb_cp_pubmedbert_fr_5.5.0_3.0_1725535196822.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_drbert_4gb_cp_pubmedbert","fr") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_drbert_4gb_cp_pubmedbert","fr") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_drbert_4gb_cp_pubmedbert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|408.2 MB| + +## References + +https://huggingface.co/Dr-BERT/DrBERT-4GB-CP-PubMedBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_drbert_4gb_cp_pubmedbert_pipeline_fr.md b/docs/_posts/ahmedlone127/2024-09-05-sent_drbert_4gb_cp_pubmedbert_pipeline_fr.md new file mode 100644 index 00000000000000..38a66d086e9fdc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_drbert_4gb_cp_pubmedbert_pipeline_fr.md @@ -0,0 +1,71 @@ +--- +layout: model +title: French sent_drbert_4gb_cp_pubmedbert_pipeline pipeline BertSentenceEmbeddings from Dr-BERT +author: John Snow Labs +name: sent_drbert_4gb_cp_pubmedbert_pipeline +date: 2024-09-05 +tags: [fr, open_source, pipeline, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_drbert_4gb_cp_pubmedbert_pipeline` is a French model originally trained by Dr-BERT. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_drbert_4gb_cp_pubmedbert_pipeline_fr_5.5.0_3.0_1725535216484.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_drbert_4gb_cp_pubmedbert_pipeline_fr_5.5.0_3.0_1725535216484.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_drbert_4gb_cp_pubmedbert_pipeline", lang = "fr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_drbert_4gb_cp_pubmedbert_pipeline", lang = "fr") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_drbert_4gb_cp_pubmedbert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fr| +|Size:|408.7 MB| + +## References + +https://huggingface.co/Dr-BERT/DrBERT-4GB-CP-PubMedBERT + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_fairlex_cail_minilm_pipeline_zh.md b/docs/_posts/ahmedlone127/2024-09-05-sent_fairlex_cail_minilm_pipeline_zh.md new file mode 100644 index 00000000000000..8c94556831fe82 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_fairlex_cail_minilm_pipeline_zh.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Chinese sent_fairlex_cail_minilm_pipeline pipeline XlmRoBertaSentenceEmbeddings from coastalcph +author: John Snow Labs +name: sent_fairlex_cail_minilm_pipeline +date: 2024-09-05 +tags: [zh, open_source, pipeline, onnx] +task: Embeddings +language: zh +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_fairlex_cail_minilm_pipeline` is a Chinese model originally trained by coastalcph. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_fairlex_cail_minilm_pipeline_zh_5.5.0_3.0_1725504465975.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_fairlex_cail_minilm_pipeline_zh_5.5.0_3.0_1725504465975.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_fairlex_cail_minilm_pipeline", lang = "zh") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_fairlex_cail_minilm_pipeline", lang = "zh") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_fairlex_cail_minilm_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|zh| +|Size:|403.5 MB| + +## References + +https://huggingface.co/coastalcph/fairlex-cail-minilm + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_furina_indic_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-sent_furina_indic_pipeline_en.md new file mode 100644 index 00000000000000..50863f912b0eac --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_furina_indic_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_furina_indic_pipeline pipeline XlmRoBertaSentenceEmbeddings from yihongLiu +author: John Snow Labs +name: sent_furina_indic_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_furina_indic_pipeline` is a English model originally trained by yihongLiu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_furina_indic_pipeline_en_5.5.0_3.0_1725505183595.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_furina_indic_pipeline_en_5.5.0_3.0_1725505183595.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_furina_indic_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_furina_indic_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_furina_indic_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/yihongLiu/furina-indic + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_german_medbert_de.md b/docs/_posts/ahmedlone127/2024-09-05-sent_german_medbert_de.md new file mode 100644 index 00000000000000..0dbc47d675afc4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_german_medbert_de.md @@ -0,0 +1,94 @@ +--- +layout: model +title: German sent_german_medbert BertSentenceEmbeddings from smanjil +author: John Snow Labs +name: sent_german_medbert +date: 2024-09-05 +tags: [de, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_german_medbert` is a German model originally trained by smanjil. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_german_medbert_de_5.5.0_3.0_1725521407387.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_german_medbert_de_5.5.0_3.0_1725521407387.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_german_medbert","de") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_german_medbert","de") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
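+The transformed DataFrame keeps one annotation per detected sentence in the `embeddings` column. The sketch below assumes the `pipelineDF` variable from the Python snippet above and simply unpacks each sentence together with its vector:
+
+```python
+# Sketch: unpack sentence text and embedding vectors from the annotation column
+pipelineDF.selectExpr("explode(embeddings) as emb") \
+    .selectExpr("emb.result as sentence", "emb.embeddings as vector") \
+    .show(truncate=50)
+```
+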
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_german_medbert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|de| +|Size:|406.9 MB| + +## References + +https://huggingface.co/smanjil/German-MedBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_german_medbert_pipeline_de.md b/docs/_posts/ahmedlone127/2024-09-05-sent_german_medbert_pipeline_de.md new file mode 100644 index 00000000000000..9daf2e5af4f42d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_german_medbert_pipeline_de.md @@ -0,0 +1,71 @@ +--- +layout: model +title: German sent_german_medbert_pipeline pipeline BertSentenceEmbeddings from smanjil +author: John Snow Labs +name: sent_german_medbert_pipeline +date: 2024-09-05 +tags: [de, open_source, pipeline, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_german_medbert_pipeline` is a German model originally trained by smanjil. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_german_medbert_pipeline_de_5.5.0_3.0_1725521426486.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_german_medbert_pipeline_de_5.5.0_3.0_1725521426486.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+# df is assumed to be a Spark DataFrame with a "text" column to annotate
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("sent_german_medbert_pipeline", lang = "de")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+// df is assumed to be a Spark DataFrame with a "text" column to annotate
+val df = Seq("I love spark-nlp").toDF("text")
+val pipeline = new PretrainedPipeline("sent_german_medbert_pipeline", lang = "de")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_german_medbert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|de| +|Size:|407.4 MB| + +## References + +https://huggingface.co/smanjil/German-MedBERT + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_gujibert_fan_en.md b/docs/_posts/ahmedlone127/2024-09-05-sent_gujibert_fan_en.md new file mode 100644 index 00000000000000..fd1746455279b4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_gujibert_fan_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_gujibert_fan BertSentenceEmbeddings from hsc748NLP +author: John Snow Labs +name: sent_gujibert_fan +date: 2024-09-05 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_gujibert_fan` is a English model originally trained by hsc748NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_gujibert_fan_en_5.5.0_3.0_1725534972065.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_gujibert_fan_en_5.5.0_3.0_1725534972065.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_gujibert_fan","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_gujibert_fan","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_gujibert_fan| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|420.2 MB| + +## References + +https://huggingface.co/hsc748NLP/GujiBERT_fan \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_gujibert_fan_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-sent_gujibert_fan_pipeline_en.md new file mode 100644 index 00000000000000..5e3d5ec9bae37d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_gujibert_fan_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_gujibert_fan_pipeline pipeline BertSentenceEmbeddings from hsc748NLP +author: John Snow Labs +name: sent_gujibert_fan_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_gujibert_fan_pipeline` is a English model originally trained by hsc748NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_gujibert_fan_pipeline_en_5.5.0_3.0_1725534993027.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_gujibert_fan_pipeline_en_5.5.0_3.0_1725534993027.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+# df is assumed to be a Spark DataFrame with a "text" column to annotate
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("sent_gujibert_fan_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+// df is assumed to be a Spark DataFrame with a "text" column to annotate
+val df = Seq("I love spark-nlp").toDF("text")
+val pipeline = new PretrainedPipeline("sent_gujibert_fan_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_gujibert_fan_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|420.8 MB| + +## References + +https://huggingface.co/hsc748NLP/GujiBERT_fan + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_hebert_en.md b/docs/_posts/ahmedlone127/2024-09-05-sent_hebert_en.md new file mode 100644 index 00000000000000..eb1143cae608ca --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_hebert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_hebert BertSentenceEmbeddings from avichr +author: John Snow Labs +name: sent_hebert +date: 2024-09-05 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_hebert` is a English model originally trained by avichr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_hebert_en_5.5.0_3.0_1725520768317.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_hebert_en_5.5.0_3.0_1725520768317.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_hebert","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_hebert","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
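+When scoring small amounts of text, a `LightPipeline` wrapper avoids the overhead of a full Spark job. This is a generic Spark NLP pattern, sketched here under the assumption that `pipelineModel` is the fitted pipeline from the example above:
+
+```python
+from sparknlp.base import LightPipeline
+
+# Sketch: fast driver-side annotation of a single string
+light = LightPipeline(pipelineModel)
+annotations = light.fullAnnotate("I love spark-nlp")
+```
+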
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_hebert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/avichr/heBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_hebert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-sent_hebert_pipeline_en.md new file mode 100644 index 00000000000000..297c6e951d488d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_hebert_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_hebert_pipeline pipeline BertSentenceEmbeddings from avichr +author: John Snow Labs +name: sent_hebert_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_hebert_pipeline` is a English model originally trained by avichr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_hebert_pipeline_en_5.5.0_3.0_1725520787706.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_hebert_pipeline_en_5.5.0_3.0_1725520787706.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+# df is assumed to be a Spark DataFrame with a "text" column to annotate
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("sent_hebert_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+// df is assumed to be a Spark DataFrame with a "text" column to annotate
+val df = Seq("I love spark-nlp").toDF("text")
+val pipeline = new PretrainedPipeline("sent_hebert_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_hebert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|408.6 MB| + +## References + +https://huggingface.co/avichr/heBERT + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_incaselawbert_en.md b/docs/_posts/ahmedlone127/2024-09-05-sent_incaselawbert_en.md new file mode 100644 index 00000000000000..c205a5e41e6f94 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_incaselawbert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_incaselawbert BertSentenceEmbeddings from law-ai +author: John Snow Labs +name: sent_incaselawbert +date: 2024-09-05 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_incaselawbert` is a English model originally trained by law-ai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_incaselawbert_en_5.5.0_3.0_1725535366417.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_incaselawbert_en_5.5.0_3.0_1725535366417.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_incaselawbert","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_incaselawbert","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_incaselawbert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/law-ai/InCaseLawBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_incaselawbert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-sent_incaselawbert_pipeline_en.md new file mode 100644 index 00000000000000..4be44b58f802b7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_incaselawbert_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_incaselawbert_pipeline pipeline BertSentenceEmbeddings from law-ai +author: John Snow Labs +name: sent_incaselawbert_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_incaselawbert_pipeline` is a English model originally trained by law-ai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_incaselawbert_pipeline_en_5.5.0_3.0_1725535385909.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_incaselawbert_pipeline_en_5.5.0_3.0_1725535385909.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+# df is assumed to be a Spark DataFrame with a "text" column to annotate
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("sent_incaselawbert_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+// df is assumed to be a Spark DataFrame with a "text" column to annotate
+val df = Seq("I love spark-nlp").toDF("text")
+val pipeline = new PretrainedPipeline("sent_incaselawbert_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_incaselawbert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.4 MB| + +## References + +https://huggingface.co/law-ai/InCaseLawBERT + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_jurisbert_base_portuguese_uncased_en.md b/docs/_posts/ahmedlone127/2024-09-05-sent_jurisbert_base_portuguese_uncased_en.md new file mode 100644 index 00000000000000..045f2d05d989d5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_jurisbert_base_portuguese_uncased_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_jurisbert_base_portuguese_uncased BertSentenceEmbeddings from alfaneo +author: John Snow Labs +name: sent_jurisbert_base_portuguese_uncased +date: 2024-09-05 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_jurisbert_base_portuguese_uncased` is a English model originally trained by alfaneo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_jurisbert_base_portuguese_uncased_en_5.5.0_3.0_1725534611827.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_jurisbert_base_portuguese_uncased_en_5.5.0_3.0_1725534611827.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_jurisbert_base_portuguese_uncased","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_jurisbert_base_portuguese_uncased","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_jurisbert_base_portuguese_uncased| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.3 MB| + +## References + +https://huggingface.co/alfaneo/jurisbert-base-portuguese-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_jurisbert_base_portuguese_uncased_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-sent_jurisbert_base_portuguese_uncased_pipeline_en.md new file mode 100644 index 00000000000000..698ffd0971d264 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_jurisbert_base_portuguese_uncased_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_jurisbert_base_portuguese_uncased_pipeline pipeline BertSentenceEmbeddings from alfaneo +author: John Snow Labs +name: sent_jurisbert_base_portuguese_uncased_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_jurisbert_base_portuguese_uncased_pipeline` is a English model originally trained by alfaneo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_jurisbert_base_portuguese_uncased_pipeline_en_5.5.0_3.0_1725534633265.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_jurisbert_base_portuguese_uncased_pipeline_en_5.5.0_3.0_1725534633265.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+# df is assumed to be a Spark DataFrame with a "text" column to annotate
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("sent_jurisbert_base_portuguese_uncased_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+// df is assumed to be a Spark DataFrame with a "text" column to annotate
+val df = Seq("I love spark-nlp").toDF("text")
+val pipeline = new PretrainedPipeline("sent_jurisbert_base_portuguese_uncased_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_jurisbert_base_portuguese_uncased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/alfaneo/jurisbert-base-portuguese-uncased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_kcbert_base_ko.md b/docs/_posts/ahmedlone127/2024-09-05-sent_kcbert_base_ko.md new file mode 100644 index 00000000000000..957c705b41f037 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_kcbert_base_ko.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Korean sent_kcbert_base BertSentenceEmbeddings from beomi +author: John Snow Labs +name: sent_kcbert_base +date: 2024-09-05 +tags: [ko, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: ko +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_kcbert_base` is a Korean model originally trained by beomi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_kcbert_base_ko_5.5.0_3.0_1725521264404.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_kcbert_base_ko_5.5.0_3.0_1725521264404.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_kcbert_base","ko") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_kcbert_base","ko") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
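+If the fitted pipeline will be reused, it can be persisted like any Spark ML model. The path below is only an example; the sketch assumes the `pipelineModel` variable from the Python snippet above:
+
+```python
+# Sketch: save and reload the fitted pipeline (example path)
+pipelineModel.write().overwrite().save("/tmp/sent_kcbert_base_model")
+
+from pyspark.ml import PipelineModel
+restored = PipelineModel.load("/tmp/sent_kcbert_base_model")
+```
+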
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_kcbert_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|ko| +|Size:|406.2 MB| + +## References + +https://huggingface.co/beomi/kcbert-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_kcbert_base_pipeline_ko.md b/docs/_posts/ahmedlone127/2024-09-05-sent_kcbert_base_pipeline_ko.md new file mode 100644 index 00000000000000..8737c58e7169b9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_kcbert_base_pipeline_ko.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Korean sent_kcbert_base_pipeline pipeline BertSentenceEmbeddings from beomi +author: John Snow Labs +name: sent_kcbert_base_pipeline +date: 2024-09-05 +tags: [ko, open_source, pipeline, onnx] +task: Embeddings +language: ko +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_kcbert_base_pipeline` is a Korean model originally trained by beomi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_kcbert_base_pipeline_ko_5.5.0_3.0_1725521284730.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_kcbert_base_pipeline_ko_5.5.0_3.0_1725521284730.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+# df is assumed to be a Spark DataFrame with a "text" column to annotate
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("sent_kcbert_base_pipeline", lang = "ko")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+// df is assumed to be a Spark DataFrame with a "text" column to annotate
+val df = Seq("I love spark-nlp").toDF("text")
+val pipeline = new PretrainedPipeline("sent_kcbert_base_pipeline", lang = "ko")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_kcbert_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ko| +|Size:|406.8 MB| + +## References + +https://huggingface.co/beomi/kcbert-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_malay_coa_legal_bert_base_uncased_en.md b/docs/_posts/ahmedlone127/2024-09-05-sent_malay_coa_legal_bert_base_uncased_en.md new file mode 100644 index 00000000000000..2b55c4e807cd83 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_malay_coa_legal_bert_base_uncased_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_malay_coa_legal_bert_base_uncased BertSentenceEmbeddings from thombrysmith +author: John Snow Labs +name: sent_malay_coa_legal_bert_base_uncased +date: 2024-09-05 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_malay_coa_legal_bert_base_uncased` is a English model originally trained by thombrysmith. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_malay_coa_legal_bert_base_uncased_en_5.5.0_3.0_1725535231625.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_malay_coa_legal_bert_base_uncased_en_5.5.0_3.0_1725535231625.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_malay_coa_legal_bert_base_uncased","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_malay_coa_legal_bert_base_uncased","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_malay_coa_legal_bert_base_uncased| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/thombrysmith/ms-coa-legal-bert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_malay_coa_legal_bert_base_uncased_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-sent_malay_coa_legal_bert_base_uncased_pipeline_en.md new file mode 100644 index 00000000000000..7361af3fe7dd5a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_malay_coa_legal_bert_base_uncased_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_malay_coa_legal_bert_base_uncased_pipeline pipeline BertSentenceEmbeddings from thombrysmith +author: John Snow Labs +name: sent_malay_coa_legal_bert_base_uncased_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_malay_coa_legal_bert_base_uncased_pipeline` is a English model originally trained by thombrysmith. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_malay_coa_legal_bert_base_uncased_pipeline_en_5.5.0_3.0_1725535250899.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_malay_coa_legal_bert_base_uncased_pipeline_en_5.5.0_3.0_1725535250899.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+# df is assumed to be a Spark DataFrame with a "text" column to annotate
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("sent_malay_coa_legal_bert_base_uncased_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+// df is assumed to be a Spark DataFrame with a "text" column to annotate
+val df = Seq("I love spark-nlp").toDF("text")
+val pipeline = new PretrainedPipeline("sent_malay_coa_legal_bert_base_uncased_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_malay_coa_legal_bert_base_uncased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.8 MB| + +## References + +https://huggingface.co/thombrysmith/ms-coa-legal-bert-base-uncased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_nepalibert_en.md b/docs/_posts/ahmedlone127/2024-09-05-sent_nepalibert_en.md new file mode 100644 index 00000000000000..43d95a31861cad --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_nepalibert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_nepalibert BertSentenceEmbeddings from Rajan +author: John Snow Labs +name: sent_nepalibert +date: 2024-09-05 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_nepalibert` is a English model originally trained by Rajan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_nepalibert_en_5.5.0_3.0_1725521203640.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_nepalibert_en_5.5.0_3.0_1725521203640.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_nepalibert","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_nepalibert","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_nepalibert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|305.5 MB| + +## References + +https://huggingface.co/Rajan/NepaliBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_nepalibert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-sent_nepalibert_pipeline_en.md new file mode 100644 index 00000000000000..f18c122b2e2427 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_nepalibert_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_nepalibert_pipeline pipeline BertSentenceEmbeddings from Rajan +author: John Snow Labs +name: sent_nepalibert_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_nepalibert_pipeline` is a English model originally trained by Rajan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_nepalibert_pipeline_en_5.5.0_3.0_1725521218184.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_nepalibert_pipeline_en_5.5.0_3.0_1725521218184.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+# df is assumed to be a Spark DataFrame with a "text" column to annotate
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("sent_nepalibert_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+// df is assumed to be a Spark DataFrame with a "text" column to annotate
+val df = Seq("I love spark-nlp").toDF("text")
+val pipeline = new PretrainedPipeline("sent_nepalibert_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_nepalibert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|306.0 MB| + +## References + +https://huggingface.co/Rajan/NepaliBERT + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_norwegian_bokml_bert_ncc_male2female_en.md b/docs/_posts/ahmedlone127/2024-09-05-sent_norwegian_bokml_bert_ncc_male2female_en.md new file mode 100644 index 00000000000000..e74f5807fb6943 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_norwegian_bokml_bert_ncc_male2female_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_norwegian_bokml_bert_ncc_male2female BertSentenceEmbeddings from NbAiLab +author: John Snow Labs +name: sent_norwegian_bokml_bert_ncc_male2female +date: 2024-09-05 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_norwegian_bokml_bert_ncc_male2female` is a English model originally trained by NbAiLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_norwegian_bokml_bert_ncc_male2female_en_5.5.0_3.0_1725535173006.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_norwegian_bokml_bert_ncc_male2female_en_5.5.0_3.0_1725535173006.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_norwegian_bokml_bert_ncc_male2female","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_norwegian_bokml_bert_ncc_male2female","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
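+A quick sanity check after running the pipeline is to confirm the width of the produced vectors. The sketch below assumes the `pipelineDF` variable from the Python snippet above; the exact dimensionality depends on the underlying BERT architecture (typically 768 for base-sized models):
+
+```python
+# Sketch: print the dimensionality of the first sentence embedding
+first = pipelineDF.selectExpr("explode(embeddings) as emb").first()
+print(len(first["emb"]["embeddings"]))
+```
+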
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_norwegian_bokml_bert_ncc_male2female| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|666.0 MB| + +## References + +https://huggingface.co/NbAiLab/nb-bert-ncc-male2female \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_norwegian_bokml_bert_ncc_male2female_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-sent_norwegian_bokml_bert_ncc_male2female_pipeline_en.md new file mode 100644 index 00000000000000..3747e487801ea2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_norwegian_bokml_bert_ncc_male2female_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_norwegian_bokml_bert_ncc_male2female_pipeline pipeline BertSentenceEmbeddings from NbAiLab +author: John Snow Labs +name: sent_norwegian_bokml_bert_ncc_male2female_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_norwegian_bokml_bert_ncc_male2female_pipeline` is a English model originally trained by NbAiLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_norwegian_bokml_bert_ncc_male2female_pipeline_en_5.5.0_3.0_1725535205495.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_norwegian_bokml_bert_ncc_male2female_pipeline_en_5.5.0_3.0_1725535205495.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+# df is assumed to be a Spark DataFrame with a "text" column to annotate
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("sent_norwegian_bokml_bert_ncc_male2female_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+// df is assumed to be a Spark DataFrame with a "text" column to annotate
+val df = Seq("I love spark-nlp").toDF("text")
+val pipeline = new PretrainedPipeline("sent_norwegian_bokml_bert_ncc_male2female_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_norwegian_bokml_bert_ncc_male2female_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|666.6 MB| + +## References + +https://huggingface.co/NbAiLab/nb-bert-ncc-male2female + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_recipebert_en.md b/docs/_posts/ahmedlone127/2024-09-05-sent_recipebert_en.md new file mode 100644 index 00000000000000..2fb5333f4e5bf5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_recipebert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_recipebert BertSentenceEmbeddings from alexdseo +author: John Snow Labs +name: sent_recipebert +date: 2024-09-05 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_recipebert` is a English model originally trained by alexdseo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_recipebert_en_5.5.0_3.0_1725534609936.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_recipebert_en_5.5.0_3.0_1725534609936.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_recipebert","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_recipebert","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
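+Sentence embeddings are usually consumed downstream, for example to compare texts by similarity. The sketch below is illustrative only (not part of the model card): it assumes the `pipelineModel` and `spark` objects from the example above, exactly two input rows, and that NumPy is available.
+
+```python
+import numpy as np
+
+# Sketch: cosine similarity between the embeddings of two input rows
+data2 = spark.createDataFrame([["I love spark-nlp"], ["Spark NLP is great"]]).toDF("text")
+rows = pipelineModel.transform(data2).selectExpr("explode(embeddings) as emb").collect()
+a, b = (np.array(r["emb"]["embeddings"]) for r in rows)
+print(float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b))))
+```
+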
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_recipebert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/alexdseo/RecipeBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_recipebert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-sent_recipebert_pipeline_en.md new file mode 100644 index 00000000000000..b3316aea373929 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_recipebert_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_recipebert_pipeline pipeline BertSentenceEmbeddings from alexdseo +author: John Snow Labs +name: sent_recipebert_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_recipebert_pipeline` is a English model originally trained by alexdseo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_recipebert_pipeline_en_5.5.0_3.0_1725534630153.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_recipebert_pipeline_en_5.5.0_3.0_1725534630153.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+# df is assumed to be a Spark DataFrame with a "text" column to annotate
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("sent_recipebert_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+// df is assumed to be a Spark DataFrame with a "text" column to annotate
+val df = Seq("I love spark-nlp").toDF("text")
+val pipeline = new PretrainedPipeline("sent_recipebert_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_recipebert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.7 MB| + +## References + +https://huggingface.co/alexdseo/RecipeBERT + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_simlm_base_msmarco_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-sent_simlm_base_msmarco_pipeline_en.md new file mode 100644 index 00000000000000..91d3720b102e03 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_simlm_base_msmarco_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_simlm_base_msmarco_pipeline pipeline BertSentenceEmbeddings from intfloat +author: John Snow Labs +name: sent_simlm_base_msmarco_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_simlm_base_msmarco_pipeline` is a English model originally trained by intfloat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_simlm_base_msmarco_pipeline_en_5.5.0_3.0_1725521395656.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_simlm_base_msmarco_pipeline_en_5.5.0_3.0_1725521395656.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+# df is assumed to be a Spark DataFrame with a "text" column to annotate
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("sent_simlm_base_msmarco_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+// df is assumed to be a Spark DataFrame with a "text" column to annotate
+val df = Seq("I love spark-nlp").toDF("text")
+val pipeline = new PretrainedPipeline("sent_simlm_base_msmarco_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
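+After `transform`, the pretrained pipeline appends one column per internal stage. Since the exact column names are defined by the pipeline itself, a simple way to discover them is sketched below (assumes the `annotations` DataFrame from the example above):
+
+```python
+# Sketch: list the annotation columns added by the pretrained pipeline
+print(annotations.columns)
+annotations.printSchema()
+```
+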
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_simlm_base_msmarco_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.8 MB| + +## References + +https://huggingface.co/intfloat/simlm-base-msmarco + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_storieslm_v1_1963_en.md b/docs/_posts/ahmedlone127/2024-09-05-sent_storieslm_v1_1963_en.md new file mode 100644 index 00000000000000..264d0d9c934139 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_storieslm_v1_1963_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_storieslm_v1_1963 BertSentenceEmbeddings from StoriesLM +author: John Snow Labs +name: sent_storieslm_v1_1963 +date: 2024-09-05 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_storieslm_v1_1963` is a English model originally trained by StoriesLM. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_storieslm_v1_1963_en_5.5.0_3.0_1725535160859.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_storieslm_v1_1963_en_5.5.0_3.0_1725535160859.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_storieslm_v1_1963","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_storieslm_v1_1963","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_storieslm_v1_1963| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/StoriesLM/StoriesLM-v1-1963 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_storieslm_v1_1963_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-sent_storieslm_v1_1963_pipeline_en.md new file mode 100644 index 00000000000000..6c6db2361a2060 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_storieslm_v1_1963_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_storieslm_v1_1963_pipeline pipeline BertSentenceEmbeddings from StoriesLM +author: John Snow Labs +name: sent_storieslm_v1_1963_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_storieslm_v1_1963_pipeline` is a English model originally trained by StoriesLM. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_storieslm_v1_1963_pipeline_en_5.5.0_3.0_1725535180743.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_storieslm_v1_1963_pipeline_en_5.5.0_3.0_1725535180743.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_storieslm_v1_1963_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_storieslm_v1_1963_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
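+For quick experiments on a handful of strings, the same pretrained pipeline can also be called with `annotate` instead of `transform`. A minimal sketch, assuming an active Spark NLP session started with `sparknlp.start()`:
+
+```python
+# Sketch: annotate raw text directly with the pretrained pipeline.
+# The returned dictionary keys mirror the output columns of the stages
+# listed under "Included Models".
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+pipeline = PretrainedPipeline("sent_storieslm_v1_1963_pipeline", lang="en")
+result = pipeline.annotate("I love spark-nlp")
+print(result.keys())
+```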
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_storieslm_v1_1963_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|408.7 MB| + +## References + +https://huggingface.co/StoriesLM/StoriesLM-v1-1963 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_xlm_roberta_base_ft_cstwitter_en.md b/docs/_posts/ahmedlone127/2024-09-05-sent_xlm_roberta_base_ft_cstwitter_en.md new file mode 100644 index 00000000000000..c4d7b7e72ac082 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_xlm_roberta_base_ft_cstwitter_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_xlm_roberta_base_ft_cstwitter XlmRoBertaSentenceEmbeddings from hadifar +author: John Snow Labs +name: sent_xlm_roberta_base_ft_cstwitter +date: 2024-09-05 +tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_xlm_roberta_base_ft_cstwitter` is a English model originally trained by hadifar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_ft_cstwitter_en_5.5.0_3.0_1725504943696.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_ft_cstwitter_en_5.5.0_3.0_1725504943696.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_roberta_base_ft_cstwitter","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_roberta_base_ft_cstwitter","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_xlm_roberta_base_ft_cstwitter| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/hadifar/xlm-roberta-base-ft-CSTwitter \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sent_xlm_roberta_base_pretrained_en.md b/docs/_posts/ahmedlone127/2024-09-05-sent_xlm_roberta_base_pretrained_en.md new file mode 100644 index 00000000000000..929a951b56ad26 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sent_xlm_roberta_base_pretrained_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_xlm_roberta_base_pretrained XlmRoBertaSentenceEmbeddings from am-shb +author: John Snow Labs +name: sent_xlm_roberta_base_pretrained +date: 2024-09-05 +tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_xlm_roberta_base_pretrained` is a English model originally trained by am-shb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_pretrained_en_5.5.0_3.0_1725504848520.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_pretrained_en_5.5.0_3.0_1725504848520.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_roberta_base_pretrained","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_roberta_base_pretrained","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|sent_xlm_roberta_base_pretrained|
+|Compatibility:|Spark NLP 5.5.0+|
+|License:|Open Source|
+|Edition:|Official|
+|Input Labels:|[sentence]|
+|Output Labels:|[embeddings]|
+|Language:|en|
+|Size:|1.0 GB|
+
+## References
+
+https://huggingface.co/am-shb/xlm-roberta-base-pretrained
\ No newline at end of file
diff --git a/docs/_posts/ahmedlone127/2024-09-05-sentence_classification4designtutor_en.md b/docs/_posts/ahmedlone127/2024-09-05-sentence_classification4designtutor_en.md
new file mode 100644
index 00000000000000..a95f85cc1c7aa0
--- /dev/null
+++ b/docs/_posts/ahmedlone127/2024-09-05-sentence_classification4designtutor_en.md
@@ -0,0 +1,94 @@
+---
+layout: model
+title: English sentence_classification4designtutor RoBertaForSequenceClassification from Shenzy
+author: John Snow Labs
+name: sentence_classification4designtutor
+date: 2024-09-05
+tags: [en, open_source, onnx, sequence_classification, roberta]
+task: Text Classification
+language: en
+edition: Spark NLP 5.5.0
+spark_version: 3.0
+supported: true
+engine: onnx
+annotator: RoBertaForSequenceClassification
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sentence_classification4designtutor` is an English model originally trained by Shenzy.
+
+{:.btn-box}
+
+
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentence_classification4designtutor_en_5.5.0_3.0_1725542681645.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentence_classification4designtutor_en_5.5.0_3.0_1725542681645.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("sentence_classification4designtutor","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("sentence_classification4designtutor", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
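+After `transform`, the predicted label for each row sits in the `result` field of the `class` annotation column. A minimal sketch, assuming the `pipelineDF` built above:
+
+```python
+# Sketch: read the predicted label per input row.
+# Assumes `pipelineDF` from the example; "class" is the classifier output column.
+from pyspark.sql import functions as F
+
+pipelineDF.select(
+    F.col("text"),
+    F.col("class.result").getItem(0).alias("predicted_label")
+).show(truncate=False)
+```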
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentence_classification4designtutor| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/Shenzy/Sentence_Classification4DesignTutor \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sentence_classification4designtutor_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-sentence_classification4designtutor_pipeline_en.md new file mode 100644 index 00000000000000..52f9f03d98dcef --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sentence_classification4designtutor_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English sentence_classification4designtutor_pipeline pipeline RoBertaForSequenceClassification from Shenzy +author: John Snow Labs +name: sentence_classification4designtutor_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sentence_classification4designtutor_pipeline` is a English model originally trained by Shenzy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentence_classification4designtutor_pipeline_en_5.5.0_3.0_1725542786262.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentence_classification4designtutor_pipeline_en_5.5.0_3.0_1725542786262.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sentence_classification4designtutor_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sentence_classification4designtutor_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|sentence_classification4designtutor_pipeline|
+|Type:|pipeline|
+|Compatibility:|Spark NLP 5.5.0+|
+|License:|Open Source|
+|Edition:|Official|
+|Language:|en|
+|Size:|1.3 GB|
+
+## References
+
+https://huggingface.co/Shenzy/Sentence_Classification4DesignTutor
+
+## Included Models
+
+- DocumentAssembler
+- TokenizerModel
+- RoBertaForSequenceClassification
\ No newline at end of file
diff --git a/docs/_posts/ahmedlone127/2024-09-05-sentiment_analysis_albert1_en.md b/docs/_posts/ahmedlone127/2024-09-05-sentiment_analysis_albert1_en.md
new file mode 100644
index 00000000000000..01a2e79443bdcc
--- /dev/null
+++ b/docs/_posts/ahmedlone127/2024-09-05-sentiment_analysis_albert1_en.md
@@ -0,0 +1,94 @@
+---
+layout: model
+title: English sentiment_analysis_albert1 AlbertForSequenceClassification from adithya5243
+author: John Snow Labs
+name: sentiment_analysis_albert1
+date: 2024-09-05
+tags: [en, open_source, onnx, sequence_classification, albert]
+task: Text Classification
+language: en
+edition: Spark NLP 5.5.0
+spark_version: 3.0
+supported: true
+engine: onnx
+annotator: AlbertForSequenceClassification
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sentiment_analysis_albert1` is an English model originally trained by adithya5243.
+
+{:.btn-box}
+
+
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentiment_analysis_albert1_en_5.5.0_3.0_1725543048803.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentiment_analysis_albert1_en_5.5.0_3.0_1725543048803.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = AlbertForSequenceClassification.pretrained("sentiment_analysis_albert1","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = AlbertForSequenceClassification.pretrained("sentiment_analysis_albert1", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentiment_analysis_albert1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/adithya5243/sentiment-analysis_albert1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sentiment_analysis_albert1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-sentiment_analysis_albert1_pipeline_en.md new file mode 100644 index 00000000000000..c357c5e0d6edb0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sentiment_analysis_albert1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English sentiment_analysis_albert1_pipeline pipeline AlbertForSequenceClassification from adithya5243 +author: John Snow Labs +name: sentiment_analysis_albert1_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sentiment_analysis_albert1_pipeline` is a English model originally trained by adithya5243. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentiment_analysis_albert1_pipeline_en_5.5.0_3.0_1725543051264.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentiment_analysis_albert1_pipeline_en_5.5.0_3.0_1725543051264.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sentiment_analysis_albert1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sentiment_analysis_albert1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentiment_analysis_albert1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/adithya5243/sentiment-analysis_albert1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sentiment_sentiment_small_random2_seed0_bernice_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-sentiment_sentiment_small_random2_seed0_bernice_pipeline_en.md new file mode 100644 index 00000000000000..d3cd5cfb7fd3a8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sentiment_sentiment_small_random2_seed0_bernice_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English sentiment_sentiment_small_random2_seed0_bernice_pipeline pipeline XlmRoBertaForSequenceClassification from tweettemposhift +author: John Snow Labs +name: sentiment_sentiment_small_random2_seed0_bernice_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sentiment_sentiment_small_random2_seed0_bernice_pipeline` is a English model originally trained by tweettemposhift. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentiment_sentiment_small_random2_seed0_bernice_pipeline_en_5.5.0_3.0_1725538095412.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentiment_sentiment_small_random2_seed0_bernice_pipeline_en_5.5.0_3.0_1725538095412.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sentiment_sentiment_small_random2_seed0_bernice_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sentiment_sentiment_small_random2_seed0_bernice_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentiment_sentiment_small_random2_seed0_bernice_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|790.2 MB| + +## References + +https://huggingface.co/tweettemposhift/sentiment-sentiment_small_random2_seed0-bernice + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sept_1_2024_awesome_eli5_mlm_model_en.md b/docs/_posts/ahmedlone127/2024-09-05-sept_1_2024_awesome_eli5_mlm_model_en.md new file mode 100644 index 00000000000000..4b86169797e6c9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sept_1_2024_awesome_eli5_mlm_model_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sept_1_2024_awesome_eli5_mlm_model RoBertaEmbeddings from jzkv5 +author: John Snow Labs +name: sept_1_2024_awesome_eli5_mlm_model +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sept_1_2024_awesome_eli5_mlm_model` is a English model originally trained by jzkv5. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sept_1_2024_awesome_eli5_mlm_model_en_5.5.0_3.0_1725572677437.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sept_1_2024_awesome_eli5_mlm_model_en_5.5.0_3.0_1725572677437.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("sept_1_2024_awesome_eli5_mlm_model","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("sept_1_2024_awesome_eli5_mlm_model","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
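+If you need the token vectors as plain Spark ML vectors rather than nested annotations, an `EmbeddingsFinisher` can be appended to the result. A minimal sketch, assuming the `pipelineDF` produced above (this step is not part of the original example):
+
+```python
+# Sketch: convert the `embeddings` annotations into plain vector columns.
+# EmbeddingsFinisher is a standard Spark NLP transformer; column names follow
+# the example above.
+from sparknlp.base import EmbeddingsFinisher
+
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["finished_embeddings"]) \
+    .setOutputAsVector(True)
+
+finished = finisher.transform(pipelineDF)
+finished.selectExpr("explode(finished_embeddings) as token_vector").show(5, truncate=80)
+```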
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sept_1_2024_awesome_eli5_mlm_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|306.4 MB| + +## References + +https://huggingface.co/jzkv5/Sept_1_2024_awesome_eli5_mlm_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-seq2seq_finetuned_cxg_dutch_tonga_tonga_islands_code_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-seq2seq_finetuned_cxg_dutch_tonga_tonga_islands_code_pipeline_en.md new file mode 100644 index 00000000000000..2c58c629560d01 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-seq2seq_finetuned_cxg_dutch_tonga_tonga_islands_code_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English seq2seq_finetuned_cxg_dutch_tonga_tonga_islands_code_pipeline pipeline MarianTransformer from sheykina +author: John Snow Labs +name: seq2seq_finetuned_cxg_dutch_tonga_tonga_islands_code_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`seq2seq_finetuned_cxg_dutch_tonga_tonga_islands_code_pipeline` is a English model originally trained by sheykina. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/seq2seq_finetuned_cxg_dutch_tonga_tonga_islands_code_pipeline_en_5.5.0_3.0_1725494616727.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/seq2seq_finetuned_cxg_dutch_tonga_tonga_islands_code_pipeline_en_5.5.0_3.0_1725494616727.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("seq2seq_finetuned_cxg_dutch_tonga_tonga_islands_code_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("seq2seq_finetuned_cxg_dutch_tonga_tonga_islands_code_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|seq2seq_finetuned_cxg_dutch_tonga_tonga_islands_code_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|509.0 MB| + +## References + +https://huggingface.co/sheykina/seq2seq-finetuned-CXG-nl-to-code + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-setfit_finetuned_classifier_en.md b/docs/_posts/ahmedlone127/2024-09-05-setfit_finetuned_classifier_en.md new file mode 100644 index 00000000000000..6a0ea31bb015b6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-setfit_finetuned_classifier_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English setfit_finetuned_classifier MPNetForSequenceClassification from Kuaaangwen +author: John Snow Labs +name: setfit_finetuned_classifier +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, mpnet] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`setfit_finetuned_classifier` is a English model originally trained by Kuaaangwen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/setfit_finetuned_classifier_en_5.5.0_3.0_1725575131887.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/setfit_finetuned_classifier_en_5.5.0_3.0_1725575131887.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = MPNetForSequenceClassification.pretrained("setfit_finetuned_classifier","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = MPNetForSequenceClassification.pretrained("setfit_finetuned_classifier", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|setfit_finetuned_classifier| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/Kuaaangwen/Setfit-finetuned-classifier \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-shopee_ner_en.md b/docs/_posts/ahmedlone127/2024-09-05-shopee_ner_en.md new file mode 100644 index 00000000000000..b745095b9cd942 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-shopee_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English shopee_ner DistilBertForTokenClassification from yzzhu +author: John Snow Labs +name: shopee_ner +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`shopee_ner` is a English model originally trained by yzzhu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/shopee_ner_en_5.5.0_3.0_1725518398191.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/shopee_ner_en_5.5.0_3.0_1725518398191.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("shopee_ner","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("shopee_ner", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
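+The token classifier emits one IOB tag per token in the `ner` column. To group those tags into entity chunks, a `NerConverter` can be applied on top of the transformed DataFrame. A minimal sketch, assuming the `pipelineDF` from the example above (the converter is not part of the original snippet):
+
+```python
+# Sketch: merge token-level tags into entity chunks.
+# Assumes `pipelineDF` from the example; column names follow that example.
+from sparknlp.annotator import NerConverter
+from pyspark.sql import functions as F
+
+converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+converter.transform(pipelineDF) \
+    .select(F.explode("ner_chunk.result").alias("entity")) \
+    .show(truncate=False)
+```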
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|shopee_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|505.4 MB| + +## References + +https://huggingface.co/yzzhu/shopee-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-short_answer_classification_en.md b/docs/_posts/ahmedlone127/2024-09-05-short_answer_classification_en.md new file mode 100644 index 00000000000000..914996445304fb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-short_answer_classification_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English short_answer_classification MPNetForSequenceClassification from tiedaar +author: John Snow Labs +name: short_answer_classification +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, mpnet] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`short_answer_classification` is a English model originally trained by tiedaar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/short_answer_classification_en_5.5.0_3.0_1725575152709.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/short_answer_classification_en_5.5.0_3.0_1725575152709.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = MPNetForSequenceClassification.pretrained("short_answer_classification","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = MPNetForSequenceClassification.pretrained("short_answer_classification", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|short_answer_classification| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|395.8 MB| + +## References + +https://huggingface.co/tiedaar/short-answer-classification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-short_answer_classification_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-short_answer_classification_pipeline_en.md new file mode 100644 index 00000000000000..7eddc617b3f6f6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-short_answer_classification_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English short_answer_classification_pipeline pipeline MPNetForSequenceClassification from tiedaar +author: John Snow Labs +name: short_answer_classification_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`short_answer_classification_pipeline` is a English model originally trained by tiedaar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/short_answer_classification_pipeline_en_5.5.0_3.0_1725575178708.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/short_answer_classification_pipeline_en_5.5.0_3.0_1725575178708.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("short_answer_classification_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("short_answer_classification_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|short_answer_classification_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|395.9 MB| + +## References + +https://huggingface.co/tiedaar/short-answer-classification + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-singberta_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-singberta_pipeline_en.md new file mode 100644 index 00000000000000..90bba2d24cb669 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-singberta_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English singberta_pipeline pipeline RoBertaEmbeddings from mstaron +author: John Snow Labs +name: singberta_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`singberta_pipeline` is a English model originally trained by mstaron. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/singberta_pipeline_en_5.5.0_3.0_1725573203602.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/singberta_pipeline_en_5.5.0_3.0_1725573203602.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("singberta_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("singberta_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|singberta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|310.8 MB| + +## References + +https://huggingface.co/mstaron/SingBERTa + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sinhala_roberta_mc4_pipeline_si.md b/docs/_posts/ahmedlone127/2024-09-05-sinhala_roberta_mc4_pipeline_si.md new file mode 100644 index 00000000000000..1537428e37dd5b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sinhala_roberta_mc4_pipeline_si.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Sinhala, Sinhalese sinhala_roberta_mc4_pipeline pipeline RoBertaEmbeddings from keshan +author: John Snow Labs +name: sinhala_roberta_mc4_pipeline +date: 2024-09-05 +tags: [si, open_source, pipeline, onnx] +task: Embeddings +language: si +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sinhala_roberta_mc4_pipeline` is a Sinhala, Sinhalese model originally trained by keshan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sinhala_roberta_mc4_pipeline_si_5.5.0_3.0_1725573018747.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sinhala_roberta_mc4_pipeline_si_5.5.0_3.0_1725573018747.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sinhala_roberta_mc4_pipeline", lang = "si") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sinhala_roberta_mc4_pipeline", lang = "si") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sinhala_roberta_mc4_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|si| +|Size:|465.9 MB| + +## References + +https://huggingface.co/keshan/sinhala-roberta-mc4 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sinhala_roberta_mc4_si.md b/docs/_posts/ahmedlone127/2024-09-05-sinhala_roberta_mc4_si.md new file mode 100644 index 00000000000000..59ed110e1732c5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sinhala_roberta_mc4_si.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Sinhala, Sinhalese sinhala_roberta_mc4 RoBertaEmbeddings from keshan +author: John Snow Labs +name: sinhala_roberta_mc4 +date: 2024-09-05 +tags: [si, open_source, onnx, embeddings, roberta] +task: Embeddings +language: si +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sinhala_roberta_mc4` is a Sinhala, Sinhalese model originally trained by keshan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sinhala_roberta_mc4_si_5.5.0_3.0_1725572995401.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sinhala_roberta_mc4_si_5.5.0_3.0_1725572995401.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("sinhala_roberta_mc4","si") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("sinhala_roberta_mc4","si") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sinhala_roberta_mc4| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|si| +|Size:|465.9 MB| + +## References + +https://huggingface.co/keshan/sinhala-roberta-mc4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sinhalese_bert_ner_en.md b/docs/_posts/ahmedlone127/2024-09-05-sinhalese_bert_ner_en.md new file mode 100644 index 00000000000000..905321167cee0d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sinhalese_bert_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sinhalese_bert_ner BertForTokenClassification from shubh1410 +author: John Snow Labs +name: sinhalese_bert_ner +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sinhalese_bert_ner` is a English model originally trained by shubh1410. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sinhalese_bert_ner_en_5.5.0_3.0_1725538572693.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sinhalese_bert_ner_en_5.5.0_3.0_1725538572693.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("sinhalese_bert_ner","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("sinhalese_bert_ner", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sinhalese_bert_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/shubh1410/si_bert_ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sinhalese_bert_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-sinhalese_bert_ner_pipeline_en.md new file mode 100644 index 00000000000000..da8ceae6d9ad81 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sinhalese_bert_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English sinhalese_bert_ner_pipeline pipeline BertForTokenClassification from shubh1410 +author: John Snow Labs +name: sinhalese_bert_ner_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sinhalese_bert_ner_pipeline` is a English model originally trained by shubh1410. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sinhalese_bert_ner_pipeline_en_5.5.0_3.0_1725538598814.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sinhalese_bert_ner_pipeline_en_5.5.0_3.0_1725538598814.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sinhalese_bert_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sinhalese_bert_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sinhalese_bert_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/shubh1410/si_bert_ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sitexsometre_camembert_large_stsb50_en.md b/docs/_posts/ahmedlone127/2024-09-05-sitexsometre_camembert_large_stsb50_en.md new file mode 100644 index 00000000000000..caf845f69373b0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sitexsometre_camembert_large_stsb50_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sitexsometre_camembert_large_stsb50 CamemBertForSequenceClassification from Kigo1974 +author: John Snow Labs +name: sitexsometre_camembert_large_stsb50 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, camembert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sitexsometre_camembert_large_stsb50` is a English model originally trained by Kigo1974. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sitexsometre_camembert_large_stsb50_en_5.5.0_3.0_1725544082727.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sitexsometre_camembert_large_stsb50_en_5.5.0_3.0_1725544082727.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = CamemBertForSequenceClassification.pretrained("sitexsometre_camembert_large_stsb50","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = CamemBertForSequenceClassification.pretrained("sitexsometre_camembert_large_stsb50", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
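+When latency matters more than throughput (for example, scoring single sentences), the fitted pipeline can be wrapped in a `LightPipeline`, which avoids running a full Spark job per request. A minimal sketch, assuming the `pipelineModel` fitted above:
+
+```python
+# Sketch: low-latency inference on plain strings with LightPipeline.
+# Assumes `pipelineModel` from the example above.
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+annotations = light.annotate("I love spark-nlp")
+print(annotations["class"])  # predicted label(s) for the input sentence
+```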
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sitexsometre_camembert_large_stsb50| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|805.5 MB| + +## References + +https://huggingface.co/Kigo1974/sitexsometre-camembert-large-stsb50 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sitexsometre_camembert_large_stsb50_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-sitexsometre_camembert_large_stsb50_pipeline_en.md new file mode 100644 index 00000000000000..562ab85c1ea8e8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sitexsometre_camembert_large_stsb50_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English sitexsometre_camembert_large_stsb50_pipeline pipeline CamemBertForSequenceClassification from Kigo1974 +author: John Snow Labs +name: sitexsometre_camembert_large_stsb50_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sitexsometre_camembert_large_stsb50_pipeline` is a English model originally trained by Kigo1974. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sitexsometre_camembert_large_stsb50_pipeline_en_5.5.0_3.0_1725544316123.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sitexsometre_camembert_large_stsb50_pipeline_en_5.5.0_3.0_1725544316123.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sitexsometre_camembert_large_stsb50_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sitexsometre_camembert_large_stsb50_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
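+
+For quick experiments the loaded pipeline can also be applied to a plain string instead of a DataFrame. The sketch below assumes the Python `PretrainedPipeline` API, whose `annotate` method returns a dictionary keyed by the pipeline's output columns:
+
+```python
+# single-document, in-memory inference with the pretrained pipeline
+result = pipeline.annotate("I love spark-nlp")
+print(result.keys())
+```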
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sitexsometre_camembert_large_stsb50_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|805.5 MB| + +## References + +https://huggingface.co/Kigo1974/sitexsometre-camembert-large-stsb50 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-spanish_eng_xlm_roberta_sentiment_en.md b/docs/_posts/ahmedlone127/2024-09-05-spanish_eng_xlm_roberta_sentiment_en.md new file mode 100644 index 00000000000000..1b54fc2b94c1da --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-spanish_eng_xlm_roberta_sentiment_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English spanish_eng_xlm_roberta_sentiment XlmRoBertaForSequenceClassification from MinaAlmasi +author: John Snow Labs +name: spanish_eng_xlm_roberta_sentiment +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`spanish_eng_xlm_roberta_sentiment` is a English model originally trained by MinaAlmasi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/spanish_eng_xlm_roberta_sentiment_en_5.5.0_3.0_1725514482772.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/spanish_eng_xlm_roberta_sentiment_en_5.5.0_3.0_1725514482772.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("spanish_eng_xlm_roberta_sentiment","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("spanish_eng_xlm_roberta_sentiment", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|spanish_eng_xlm_roberta_sentiment| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|858.7 MB| + +## References + +https://huggingface.co/MinaAlmasi/ES-ENG-xlm-roberta-sentiment \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-spanish_eng_xlm_roberta_sentiment_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-spanish_eng_xlm_roberta_sentiment_pipeline_en.md new file mode 100644 index 00000000000000..43c2a360e2d521 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-spanish_eng_xlm_roberta_sentiment_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English spanish_eng_xlm_roberta_sentiment_pipeline pipeline XlmRoBertaForSequenceClassification from MinaAlmasi +author: John Snow Labs +name: spanish_eng_xlm_roberta_sentiment_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`spanish_eng_xlm_roberta_sentiment_pipeline` is a English model originally trained by MinaAlmasi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/spanish_eng_xlm_roberta_sentiment_pipeline_en_5.5.0_3.0_1725514550394.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/spanish_eng_xlm_roberta_sentiment_pipeline_en_5.5.0_3.0_1725514550394.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("spanish_eng_xlm_roberta_sentiment_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("spanish_eng_xlm_roberta_sentiment_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|spanish_eng_xlm_roberta_sentiment_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|858.8 MB| + +## References + +https://huggingface.co/MinaAlmasi/ES-ENG-xlm-roberta-sentiment + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-splade_pp_english_v1_en.md b/docs/_posts/ahmedlone127/2024-09-05-splade_pp_english_v1_en.md new file mode 100644 index 00000000000000..a4d3cdbdad8139 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-splade_pp_english_v1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English splade_pp_english_v1 BertEmbeddings from prithivida +author: John Snow Labs +name: splade_pp_english_v1 +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`splade_pp_english_v1` is a English model originally trained by prithivida. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/splade_pp_english_v1_en_5.5.0_3.0_1725553213456.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/splade_pp_english_v1_en_5.5.0_3.0_1725553213456.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("splade_pp_english_v1","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("splade_pp_english_v1","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
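+
+The `embeddings` column produced above contains one annotation per token. If plain Spark vectors are more convenient downstream, an `EmbeddingsFinisher` stage can be appended to the same pipeline; the sketch below reuses the stage names from the Python snippet and is only one possible setup:
+
+```python
+from sparknlp.base import EmbeddingsFinisher
+
+# convert token-level embedding annotations into plain Spark vectors
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["finished_embeddings"]) \
+    .setOutputAsVector(True)
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings, finisher])
+```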
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|splade_pp_english_v1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/prithivida/Splade_PP_en_v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-splade_pp_english_v1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-splade_pp_english_v1_pipeline_en.md new file mode 100644 index 00000000000000..19cd489efdb56d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-splade_pp_english_v1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English splade_pp_english_v1_pipeline pipeline BertEmbeddings from prithivida +author: John Snow Labs +name: splade_pp_english_v1_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`splade_pp_english_v1_pipeline` is a English model originally trained by prithivida. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/splade_pp_english_v1_pipeline_en_5.5.0_3.0_1725553235021.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/splade_pp_english_v1_pipeline_en_5.5.0_3.0_1725553235021.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("splade_pp_english_v1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("splade_pp_english_v1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|splade_pp_english_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/prithivida/Splade_PP_en_v1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-srberta_nemanjapetrovic_pipeline_sr.md b/docs/_posts/ahmedlone127/2024-09-05-srberta_nemanjapetrovic_pipeline_sr.md new file mode 100644 index 00000000000000..fedfc7d6129db5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-srberta_nemanjapetrovic_pipeline_sr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Serbian srberta_nemanjapetrovic_pipeline pipeline RoBertaEmbeddings from nemanjaPetrovic +author: John Snow Labs +name: srberta_nemanjapetrovic_pipeline +date: 2024-09-05 +tags: [sr, open_source, pipeline, onnx] +task: Embeddings +language: sr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`srberta_nemanjapetrovic_pipeline` is a Serbian model originally trained by nemanjaPetrovic. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/srberta_nemanjapetrovic_pipeline_sr_5.5.0_3.0_1725572680810.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/srberta_nemanjapetrovic_pipeline_sr_5.5.0_3.0_1725572680810.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("srberta_nemanjapetrovic_pipeline", lang = "sr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("srberta_nemanjapetrovic_pipeline", lang = "sr") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|srberta_nemanjapetrovic_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|sr| +|Size:|466.3 MB| + +## References + +https://huggingface.co/nemanjaPetrovic/SrBERTa + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-srberta_nemanjapetrovic_sr.md b/docs/_posts/ahmedlone127/2024-09-05-srberta_nemanjapetrovic_sr.md new file mode 100644 index 00000000000000..d22304d464fbd8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-srberta_nemanjapetrovic_sr.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Serbian srberta_nemanjapetrovic RoBertaEmbeddings from nemanjaPetrovic +author: John Snow Labs +name: srberta_nemanjapetrovic +date: 2024-09-05 +tags: [sr, open_source, onnx, embeddings, roberta] +task: Embeddings +language: sr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`srberta_nemanjapetrovic` is a Serbian model originally trained by nemanjaPetrovic. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/srberta_nemanjapetrovic_sr_5.5.0_3.0_1725572656282.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/srberta_nemanjapetrovic_sr_5.5.0_3.0_1725572656282.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("srberta_nemanjapetrovic","sr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("srberta_nemanjapetrovic","sr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|srberta_nemanjapetrovic| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|sr| +|Size:|466.2 MB| + +## References + +https://huggingface.co/nemanjaPetrovic/SrBERTa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-stance_detection_en.md b/docs/_posts/ahmedlone127/2024-09-05-stance_detection_en.md new file mode 100644 index 00000000000000..b0129b463b3837 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-stance_detection_en.md @@ -0,0 +1,98 @@ +--- +layout: model +title: English stance_detection BertForSequenceClassification from cheese7858 +author: John Snow Labs +name: stance_detection +date: 2024-09-05 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`stance_detection` is a English model originally trained by cheese7858. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/stance_detection_en_5.5.0_3.0_1725526862111.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/stance_detection_en_5.5.0_3.0_1725526862111.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+document_assembler = DocumentAssembler()\
+    .setInputCol("text")\
+    .setOutputCol("document")
+
+tokenizer = Tokenizer()\
+    .setInputCols("document")\
+    .setOutputCol("token")
+
+sequenceClassifier = BertForSequenceClassification.pretrained("stance_detection","en")\
+    .setInputCols(["document","token"])\
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier])
+
+data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val sequenceClassifier = BertForSequenceClassification.pretrained("stance_detection","en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+
+val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text")
+
+val result = pipeline.fit(data).transform(data)
+```
+</div>
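+
+When only a handful of texts need to be scored, the fitted pipeline can also be wrapped in a `LightPipeline`, which runs in memory without building a DataFrame per request. A sketch assuming the Python variables defined above:
+
+```python
+from sparknlp.base import LightPipeline
+
+# in-memory inference on plain strings; annotate returns one entry per output column
+light = LightPipeline(pipeline.fit(data))
+print(light.annotate("PUT YOUR STRING HERE")["class"])
+```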
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|stance_detection| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|792.3 MB| + +## References + +References + +https://huggingface.co/cheese7858/stance_detection \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-stance_detection_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-stance_detection_pipeline_en.md new file mode 100644 index 00000000000000..607bd7be7be1da --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-stance_detection_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English stance_detection_pipeline pipeline XlmRoBertaForSequenceClassification from dominiks +author: John Snow Labs +name: stance_detection_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`stance_detection_pipeline` is a English model originally trained by dominiks. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/stance_detection_pipeline_en_5.5.0_3.0_1725526998943.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/stance_detection_pipeline_en_5.5.0_3.0_1725526998943.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("stance_detection_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("stance_detection_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|stance_detection_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|792.4 MB| + +## References + +https://huggingface.co/dominiks/stance-detection + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-stego_classifier_checkpoint_epoch_70_2024_07_26_16_03_28_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-stego_classifier_checkpoint_epoch_70_2024_07_26_16_03_28_pipeline_en.md new file mode 100644 index 00000000000000..158b66e64a3e46 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-stego_classifier_checkpoint_epoch_70_2024_07_26_16_03_28_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English stego_classifier_checkpoint_epoch_70_2024_07_26_16_03_28_pipeline pipeline DistilBertForSequenceClassification from jvelja +author: John Snow Labs +name: stego_classifier_checkpoint_epoch_70_2024_07_26_16_03_28_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`stego_classifier_checkpoint_epoch_70_2024_07_26_16_03_28_pipeline` is a English model originally trained by jvelja. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/stego_classifier_checkpoint_epoch_70_2024_07_26_16_03_28_pipeline_en_5.5.0_3.0_1725580126764.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/stego_classifier_checkpoint_epoch_70_2024_07_26_16_03_28_pipeline_en_5.5.0_3.0_1725580126764.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("stego_classifier_checkpoint_epoch_70_2024_07_26_16_03_28_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("stego_classifier_checkpoint_epoch_70_2024_07_26_16_03_28_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|stego_classifier_checkpoint_epoch_70_2024_07_26_16_03_28_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/jvelja/stego-classifier-checkpoint-epoch-70-2024-07-26_16-03-28 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sts_conventional_fine_tuning_capstone_roberta_base_filtered_137_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-sts_conventional_fine_tuning_capstone_roberta_base_filtered_137_pipeline_en.md new file mode 100644 index 00000000000000..eafb8f76debdc1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sts_conventional_fine_tuning_capstone_roberta_base_filtered_137_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English sts_conventional_fine_tuning_capstone_roberta_base_filtered_137_pipeline pipeline RoBertaForSequenceClassification from rajevan123 +author: John Snow Labs +name: sts_conventional_fine_tuning_capstone_roberta_base_filtered_137_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sts_conventional_fine_tuning_capstone_roberta_base_filtered_137_pipeline` is a English model originally trained by rajevan123. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sts_conventional_fine_tuning_capstone_roberta_base_filtered_137_pipeline_en_5.5.0_3.0_1725542151538.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sts_conventional_fine_tuning_capstone_roberta_base_filtered_137_pipeline_en_5.5.0_3.0_1725542151538.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sts_conventional_fine_tuning_capstone_roberta_base_filtered_137_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sts_conventional_fine_tuning_capstone_roberta_base_filtered_137_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
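+
+Besides `transform`, the loaded pipeline also exposes `fullAnnotate`, which keeps the complete annotation objects, including per-label confidence scores in the metadata. A small Python sketch (the exact metadata keys depend on the classifier):
+
+```python
+# fullAnnotate returns one result per input string, with metadata attached to each annotation
+results = pipeline.fullAnnotate("I love spark-nlp")
+for annotation in results[0]["class"]:
+    print(annotation.result, annotation.metadata)
+```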
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sts_conventional_fine_tuning_capstone_roberta_base_filtered_137_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|433.4 MB| + +## References + +https://huggingface.co/rajevan123/STS-conventional-Fine-Tuning-Capstone-roberta-base-filtered-137 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-stsb_mpnet_basev2_sitexse_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-stsb_mpnet_basev2_sitexse_pipeline_en.md new file mode 100644 index 00000000000000..1b6e596cbbace1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-stsb_mpnet_basev2_sitexse_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English stsb_mpnet_basev2_sitexse_pipeline pipeline MPNetForSequenceClassification from Kigo1974 +author: John Snow Labs +name: stsb_mpnet_basev2_sitexse_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`stsb_mpnet_basev2_sitexse_pipeline` is a English model originally trained by Kigo1974. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/stsb_mpnet_basev2_sitexse_pipeline_en_5.5.0_3.0_1725575009193.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/stsb_mpnet_basev2_sitexse_pipeline_en_5.5.0_3.0_1725575009193.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("stsb_mpnet_basev2_sitexse_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("stsb_mpnet_basev2_sitexse_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|stsb_mpnet_basev2_sitexse_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|408.2 MB| + +## References + +https://huggingface.co/Kigo1974/stsb-mpnet-basev2-sitexse + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sunbird_english_ganda_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-sunbird_english_ganda_pipeline_en.md new file mode 100644 index 00000000000000..885409d0f682bc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sunbird_english_ganda_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English sunbird_english_ganda_pipeline pipeline MarianTransformer from Sunbird +author: John Snow Labs +name: sunbird_english_ganda_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sunbird_english_ganda_pipeline` is a English model originally trained by Sunbird. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sunbird_english_ganda_pipeline_en_5.5.0_3.0_1725545460804.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sunbird_english_ganda_pipeline_en_5.5.0_3.0_1725545460804.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sunbird_english_ganda_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sunbird_english_ganda_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sunbird_english_ganda_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|515.1 MB| + +## References + +https://huggingface.co/Sunbird/sunbird-en-lg + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sundanese_roberta_base_pipeline_su.md b/docs/_posts/ahmedlone127/2024-09-05-sundanese_roberta_base_pipeline_su.md new file mode 100644 index 00000000000000..4f71fcc3541d11 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sundanese_roberta_base_pipeline_su.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Sundanese sundanese_roberta_base_pipeline pipeline RoBertaEmbeddings from w11wo +author: John Snow Labs +name: sundanese_roberta_base_pipeline +date: 2024-09-05 +tags: [su, open_source, pipeline, onnx] +task: Embeddings +language: su +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sundanese_roberta_base_pipeline` is a Sundanese model originally trained by w11wo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sundanese_roberta_base_pipeline_su_5.5.0_3.0_1725573108833.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sundanese_roberta_base_pipeline_su_5.5.0_3.0_1725573108833.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sundanese_roberta_base_pipeline", lang = "su") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sundanese_roberta_base_pipeline", lang = "su") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sundanese_roberta_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|su| +|Size:|465.6 MB| + +## References + +https://huggingface.co/w11wo/sundanese-roberta-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-sundanese_roberta_base_su.md b/docs/_posts/ahmedlone127/2024-09-05-sundanese_roberta_base_su.md new file mode 100644 index 00000000000000..49480a66cdb6a6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-sundanese_roberta_base_su.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Sundanese sundanese_roberta_base RoBertaEmbeddings from w11wo +author: John Snow Labs +name: sundanese_roberta_base +date: 2024-09-05 +tags: [su, open_source, onnx, embeddings, roberta] +task: Embeddings +language: su +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sundanese_roberta_base` is a Sundanese model originally trained by w11wo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sundanese_roberta_base_su_5.5.0_3.0_1725573085189.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sundanese_roberta_base_su_5.5.0_3.0_1725573085189.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("sundanese_roberta_base","su") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("sundanese_roberta_base","su") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sundanese_roberta_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|su| +|Size:|465.6 MB| + +## References + +https://huggingface.co/w11wo/sundanese-roberta-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-t2t_gun_nlth_from_base_en.md b/docs/_posts/ahmedlone127/2024-09-05-t2t_gun_nlth_from_base_en.md new file mode 100644 index 00000000000000..eb641a4c2cc1e1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-t2t_gun_nlth_from_base_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English t2t_gun_nlth_from_base MarianTransformer from tiagoblima +author: John Snow Labs +name: t2t_gun_nlth_from_base +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`t2t_gun_nlth_from_base` is a English model originally trained by tiagoblima. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/t2t_gun_nlth_from_base_en_5.5.0_3.0_1725494434326.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/t2t_gun_nlth_from_base_en_5.5.0_3.0_1725494434326.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("t2t_gun_nlth_from_base","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("t2t_gun_nlth_from_base","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
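+
+Once the pipeline has run, the translations sit in the `translation` column configured above. One way to read them back from `pipelineDF` in the Python snippet is:
+
+```python
+# each detected sentence in the input yields one translated string
+pipelineDF.selectExpr("explode(translation.result) as translation").show(truncate=False)
+```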
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|t2t_gun_nlth_from_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|220.4 MB| + +## References + +https://huggingface.co/tiagoblima/t2t-gun-nlth-from-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-t2t_gun_nlth_from_stratch_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-t2t_gun_nlth_from_stratch_pipeline_en.md new file mode 100644 index 00000000000000..ced4d5807ad730 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-t2t_gun_nlth_from_stratch_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English t2t_gun_nlth_from_stratch_pipeline pipeline MarianTransformer from tiagoblima +author: John Snow Labs +name: t2t_gun_nlth_from_stratch_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`t2t_gun_nlth_from_stratch_pipeline` is a English model originally trained by tiagoblima. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/t2t_gun_nlth_from_stratch_pipeline_en_5.5.0_3.0_1725494979774.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/t2t_gun_nlth_from_stratch_pipeline_en_5.5.0_3.0_1725494979774.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("t2t_gun_nlth_from_stratch_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("t2t_gun_nlth_from_stratch_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|t2t_gun_nlth_from_stratch_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|220.9 MB| + +## References + +https://huggingface.co/tiagoblima/t2t-gun-nlth-from-stratch + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-tajik_messages_classificator_en.md b/docs/_posts/ahmedlone127/2024-09-05-tajik_messages_classificator_en.md new file mode 100644 index 00000000000000..00bc9cbbf4b2c7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-tajik_messages_classificator_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English tajik_messages_classificator XlmRoBertaForSequenceClassification from SeeeRGo +author: John Snow Labs +name: tajik_messages_classificator +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tajik_messages_classificator` is a English model originally trained by SeeeRGo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tajik_messages_classificator_en_5.5.0_3.0_1725530838432.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tajik_messages_classificator_en_5.5.0_3.0_1725530838432.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("tajik_messages_classificator","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("tajik_messages_classificator", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tajik_messages_classificator| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|790.5 MB| + +## References + +https://huggingface.co/SeeeRGo/tg-messages-classificator \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-tajik_messages_classificator_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-tajik_messages_classificator_pipeline_en.md new file mode 100644 index 00000000000000..f891ad754c306e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-tajik_messages_classificator_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English tajik_messages_classificator_pipeline pipeline XlmRoBertaForSequenceClassification from SeeeRGo +author: John Snow Labs +name: tajik_messages_classificator_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tajik_messages_classificator_pipeline` is a English model originally trained by SeeeRGo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tajik_messages_classificator_pipeline_en_5.5.0_3.0_1725530972015.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tajik_messages_classificator_pipeline_en_5.5.0_3.0_1725530972015.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("tajik_messages_classificator_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("tajik_messages_classificator_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tajik_messages_classificator_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|790.6 MB| + +## References + +https://huggingface.co/SeeeRGo/tg-messages-classificator + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-tapt_helpfulness_base_pretraining_model_full_train_en.md b/docs/_posts/ahmedlone127/2024-09-05-tapt_helpfulness_base_pretraining_model_full_train_en.md new file mode 100644 index 00000000000000..3498ce8daa949a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-tapt_helpfulness_base_pretraining_model_full_train_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English tapt_helpfulness_base_pretraining_model_full_train RoBertaEmbeddings from ltuzova +author: John Snow Labs +name: tapt_helpfulness_base_pretraining_model_full_train +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tapt_helpfulness_base_pretraining_model_full_train` is a English model originally trained by ltuzova. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tapt_helpfulness_base_pretraining_model_full_train_en_5.5.0_3.0_1725578005527.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tapt_helpfulness_base_pretraining_model_full_train_en_5.5.0_3.0_1725578005527.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("tapt_helpfulness_base_pretraining_model_full_train","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("tapt_helpfulness_base_pretraining_model_full_train","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
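+
+If a single vector per document is preferred over one embedding per token, a pooling stage can be appended after the embeddings. The sketch below uses Spark NLP's `SentenceEmbeddings` annotator with average pooling and reuses the stage names from the Python snippet above; it is an illustrative addition, not part of the released model:
+
+```python
+from sparknlp.annotator import SentenceEmbeddings
+
+# average the token embeddings into one vector per document
+sentenceEmbeddings = SentenceEmbeddings() \
+    .setInputCols(["document", "embeddings"]) \
+    .setOutputCol("sentence_embeddings") \
+    .setPoolingStrategy("AVERAGE")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings, sentenceEmbeddings])
+```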
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tapt_helpfulness_base_pretraining_model_full_train| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|466.2 MB| + +## References + +https://huggingface.co/ltuzova/tapt_helpfulness_base_pretraining_model_full_train \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-tapt_helpfulness_base_pretraining_model_full_train_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-tapt_helpfulness_base_pretraining_model_full_train_pipeline_en.md new file mode 100644 index 00000000000000..b65a916bd161a8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-tapt_helpfulness_base_pretraining_model_full_train_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English tapt_helpfulness_base_pretraining_model_full_train_pipeline pipeline RoBertaEmbeddings from ltuzova +author: John Snow Labs +name: tapt_helpfulness_base_pretraining_model_full_train_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tapt_helpfulness_base_pretraining_model_full_train_pipeline` is a English model originally trained by ltuzova. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tapt_helpfulness_base_pretraining_model_full_train_pipeline_en_5.5.0_3.0_1725578029783.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tapt_helpfulness_base_pretraining_model_full_train_pipeline_en_5.5.0_3.0_1725578029783.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("tapt_helpfulness_base_pretraining_model_full_train_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("tapt_helpfulness_base_pretraining_model_full_train_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tapt_helpfulness_base_pretraining_model_full_train_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|466.2 MB| + +## References + +https://huggingface.co/ltuzova/tapt_helpfulness_base_pretraining_model_full_train + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-task_implicit_task__model_deberta__aug_method_rsa_en.md b/docs/_posts/ahmedlone127/2024-09-05-task_implicit_task__model_deberta__aug_method_rsa_en.md new file mode 100644 index 00000000000000..2a054b505c04b0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-task_implicit_task__model_deberta__aug_method_rsa_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English task_implicit_task__model_deberta__aug_method_rsa DeBertaForSequenceClassification from BenjaminOcampo +author: John Snow Labs +name: task_implicit_task__model_deberta__aug_method_rsa +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`task_implicit_task__model_deberta__aug_method_rsa` is a English model originally trained by BenjaminOcampo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/task_implicit_task__model_deberta__aug_method_rsa_en_5.5.0_3.0_1725562266160.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/task_implicit_task__model_deberta__aug_method_rsa_en_5.5.0_3.0_1725562266160.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("task_implicit_task__model_deberta__aug_method_rsa","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("task_implicit_task__model_deberta__aug_method_rsa", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|task_implicit_task__model_deberta__aug_method_rsa| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|607.6 MB| + +## References + +https://huggingface.co/BenjaminOcampo/task-implicit_task__model-deberta__aug_method-rsa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-task_implicit_task__model_deberta__aug_method_rsa_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-task_implicit_task__model_deberta__aug_method_rsa_pipeline_en.md new file mode 100644 index 00000000000000..be2f0c4e3e2ac0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-task_implicit_task__model_deberta__aug_method_rsa_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English task_implicit_task__model_deberta__aug_method_rsa_pipeline pipeline DeBertaForSequenceClassification from BenjaminOcampo +author: John Snow Labs +name: task_implicit_task__model_deberta__aug_method_rsa_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`task_implicit_task__model_deberta__aug_method_rsa_pipeline` is a English model originally trained by BenjaminOcampo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/task_implicit_task__model_deberta__aug_method_rsa_pipeline_en_5.5.0_3.0_1725562305507.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/task_implicit_task__model_deberta__aug_method_rsa_pipeline_en_5.5.0_3.0_1725562305507.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("task_implicit_task__model_deberta__aug_method_rsa_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("task_implicit_task__model_deberta__aug_method_rsa_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|task_implicit_task__model_deberta__aug_method_rsa_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|607.6 MB| + +## References + +https://huggingface.co/BenjaminOcampo/task-implicit_task__model-deberta__aug_method-rsa + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-tech_roberta_pipeline_vi.md b/docs/_posts/ahmedlone127/2024-09-05-tech_roberta_pipeline_vi.md new file mode 100644 index 00000000000000..4c5b495e10f71f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-tech_roberta_pipeline_vi.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Vietnamese tech_roberta_pipeline pipeline XlmRoBertaEmbeddings from imta-ai +author: John Snow Labs +name: tech_roberta_pipeline +date: 2024-09-05 +tags: [vi, open_source, pipeline, onnx] +task: Embeddings +language: vi +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tech_roberta_pipeline` is a Vietnamese model originally trained by imta-ai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tech_roberta_pipeline_vi_5.5.0_3.0_1725555933413.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tech_roberta_pipeline_vi_5.5.0_3.0_1725555933413.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("tech_roberta_pipeline", lang = "vi") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("tech_roberta_pipeline", lang = "vi") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tech_roberta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|vi| +|Size:|942.9 MB| + +## References + +https://huggingface.co/imta-ai/tech-roberta + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-tech_roberta_vi.md b/docs/_posts/ahmedlone127/2024-09-05-tech_roberta_vi.md new file mode 100644 index 00000000000000..a8422daae0b7b5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-tech_roberta_vi.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Vietnamese tech_roberta XlmRoBertaEmbeddings from imta-ai +author: John Snow Labs +name: tech_roberta +date: 2024-09-05 +tags: [vi, open_source, onnx, embeddings, xlm_roberta] +task: Embeddings +language: vi +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tech_roberta` is a Vietnamese model originally trained by imta-ai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tech_roberta_vi_5.5.0_3.0_1725555841533.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tech_roberta_vi_5.5.0_3.0_1725555841533.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("tech_roberta","vi") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("tech_roberta","vi") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
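+
+The `embeddings` column produced above contains token-level annotations rather than plain vectors. If downstream Spark ML stages need dense vectors, an `EmbeddingsFinisher` can be appended to the same pipeline; the sketch below builds on the Python snippet above and is only one possible way to wire it.
+
+```python
+from pyspark.ml import Pipeline
+from sparknlp.base import EmbeddingsFinisher
+
+# Convert Spark NLP embedding annotations into Spark ML vectors.
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["finished_embeddings"]) \
+    .setOutputAsVector(True)
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings, finisher])
+result = pipeline.fit(data).transform(data)
+
+# One dense vector per token of the input text.
+result.selectExpr("explode(finished_embeddings) as token_vector").show(truncate=80)
+```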
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tech_roberta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[xlm_roberta]| +|Language:|vi| +|Size:|942.9 MB| + +## References + +https://huggingface.co/imta-ai/tech-roberta \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-test_airbus_year_report_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-test_airbus_year_report_pipeline_en.md new file mode 100644 index 00000000000000..86b07463c4f404 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-test_airbus_year_report_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English test_airbus_year_report_pipeline pipeline DistilBertEmbeddings from Andi2022HH +author: John Snow Labs +name: test_airbus_year_report_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`test_airbus_year_report_pipeline` is a English model originally trained by Andi2022HH. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_airbus_year_report_pipeline_en_5.5.0_3.0_1725524622636.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_airbus_year_report_pipeline_en_5.5.0_3.0_1725524622636.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("test_airbus_year_report_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("test_airbus_year_report_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test_airbus_year_report_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|402.3 MB| + +## References + +https://huggingface.co/Andi2022HH/test_airbus_year_report + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-test_demo_qa_with_roberta_en.md b/docs/_posts/ahmedlone127/2024-09-05-test_demo_qa_with_roberta_en.md new file mode 100644 index 00000000000000..01e874bb140fe2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-test_demo_qa_with_roberta_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English test_demo_qa_with_roberta RoBertaForQuestionAnswering from nes470 +author: John Snow Labs +name: test_demo_qa_with_roberta +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`test_demo_qa_with_roberta` is a English model originally trained by nes470. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_demo_qa_with_roberta_en_5.5.0_3.0_1725576240709.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_demo_qa_with_roberta_en_5.5.0_3.0_1725576240709.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = RoBertaForQuestionAnswering.pretrained("test_demo_qa_with_roberta","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = RoBertaForQuestionAnswering.pretrained("test_demo_qa_with_roberta", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
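+
+After the pipeline above has been applied, the extracted answer span sits in the `answer` column. A minimal follow-up, continuing from `pipelineDF` with the column names used in the snippet:
+
+```python
+# "result" holds the text of each annotation: the question on one side,
+# the extracted answer span on the other.
+pipelineDF.select("document_question.result", "answer.result").show(truncate=False)
+```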
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test_demo_qa_with_roberta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|463.5 MB| + +## References + +https://huggingface.co/nes470/test-demo-qa-with-roberta \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-test_directory_en.md b/docs/_posts/ahmedlone127/2024-09-05-test_directory_en.md new file mode 100644 index 00000000000000..9f7cdc88ed7240 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-test_directory_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English test_directory DistilBertForTokenClassification from Fabian-Hanitzsch +author: John Snow Labs +name: test_directory +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`test_directory` is a English model originally trained by Fabian-Hanitzsch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_directory_en_5.5.0_3.0_1725495988587.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_directory_en_5.5.0_3.0_1725495988587.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("test_directory","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("test_directory", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test_directory| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/Fabian-Hanitzsch/test_directory \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-test_directory_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-test_directory_pipeline_en.md new file mode 100644 index 00000000000000..0f04116bb1cf30 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-test_directory_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English test_directory_pipeline pipeline DistilBertForTokenClassification from Fabian-Hanitzsch +author: John Snow Labs +name: test_directory_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`test_directory_pipeline` is a English model originally trained by Fabian-Hanitzsch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_directory_pipeline_en_5.5.0_3.0_1725496000889.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_directory_pipeline_en_5.5.0_3.0_1725496000889.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("test_directory_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("test_directory_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test_directory_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/Fabian-Hanitzsch/test_directory + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-test_en.md b/docs/_posts/ahmedlone127/2024-09-05-test_en.md new file mode 100644 index 00000000000000..b874c33a5681cc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-test_en.md @@ -0,0 +1,120 @@ +--- +layout: model +title: English test CLIPForZeroShotClassification from risedev +author: John Snow Labs +name: test +date: 2024-09-05 +tags: [en, open_source, onnx, zero_shot, clip, image] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CLIPForZeroShotClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`test` is a English model originally trained by risedev. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_en_5.5.0_3.0_1725540211039.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_en_5.5.0_3.0_1725540211039.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+imageDF = spark.read \
+    .format("image") \
+    .option("dropInvalid", value = True) \
+    .load("src/test/resources/image/")
+
+candidateLabels = [
+    "a photo of a bird",
+    "a photo of a cat",
+    "a photo of a dog",
+    "a photo of a hen",
+    "a photo of a hippo",
+    "a photo of a room",
+    "a photo of a tractor",
+    "a photo of an ostrich",
+    "a photo of an ox"]
+
+imageAssembler = ImageAssembler() \
+    .setInputCol("image") \
+    .setOutputCol("image_assembler")
+
+imageClassifier = CLIPForZeroShotClassification.pretrained("test","en") \
+    .setInputCols(["image_assembler"]) \
+    .setOutputCol("label") \
+    .setCandidateLabels(candidateLabels)
+
+pipeline = Pipeline().setStages([imageAssembler, imageClassifier])
+pipelineModel = pipeline.fit(imageDF)
+pipelineDF = pipelineModel.transform(imageDF)
+
+```
+```scala
+
+val imageDF = ResourceHelper.spark.read
+    .format("image")
+    .option("dropInvalid", value = true)
+    .load("src/test/resources/image/")
+
+val candidateLabels = Array(
+    "a photo of a bird",
+    "a photo of a cat",
+    "a photo of a dog",
+    "a photo of a hen",
+    "a photo of a hippo",
+    "a photo of a room",
+    "a photo of a tractor",
+    "a photo of an ostrich",
+    "a photo of an ox")
+
+val imageAssembler = new ImageAssembler()
+    .setInputCol("image")
+    .setOutputCol("image_assembler")
+
+val imageClassifier = CLIPForZeroShotClassification.pretrained("test","en")
+    .setInputCols(Array("image_assembler"))
+    .setOutputCol("label")
+    .setCandidateLabels(candidateLabels)
+
+val pipeline = new Pipeline().setStages(Array(imageAssembler, imageClassifier))
+val pipelineModel = pipeline.fit(imageDF)
+val pipelineDF = pipelineModel.transform(imageDF)
+
+```
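+
+To see which candidate label was chosen for each image, the `label` output column can be read back alongside the image path. The sketch below continues from the Python snippet above; `image.origin` is the file-path field of Spark's image data source.
+
+```python
+# "image.origin" is the source path from Spark's image data source;
+# "label.result" is the candidate label the CLIP model scored highest.
+pipelineDF.select("image.origin", "label.result").show(truncate=False)
+```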
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[label]| +|Language:|en| +|Size:|397.7 MB| + +## References + +https://huggingface.co/risedev/test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-test_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-test_pipeline_en.md new file mode 100644 index 00000000000000..db7933f1b903db --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-test_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English test_pipeline pipeline CLIPForZeroShotClassification from risedev +author: John Snow Labs +name: test_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`test_pipeline` is a English model originally trained by risedev. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_pipeline_en_5.5.0_3.0_1725540303199.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_pipeline_en_5.5.0_3.0_1725540303199.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("test_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("test_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
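+
+Because this pipeline starts with an ImageAssembler, the `df` in the snippet above is expected to be an image DataFrame rather than a text one. The sketch below shows one assumed way to prepare such input; the folder path is a placeholder, and the exact input column expected by the pretrained pipeline is an assumption based on its included stages.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+# Placeholder path: any folder of images readable by Spark's image source.
+df = spark.read.format("image") \
+    .option("dropInvalid", True) \
+    .load("path/to/images/")
+
+pipeline = PretrainedPipeline("test_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+# The zero-shot label chosen for each image.
+annotations.select("label.result").show(truncate=False)
+```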
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|397.7 MB| + +## References + +https://huggingface.co/risedev/test + +## Included Models + +- ImageAssembler +- CLIPForZeroShotClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-testing_model_en.md b/docs/_posts/ahmedlone127/2024-09-05-testing_model_en.md new file mode 100644 index 00000000000000..39a924c117bd66 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-testing_model_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English testing_model RoBertaEmbeddings from stolenpyjak +author: John Snow Labs +name: testing_model +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`testing_model` is a English model originally trained by stolenpyjak. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/testing_model_en_5.5.0_3.0_1725572589990.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/testing_model_en_5.5.0_3.0_1725572589990.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("testing_model","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("testing_model","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|testing_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|306.5 MB| + +## References + +https://huggingface.co/stolenpyjak/testing_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-testing_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-testing_model_pipeline_en.md new file mode 100644 index 00000000000000..1389f699ec7c5c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-testing_model_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English testing_model_pipeline pipeline RoBertaEmbeddings from stolenpyjak +author: John Snow Labs +name: testing_model_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`testing_model_pipeline` is a English model originally trained by stolenpyjak. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/testing_model_pipeline_en_5.5.0_3.0_1725572605342.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/testing_model_pipeline_en_5.5.0_3.0_1725572605342.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("testing_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("testing_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|testing_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|306.5 MB| + +## References + +https://huggingface.co/stolenpyjak/testing_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-timeset_ifm_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-timeset_ifm_pipeline_en.md new file mode 100644 index 00000000000000..120014a9bed7ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-timeset_ifm_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English timeset_ifm_pipeline pipeline CLIPForZeroShotClassification from Timeset +author: John Snow Labs +name: timeset_ifm_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`timeset_ifm_pipeline` is a English model originally trained by Timeset. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/timeset_ifm_pipeline_en_5.5.0_3.0_1725523164827.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/timeset_ifm_pipeline_en_5.5.0_3.0_1725523164827.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("timeset_ifm_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("timeset_ifm_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|timeset_ifm_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|397.5 MB| + +## References + +https://huggingface.co/Timeset/timeset-ifm + +## Included Models + +- ImageAssembler +- CLIPForZeroShotClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-tiny_distill_2601_2_en.md b/docs/_posts/ahmedlone127/2024-09-05-tiny_distill_2601_2_en.md new file mode 100644 index 00000000000000..2c7719064d3572 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-tiny_distill_2601_2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English tiny_distill_2601_2 AlbertForSequenceClassification from gg-ai +author: John Snow Labs +name: tiny_distill_2601_2 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tiny_distill_2601_2` is a English model originally trained by gg-ai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_distill_2601_2_en_5.5.0_3.0_1725525461757.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_distill_2601_2_en_5.5.0_3.0_1725525461757.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = AlbertForSequenceClassification.pretrained("tiny_distill_2601_2","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = AlbertForSequenceClassification.pretrained("tiny_distill_2601_2", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
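+
+For quick, single-sentence checks it is often handier to wrap the fitted model in a LightPipeline, which accepts plain Python strings instead of a DataFrame. The sketch below builds on `pipelineModel` from the snippet above; the example sentence is arbitrary.
+
+```python
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+
+# annotate() returns a dict keyed by the pipeline's output column names,
+# so the predicted label is available under "class".
+result = light.annotate("I love spark-nlp")
+print(result["class"])
+```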
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_distill_2601_2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|20.4 MB| + +## References + +https://huggingface.co/gg-ai/tiny-distill-2601-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-tiny_distill_2601_2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-tiny_distill_2601_2_pipeline_en.md new file mode 100644 index 00000000000000..0d5aff86400493 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-tiny_distill_2601_2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English tiny_distill_2601_2_pipeline pipeline AlbertForSequenceClassification from gg-ai +author: John Snow Labs +name: tiny_distill_2601_2_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tiny_distill_2601_2_pipeline` is a English model originally trained by gg-ai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_distill_2601_2_pipeline_en_5.5.0_3.0_1725525463089.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_distill_2601_2_pipeline_en_5.5.0_3.0_1725525463089.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("tiny_distill_2601_2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("tiny_distill_2601_2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_distill_2601_2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|20.5 MB| + +## References + +https://huggingface.co/gg-ai/tiny-distill-2601-2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-tokenclassificationmodel_en.md b/docs/_posts/ahmedlone127/2024-09-05-tokenclassificationmodel_en.md new file mode 100644 index 00000000000000..5639becf7d31a7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-tokenclassificationmodel_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English tokenclassificationmodel DistilBertForTokenClassification from bhadauriaupendra062 +author: John Snow Labs +name: tokenclassificationmodel +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tokenclassificationmodel` is a English model originally trained by bhadauriaupendra062. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tokenclassificationmodel_en_5.5.0_3.0_1725518267929.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tokenclassificationmodel_en_5.5.0_3.0_1725518267929.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("tokenclassificationmodel","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("tokenclassificationmodel", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tokenclassificationmodel| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/bhadauriaupendra062/tokenclassificationmodel \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-tokenclassificationmodel_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-tokenclassificationmodel_pipeline_en.md new file mode 100644 index 00000000000000..5e2a30558dd955 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-tokenclassificationmodel_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English tokenclassificationmodel_pipeline pipeline DistilBertForTokenClassification from bhadauriaupendra062 +author: John Snow Labs +name: tokenclassificationmodel_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tokenclassificationmodel_pipeline` is a English model originally trained by bhadauriaupendra062. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tokenclassificationmodel_pipeline_en_5.5.0_3.0_1725518280718.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tokenclassificationmodel_pipeline_en_5.5.0_3.0_1725518280718.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("tokenclassificationmodel_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("tokenclassificationmodel_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tokenclassificationmodel_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/bhadauriaupendra062/tokenclassificationmodel + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-topic_weather_en.md b/docs/_posts/ahmedlone127/2024-09-05-topic_weather_en.md new file mode 100644 index 00000000000000..fbcb94a88e2caf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-topic_weather_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English topic_weather RoBertaForSequenceClassification from dell-research-harvard +author: John Snow Labs +name: topic_weather +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`topic_weather` is a English model originally trained by dell-research-harvard. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/topic_weather_en_5.5.0_3.0_1725542264434.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/topic_weather_en_5.5.0_3.0_1725542264434.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("topic_weather","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("topic_weather", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|topic_weather| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|309.0 MB| + +## References + +https://huggingface.co/dell-research-harvard/topic-weather \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-toxic_comment_classification_using_roberta_en.md b/docs/_posts/ahmedlone127/2024-09-05-toxic_comment_classification_using_roberta_en.md new file mode 100644 index 00000000000000..70f5930771e244 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-toxic_comment_classification_using_roberta_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English toxic_comment_classification_using_roberta RoBertaForSequenceClassification from prabhaskenche +author: John Snow Labs +name: toxic_comment_classification_using_roberta +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`toxic_comment_classification_using_roberta` is a English model originally trained by prabhaskenche. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/toxic_comment_classification_using_roberta_en_5.5.0_3.0_1725541986253.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/toxic_comment_classification_using_roberta_en_5.5.0_3.0_1725541986253.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("toxic_comment_classification_using_roberta","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("toxic_comment_classification_using_roberta", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
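+
+Since this model is roughly 1.3 GB, refitting the pipeline for every job is wasteful. The fitted PipelineModel can be saved once and reloaded with standard Spark ML mechanics, as sketched below; the path is only a placeholder.
+
+```python
+from pyspark.ml import PipelineModel
+
+# Persist the fitted pipeline (placeholder path) ...
+pipelineModel.write().overwrite().save("/tmp/toxic_comment_roberta_pipeline")
+
+# ... and reload it later without refitting.
+restored = PipelineModel.load("/tmp/toxic_comment_roberta_pipeline")
+restored.transform(data).select("class.result").show(truncate=False)
+```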
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|toxic_comment_classification_using_roberta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/prabhaskenche/toxic-comment-classification-using-RoBERTa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-toxic_comment_classification_using_roberta_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-toxic_comment_classification_using_roberta_pipeline_en.md new file mode 100644 index 00000000000000..18194305ac7a02 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-toxic_comment_classification_using_roberta_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English toxic_comment_classification_using_roberta_pipeline pipeline RoBertaForSequenceClassification from prabhaskenche +author: John Snow Labs +name: toxic_comment_classification_using_roberta_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`toxic_comment_classification_using_roberta_pipeline` is a English model originally trained by prabhaskenche. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/toxic_comment_classification_using_roberta_pipeline_en_5.5.0_3.0_1725542056919.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/toxic_comment_classification_using_roberta_pipeline_en_5.5.0_3.0_1725542056919.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("toxic_comment_classification_using_roberta_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("toxic_comment_classification_using_roberta_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|toxic_comment_classification_using_roberta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/prabhaskenche/toxic-comment-classification-using-RoBERTa + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-trained_baseline_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-trained_baseline_pipeline_en.md new file mode 100644 index 00000000000000..e621bbc7499d23 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-trained_baseline_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English trained_baseline_pipeline pipeline DistilBertForTokenClassification from annamariagnat +author: John Snow Labs +name: trained_baseline_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`trained_baseline_pipeline` is a English model originally trained by annamariagnat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/trained_baseline_pipeline_en_5.5.0_3.0_1725506028223.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/trained_baseline_pipeline_en_5.5.0_3.0_1725506028223.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("trained_baseline_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("trained_baseline_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|trained_baseline_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/annamariagnat/trained_baseline + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-trained_croatian_en.md b/docs/_posts/ahmedlone127/2024-09-05-trained_croatian_en.md new file mode 100644 index 00000000000000..e2271cdee43456 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-trained_croatian_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English trained_croatian DistilBertForTokenClassification from annamariagnat +author: John Snow Labs +name: trained_croatian +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`trained_croatian` is a English model originally trained by annamariagnat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/trained_croatian_en_5.5.0_3.0_1725495687589.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/trained_croatian_en_5.5.0_3.0_1725495687589.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("trained_croatian","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("trained_croatian", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
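+
+The `ner` column above holds one IOB tag per token. To group those tags into whole entity chunks, a NerConverter stage can be appended to the same pipeline; the sketch below is one possible wiring that reuses the column names from the Python snippet above.
+
+```python
+from pyspark.ml import Pipeline
+from sparknlp.annotator import NerConverter
+
+# Merge IOB-tagged tokens into complete entity chunks.
+nerConverter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("entities")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier, nerConverter])
+result = pipeline.fit(data).transform(data)
+result.select("entities.result").show(truncate=False)
+```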
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|trained_croatian| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|505.4 MB| + +## References + +https://huggingface.co/annamariagnat/trained_croatian \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-trained_slovak_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-trained_slovak_pipeline_en.md new file mode 100644 index 00000000000000..cdebb46a85d3ba --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-trained_slovak_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English trained_slovak_pipeline pipeline DistilBertForTokenClassification from annamariagnat +author: John Snow Labs +name: trained_slovak_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`trained_slovak_pipeline` is a English model originally trained by annamariagnat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/trained_slovak_pipeline_en_5.5.0_3.0_1725518926627.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/trained_slovak_pipeline_en_5.5.0_3.0_1725518926627.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("trained_slovak_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val pipeline = new PretrainedPipeline("trained_slovak_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
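+
+For quick experiments on a single string, the pretrained pipeline can also be used without building a DataFrame first. A short sketch; the example sentence is illustrative only:
+
+```python
+# annotate() returns a plain Python dict keyed by the output columns of the included annotators
+result = pipeline.annotate("John Snow Labs is based in Delaware.")
+print(result)
+```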
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|trained_slovak_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|505.4 MB| + +## References + +https://huggingface.co/annamariagnat/trained_slovak + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-translation_for_recipes_english_french_en.md b/docs/_posts/ahmedlone127/2024-09-05-translation_for_recipes_english_french_en.md new file mode 100644 index 00000000000000..03fb3d0f566982 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-translation_for_recipes_english_french_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English translation_for_recipes_english_french MarianTransformer from PaulineSanchez +author: John Snow Labs +name: translation_for_recipes_english_french +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`translation_for_recipes_english_french` is a English model originally trained by PaulineSanchez. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/translation_for_recipes_english_french_en_5.5.0_3.0_1725545656410.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/translation_for_recipes_english_french_en_5.5.0_3.0_1725545656410.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("translation_for_recipes_english_french","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("translation_for_recipes_english_french","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
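+
+The translated sentences end up in the `translation` column configured above; a minimal way to read them back, assuming the Python pipeline above has been run:
+
+```python
+# one translated sentence per detected input sentence
+pipelineDF.selectExpr("explode(translation.result) as translated_sentence") \
+    .show(truncate=False)
+```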
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|translation_for_recipes_english_french| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.4 MB| + +## References + +https://huggingface.co/PaulineSanchez/translation_for_recipes_en_fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-translation_for_recipes_english_french_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-translation_for_recipes_english_french_pipeline_en.md new file mode 100644 index 00000000000000..f25adaa85eeb97 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-translation_for_recipes_english_french_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English translation_for_recipes_english_french_pipeline pipeline MarianTransformer from PaulineSanchez +author: John Snow Labs +name: translation_for_recipes_english_french_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`translation_for_recipes_english_french_pipeline` is a English model originally trained by PaulineSanchez. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/translation_for_recipes_english_french_pipeline_en_5.5.0_3.0_1725545689284.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/translation_for_recipes_english_french_pipeline_en_5.5.0_3.0_1725545689284.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("translation_for_recipes_english_french_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val pipeline = new PretrainedPipeline("translation_for_recipes_english_french_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|translation_for_recipes_english_french_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|509.0 MB| + +## References + +https://huggingface.co/PaulineSanchez/translation_for_recipes_en_fr + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-translations_english_german_en.md b/docs/_posts/ahmedlone127/2024-09-05-translations_english_german_en.md new file mode 100644 index 00000000000000..f918ae44b1e06b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-translations_english_german_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English translations_english_german MarianTransformer from OlympusGG +author: John Snow Labs +name: translations_english_german +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`translations_english_german` is a English model originally trained by OlympusGG. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/translations_english_german_en_5.5.0_3.0_1725545115546.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/translations_english_german_en_5.5.0_3.0_1725545115546.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("translations_english_german","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("translations_english_german","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|translations_english_german| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|499.2 MB| + +## References + +https://huggingface.co/OlympusGG/translations-en-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-translations_english_german_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-translations_english_german_pipeline_en.md new file mode 100644 index 00000000000000..d1b174e6280132 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-translations_english_german_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English translations_english_german_pipeline pipeline MarianTransformer from OlympusGG +author: John Snow Labs +name: translations_english_german_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`translations_english_german_pipeline` is a English model originally trained by OlympusGG. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/translations_english_german_pipeline_en_5.5.0_3.0_1725545142678.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/translations_english_german_pipeline_en_5.5.0_3.0_1725545142678.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("translations_english_german_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val pipeline = new PretrainedPipeline("translations_english_german_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|translations_english_german_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|499.7 MB| + +## References + +https://huggingface.co/OlympusGG/translations-en-de + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-tugstugi_bengaliai_asr_whisper_medium_en.md b/docs/_posts/ahmedlone127/2024-09-05-tugstugi_bengaliai_asr_whisper_medium_en.md new file mode 100644 index 00000000000000..9f3aeb96e1040a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-tugstugi_bengaliai_asr_whisper_medium_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English tugstugi_bengaliai_asr_whisper_medium WhisperForCTC from bengaliAI +author: John Snow Labs +name: tugstugi_bengaliai_asr_whisper_medium +date: 2024-09-05 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tugstugi_bengaliai_asr_whisper_medium` is a English model originally trained by bengaliAI. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tugstugi_bengaliai_asr_whisper_medium_en_5.5.0_3.0_1725551919269.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tugstugi_bengaliai_asr_whisper_medium_en_5.5.0_3.0_1725551919269.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("tugstugi_bengaliai_asr_whisper_medium","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+# data must be a DataFrame with an "audio_content" column holding the raw audio samples as an array of floats
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("tugstugi_bengaliai_asr_whisper_medium", "en")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+// data must be a DataFrame with an "audio_content" column holding the raw audio samples as an array of floats
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
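+
+`data` has to contain the raw waveform rather than a file path. A possible way to build it, assuming `librosa` is available and `audio.wav` is a local 16 kHz mono recording (both are illustrative assumptions, not part of the original example):
+
+```python
+import librosa
+
+# load the waveform as floats; Whisper checkpoints expect 16 kHz audio
+raw_floats, _ = librosa.load("audio.wav", sr=16000)
+data = spark.createDataFrame([[raw_floats.tolist()]]).toDF("audio_content")
+```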
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tugstugi_bengaliai_asr_whisper_medium| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|4.8 GB| + +## References + +https://huggingface.co/bengaliAI/tugstugi_bengaliai-asr_whisper-medium \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-tugstugi_bengaliai_asr_whisper_medium_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-tugstugi_bengaliai_asr_whisper_medium_pipeline_en.md new file mode 100644 index 00000000000000..517ec774a92132 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-tugstugi_bengaliai_asr_whisper_medium_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English tugstugi_bengaliai_asr_whisper_medium_pipeline pipeline WhisperForCTC from bengaliAI +author: John Snow Labs +name: tugstugi_bengaliai_asr_whisper_medium_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tugstugi_bengaliai_asr_whisper_medium_pipeline` is a English model originally trained by bengaliAI. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tugstugi_bengaliai_asr_whisper_medium_pipeline_en_5.5.0_3.0_1725552172912.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tugstugi_bengaliai_asr_whisper_medium_pipeline_en_5.5.0_3.0_1725552172912.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# df is expected to hold the raw audio samples as an array of floats (see the "audio_content" column in the stand-alone model example)
+pipeline = PretrainedPipeline("tugstugi_bengaliai_asr_whisper_medium_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+// df is expected to hold the raw audio samples as an array of floats (see the "audio_content" column in the stand-alone model example)
+val pipeline = new PretrainedPipeline("tugstugi_bengaliai_asr_whisper_medium_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tugstugi_bengaliai_asr_whisper_medium_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|4.8 GB| + +## References + +https://huggingface.co/bengaliAI/tugstugi_bengaliai-asr_whisper-medium + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-turkish_acc_80_en.md b/docs/_posts/ahmedlone127/2024-09-05-turkish_acc_80_en.md new file mode 100644 index 00000000000000..dc620029bef47a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-turkish_acc_80_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English turkish_acc_80 CamemBertForSequenceClassification from Ppxndpxdd +author: John Snow Labs +name: turkish_acc_80 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, camembert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`turkish_acc_80` is a English model originally trained by Ppxndpxdd. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/turkish_acc_80_en_5.5.0_3.0_1725543826145.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/turkish_acc_80_en_5.5.0_3.0_1725543826145.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = CamemBertForSequenceClassification.pretrained("turkish_acc_80","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = CamemBertForSequenceClassification.pretrained("turkish_acc_80", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
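+
+The predicted label for each row is stored in the `class` column set above; a quick way to look at it, assuming the Python pipeline above has been run:
+
+```python
+# the input text alongside its predicted label(s)
+pipelineDF.select("text", "class.result").show(truncate=False)
+```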
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|turkish_acc_80| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|394.3 MB| + +## References + +https://huggingface.co/Ppxndpxdd/tr_acc_80 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-turkish_acc_80_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-turkish_acc_80_pipeline_en.md new file mode 100644 index 00000000000000..a9f6da43b3e17a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-turkish_acc_80_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English turkish_acc_80_pipeline pipeline CamemBertForSequenceClassification from Ppxndpxdd +author: John Snow Labs +name: turkish_acc_80_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`turkish_acc_80_pipeline` is a English model originally trained by Ppxndpxdd. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/turkish_acc_80_pipeline_en_5.5.0_3.0_1725543846576.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/turkish_acc_80_pipeline_en_5.5.0_3.0_1725543846576.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("turkish_acc_80_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val pipeline = new PretrainedPipeline("turkish_acc_80_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|turkish_acc_80_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|394.4 MB| + +## References + +https://huggingface.co/Ppxndpxdd/tr_acc_80 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-turkish_base_bert_capitalization_correction_pipeline_tr.md b/docs/_posts/ahmedlone127/2024-09-05-turkish_base_bert_capitalization_correction_pipeline_tr.md new file mode 100644 index 00000000000000..172fda15b090f7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-turkish_base_bert_capitalization_correction_pipeline_tr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Turkish turkish_base_bert_capitalization_correction_pipeline pipeline BertForTokenClassification from ytu-ce-cosmos +author: John Snow Labs +name: turkish_base_bert_capitalization_correction_pipeline +date: 2024-09-05 +tags: [tr, open_source, pipeline, onnx] +task: Named Entity Recognition +language: tr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`turkish_base_bert_capitalization_correction_pipeline` is a Turkish model originally trained by ytu-ce-cosmos. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/turkish_base_bert_capitalization_correction_pipeline_tr_5.5.0_3.0_1725539291138.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/turkish_base_bert_capitalization_correction_pipeline_tr_5.5.0_3.0_1725539291138.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("turkish_base_bert_capitalization_correction_pipeline", lang = "tr")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val pipeline = new PretrainedPipeline("turkish_base_bert_capitalization_correction_pipeline", lang = "tr")
+val annotations = pipeline.transform(df)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|turkish_base_bert_capitalization_correction_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|tr| +|Size:|413.1 MB| + +## References + +https://huggingface.co/ytu-ce-cosmos/turkish-base-bert-capitalization-correction + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-turkish_base_bert_capitalization_correction_tr.md b/docs/_posts/ahmedlone127/2024-09-05-turkish_base_bert_capitalization_correction_tr.md new file mode 100644 index 00000000000000..3b105b35561f1f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-turkish_base_bert_capitalization_correction_tr.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Turkish turkish_base_bert_capitalization_correction BertForTokenClassification from ytu-ce-cosmos +author: John Snow Labs +name: turkish_base_bert_capitalization_correction +date: 2024-09-05 +tags: [tr, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: tr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`turkish_base_bert_capitalization_correction` is a Turkish model originally trained by ytu-ce-cosmos. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/turkish_base_bert_capitalization_correction_tr_5.5.0_3.0_1725539270105.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/turkish_base_bert_capitalization_correction_tr_5.5.0_3.0_1725539270105.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("turkish_base_bert_capitalization_correction","tr") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("turkish_base_bert_capitalization_correction", "tr")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
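+
+For low-latency, single-sentence use (for example, correcting the capitalization of short user input), the fitted pipeline can be wrapped in a LightPipeline. This is a sketch; the Turkish example sentence is illustrative only:
+
+```python
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+# returns a plain Python dict with the token-level predictions
+print(light.annotate("türkiye'nin başkenti ankara'dır."))
+```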
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|turkish_base_bert_capitalization_correction| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|tr| +|Size:|413.1 MB| + +## References + +https://huggingface.co/ytu-ce-cosmos/turkish-base-bert-capitalization-correction \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-turkishtranslator_en.md b/docs/_posts/ahmedlone127/2024-09-05-turkishtranslator_en.md new file mode 100644 index 00000000000000..56f861e23b01e6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-turkishtranslator_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English turkishtranslator MarianTransformer from PontifexMaximus +author: John Snow Labs +name: turkishtranslator +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`turkishtranslator` is a English model originally trained by PontifexMaximus. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/turkishtranslator_en_5.5.0_3.0_1725544719420.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/turkishtranslator_en_5.5.0_3.0_1725544719420.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("turkishtranslator","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("turkishtranslator","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|turkishtranslator| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|525.3 MB| + +## References + +https://huggingface.co/PontifexMaximus/TurkishTranslator \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-turkishtranslator_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-turkishtranslator_pipeline_en.md new file mode 100644 index 00000000000000..4c21e75d2541d0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-turkishtranslator_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English turkishtranslator_pipeline pipeline MarianTransformer from PontifexMaximus +author: John Snow Labs +name: turkishtranslator_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`turkishtranslator_pipeline` is a English model originally trained by PontifexMaximus. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/turkishtranslator_pipeline_en_5.5.0_3.0_1725544747203.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/turkishtranslator_pipeline_en_5.5.0_3.0_1725544747203.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("turkishtranslator_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val pipeline = new PretrainedPipeline("turkishtranslator_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|turkishtranslator_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|525.9 MB| + +## References + +https://huggingface.co/PontifexMaximus/TurkishTranslator + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-twitter_data_xlm_roberta_base_sentiment_finetuned_memes_en.md b/docs/_posts/ahmedlone127/2024-09-05-twitter_data_xlm_roberta_base_sentiment_finetuned_memes_en.md new file mode 100644 index 00000000000000..5d4c695b2375f9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-twitter_data_xlm_roberta_base_sentiment_finetuned_memes_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English twitter_data_xlm_roberta_base_sentiment_finetuned_memes XlmRoBertaForSequenceClassification from jayantapaul888 +author: John Snow Labs +name: twitter_data_xlm_roberta_base_sentiment_finetuned_memes +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`twitter_data_xlm_roberta_base_sentiment_finetuned_memes` is a English model originally trained by jayantapaul888. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/twitter_data_xlm_roberta_base_sentiment_finetuned_memes_en_5.5.0_3.0_1725529654722.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/twitter_data_xlm_roberta_base_sentiment_finetuned_memes_en_5.5.0_3.0_1725529654722.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("twitter_data_xlm_roberta_base_sentiment_finetuned_memes","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("twitter_data_xlm_roberta_base_sentiment_finetuned_memes", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
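+
+Because the classifier is already wrapped in a Spark pipeline, scoring a larger batch only requires a bigger DataFrame. The example rows below are illustrative:
+
+```python
+texts = [["this meme is hilarious"], ["this is the worst take i have seen"]]
+batch_df = spark.createDataFrame(texts).toDF("text")
+# the predicted sentiment label for each row is in the "class" column's result field
+pipelineModel.transform(batch_df).select("text", "class.result").show(truncate=False)
+```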
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|twitter_data_xlm_roberta_base_sentiment_finetuned_memes| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|799.6 MB| + +## References + +https://huggingface.co/jayantapaul888/twitter-data-xlm-roberta-base-sentiment-finetuned-memes \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-twitter_data_xlm_roberta_base_sentiment_finetuned_memes_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-twitter_data_xlm_roberta_base_sentiment_finetuned_memes_pipeline_en.md new file mode 100644 index 00000000000000..e0836ace703881 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-twitter_data_xlm_roberta_base_sentiment_finetuned_memes_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English twitter_data_xlm_roberta_base_sentiment_finetuned_memes_pipeline pipeline XlmRoBertaForSequenceClassification from jayantapaul888 +author: John Snow Labs +name: twitter_data_xlm_roberta_base_sentiment_finetuned_memes_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`twitter_data_xlm_roberta_base_sentiment_finetuned_memes_pipeline` is a English model originally trained by jayantapaul888. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/twitter_data_xlm_roberta_base_sentiment_finetuned_memes_pipeline_en_5.5.0_3.0_1725529791522.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/twitter_data_xlm_roberta_base_sentiment_finetuned_memes_pipeline_en_5.5.0_3.0_1725529791522.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("twitter_data_xlm_roberta_base_sentiment_finetuned_memes_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val pipeline = new PretrainedPipeline("twitter_data_xlm_roberta_base_sentiment_finetuned_memes_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|twitter_data_xlm_roberta_base_sentiment_finetuned_memes_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|799.6 MB| + +## References + +https://huggingface.co/jayantapaul888/twitter-data-xlm-roberta-base-sentiment-finetuned-memes + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-twitter_roberta_base_2019_90m_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-twitter_roberta_base_2019_90m_pipeline_en.md new file mode 100644 index 00000000000000..f75599e6a091e0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-twitter_roberta_base_2019_90m_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English twitter_roberta_base_2019_90m_pipeline pipeline RoBertaEmbeddings from cardiffnlp +author: John Snow Labs +name: twitter_roberta_base_2019_90m_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`twitter_roberta_base_2019_90m_pipeline` is a English model originally trained by cardiffnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_2019_90m_pipeline_en_5.5.0_3.0_1725573072301.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_2019_90m_pipeline_en_5.5.0_3.0_1725573072301.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("twitter_roberta_base_2019_90m_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val pipeline = new PretrainedPipeline("twitter_roberta_base_2019_90m_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|twitter_roberta_base_2019_90m_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|466.1 MB| + +## References + +https://huggingface.co/cardiffnlp/twitter-roberta-base-2019-90m + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-twitter_roberta_base_2021_124m_irony_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-twitter_roberta_base_2021_124m_irony_pipeline_en.md new file mode 100644 index 00000000000000..ec74cd8cc7e911 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-twitter_roberta_base_2021_124m_irony_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English twitter_roberta_base_2021_124m_irony_pipeline pipeline RoBertaForSequenceClassification from cardiffnlp +author: John Snow Labs +name: twitter_roberta_base_2021_124m_irony_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`twitter_roberta_base_2021_124m_irony_pipeline` is a English model originally trained by cardiffnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_2021_124m_irony_pipeline_en_5.5.0_3.0_1725541556952.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_2021_124m_irony_pipeline_en_5.5.0_3.0_1725541556952.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("twitter_roberta_base_2021_124m_irony_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val pipeline = new PretrainedPipeline("twitter_roberta_base_2021_124m_irony_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
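+
+If the confidence scores are also needed, `fullAnnotate` exposes the annotation metadata. This is a sketch that assumes the classifier inside the pipeline writes to a column named `class`, as in the stand-alone classification examples in this batch of model cards:
+
+```python
+full = pipeline.fullAnnotate("what a totally unexpected plot twist")[0]
+for annotation in full["class"]:
+    # result is the predicted label, metadata holds the per-label scores
+    print(annotation.result, annotation.metadata)
+```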
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|twitter_roberta_base_2021_124m_irony_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|468.3 MB| + +## References + +https://huggingface.co/cardiffnlp/twitter-roberta-base-2021-124m-irony + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-twitter_roberta_base_jun2021_en.md b/docs/_posts/ahmedlone127/2024-09-05-twitter_roberta_base_jun2021_en.md new file mode 100644 index 00000000000000..09a6ab199cfb9c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-twitter_roberta_base_jun2021_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English twitter_roberta_base_jun2021 RoBertaEmbeddings from cardiffnlp +author: John Snow Labs +name: twitter_roberta_base_jun2021 +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`twitter_roberta_base_jun2021` is a English model originally trained by cardiffnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_jun2021_en_5.5.0_3.0_1725573292879.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_jun2021_en_5.5.0_3.0_1725573292879.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("twitter_roberta_base_jun2021","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("twitter_roberta_base_jun2021","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
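+
+Each annotation in the `embeddings` column also carries the embedding vector itself in its `embeddings` field. A minimal sketch for pulling the vectors out, assuming the Python pipeline above has been run:
+
+```python
+# one row per token: the token string and its embedding vector
+pipelineDF.selectExpr("explode(embeddings) as emb") \
+    .selectExpr("emb.result as token", "emb.embeddings as vector") \
+    .show(truncate=False)
+```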
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|twitter_roberta_base_jun2021| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|466.1 MB| + +## References + +https://huggingface.co/cardiffnlp/twitter-roberta-base-jun2021 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-twitter_roberta_base_jun2021_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-twitter_roberta_base_jun2021_pipeline_en.md new file mode 100644 index 00000000000000..3d95e55ac83162 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-twitter_roberta_base_jun2021_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English twitter_roberta_base_jun2021_pipeline pipeline RoBertaEmbeddings from cardiffnlp +author: John Snow Labs +name: twitter_roberta_base_jun2021_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`twitter_roberta_base_jun2021_pipeline` is a English model originally trained by cardiffnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_jun2021_pipeline_en_5.5.0_3.0_1725573315996.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_jun2021_pipeline_en_5.5.0_3.0_1725573315996.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("twitter_roberta_base_jun2021_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val pipeline = new PretrainedPipeline("twitter_roberta_base_jun2021_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|twitter_roberta_base_jun2021_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|466.1 MB| + +## References + +https://huggingface.co/cardiffnlp/twitter-roberta-base-jun2021 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-twitter_roberta_base_jun2022_15m_incr_en.md b/docs/_posts/ahmedlone127/2024-09-05-twitter_roberta_base_jun2022_15m_incr_en.md new file mode 100644 index 00000000000000..66541d08ad23a8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-twitter_roberta_base_jun2022_15m_incr_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English twitter_roberta_base_jun2022_15m_incr RoBertaEmbeddings from cardiffnlp +author: John Snow Labs +name: twitter_roberta_base_jun2022_15m_incr +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`twitter_roberta_base_jun2022_15m_incr` is a English model originally trained by cardiffnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_jun2022_15m_incr_en_5.5.0_3.0_1725572204372.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_jun2022_15m_incr_en_5.5.0_3.0_1725572204372.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("twitter_roberta_base_jun2022_15m_incr","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("twitter_roberta_base_jun2022_15m_incr","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|twitter_roberta_base_jun2022_15m_incr| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|466.1 MB| + +## References + +https://huggingface.co/cardiffnlp/twitter-roberta-base-jun2022-15M-incr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-twitter_roberta_base_jun2022_15m_incr_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-twitter_roberta_base_jun2022_15m_incr_pipeline_en.md new file mode 100644 index 00000000000000..eb006b5885aa52 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-twitter_roberta_base_jun2022_15m_incr_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English twitter_roberta_base_jun2022_15m_incr_pipeline pipeline RoBertaEmbeddings from cardiffnlp +author: John Snow Labs +name: twitter_roberta_base_jun2022_15m_incr_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`twitter_roberta_base_jun2022_15m_incr_pipeline` is a English model originally trained by cardiffnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_jun2022_15m_incr_pipeline_en_5.5.0_3.0_1725572227841.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_jun2022_15m_incr_pipeline_en_5.5.0_3.0_1725572227841.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("twitter_roberta_base_jun2022_15m_incr_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val pipeline = new PretrainedPipeline("twitter_roberta_base_jun2022_15m_incr_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|twitter_roberta_base_jun2022_15m_incr_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|466.1 MB| + +## References + +https://huggingface.co/cardiffnlp/twitter-roberta-base-jun2022-15M-incr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-ukraine_waray_philippines_pov_uk.md b/docs/_posts/ahmedlone127/2024-09-05-ukraine_waray_philippines_pov_uk.md new file mode 100644 index 00000000000000..f1d8f7d0be9129 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-ukraine_waray_philippines_pov_uk.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Ukrainian ukraine_waray_philippines_pov XlmRoBertaForSequenceClassification from YaraKyrychenko +author: John Snow Labs +name: ukraine_waray_philippines_pov +date: 2024-09-05 +tags: [uk, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: uk +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ukraine_waray_philippines_pov` is a Ukrainian model originally trained by YaraKyrychenko. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ukraine_waray_philippines_pov_uk_5.5.0_3.0_1725513816855.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ukraine_waray_philippines_pov_uk_5.5.0_3.0_1725513816855.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("ukraine_waray_philippines_pov","uk") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("ukraine_waray_philippines_pov", "uk")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
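+
+After `transform`, the predicted label sits in the `result` field of the `class` annotation column set above; a minimal sketch for inspecting it:
+
+```python
+# each row carries one annotation per document; `result` holds the predicted label
+pipelineDF.selectExpr("text", "explode(`class`.result) as predicted_label").show(truncate=False)
+```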
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ukraine_waray_philippines_pov| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|uk| +|Size:|877.0 MB| + +## References + +https://huggingface.co/YaraKyrychenko/ukraine-war-pov \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-umberto_fine_tuned_irony_sarcasm_en.md b/docs/_posts/ahmedlone127/2024-09-05-umberto_fine_tuned_irony_sarcasm_en.md new file mode 100644 index 00000000000000..24d3769fbb86f0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-umberto_fine_tuned_irony_sarcasm_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English umberto_fine_tuned_irony_sarcasm CamemBertForSequenceClassification from lupobricco +author: John Snow Labs +name: umberto_fine_tuned_irony_sarcasm +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, camembert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`umberto_fine_tuned_irony_sarcasm` is a English model originally trained by lupobricco. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/umberto_fine_tuned_irony_sarcasm_en_5.5.0_3.0_1725544073499.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/umberto_fine_tuned_irony_sarcasm_en_5.5.0_3.0_1725544073499.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = CamemBertForSequenceClassification.pretrained("umberto_fine_tuned_irony_sarcasm","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = CamemBertForSequenceClassification.pretrained("umberto_fine_tuned_irony_sarcasm", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|umberto_fine_tuned_irony_sarcasm| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|392.8 MB| + +## References + +https://huggingface.co/lupobricco/umBERTo_fine-tuned_irony_sarcasm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-umberto_fine_tuned_irony_sarcasm_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-umberto_fine_tuned_irony_sarcasm_pipeline_en.md new file mode 100644 index 00000000000000..b740bdbf7bf1de --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-umberto_fine_tuned_irony_sarcasm_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English umberto_fine_tuned_irony_sarcasm_pipeline pipeline CamemBertForSequenceClassification from lupobricco +author: John Snow Labs +name: umberto_fine_tuned_irony_sarcasm_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`umberto_fine_tuned_irony_sarcasm_pipeline` is a English model originally trained by lupobricco. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/umberto_fine_tuned_irony_sarcasm_pipeline_en_5.5.0_3.0_1725544100290.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/umberto_fine_tuned_irony_sarcasm_pipeline_en_5.5.0_3.0_1725544100290.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("umberto_fine_tuned_irony_sarcasm_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("umberto_fine_tuned_irony_sarcasm_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
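+
+Besides calling `transform` on a DataFrame, a `PretrainedPipeline` can annotate plain strings directly, which is handy for quick checks; a small sketch:
+
+```python
+# returns a dict mapping output column names to lists of results
+result = pipeline.annotate("I love spark-nlp")
+print(result)
+```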
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|umberto_fine_tuned_irony_sarcasm_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|392.8 MB| + +## References + +https://huggingface.co/lupobricco/umBERTo_fine-tuned_irony_sarcasm + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-unibert_distilbert_2_en.md b/docs/_posts/ahmedlone127/2024-09-05-unibert_distilbert_2_en.md new file mode 100644 index 00000000000000..4b287dca053ecf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-unibert_distilbert_2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English unibert_distilbert_2 DistilBertForTokenClassification from dbala02 +author: John Snow Labs +name: unibert_distilbert_2 +date: 2024-09-05 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`unibert_distilbert_2` is a English model originally trained by dbala02. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/unibert_distilbert_2_en_5.5.0_3.0_1725496154914.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/unibert_distilbert_2_en_5.5.0_3.0_1725496154914.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("unibert_distilbert_2","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("unibert_distilbert_2", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
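+
+The token texts and their predicted tags come back as parallel arrays in the `token` and `ner` columns; a minimal sketch for viewing them side by side:
+
+```python
+# `token.result` and `ner.result` are aligned element-wise
+pipelineDF.select("token.result", "ner.result").show(truncate=False)
+```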
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|unibert_distilbert_2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.4 MB| + +## References + +https://huggingface.co/dbala02/uniBERT.distilBERT.2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-unibert_distilbert_2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-unibert_distilbert_2_pipeline_en.md new file mode 100644 index 00000000000000..ec40d211d85815 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-unibert_distilbert_2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English unibert_distilbert_2_pipeline pipeline DistilBertForTokenClassification from dbala02 +author: John Snow Labs +name: unibert_distilbert_2_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`unibert_distilbert_2_pipeline` is a English model originally trained by dbala02. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/unibert_distilbert_2_pipeline_en_5.5.0_3.0_1725496166517.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/unibert_distilbert_2_pipeline_en_5.5.0_3.0_1725496166517.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("unibert_distilbert_2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("unibert_distilbert_2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|unibert_distilbert_2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.4 MB| + +## References + +https://huggingface.co/dbala02/uniBERT.distilBERT.2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-unspsc_product_category_en.md b/docs/_posts/ahmedlone127/2024-09-05-unspsc_product_category_en.md new file mode 100644 index 00000000000000..84c8956dc68632 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-unspsc_product_category_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English unspsc_product_category DistilBertForSequenceClassification from woland2k +author: John Snow Labs +name: unspsc_product_category +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`unspsc_product_category` is a English model originally trained by woland2k. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/unspsc_product_category_en_5.5.0_3.0_1725580441280.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/unspsc_product_category_en_5.5.0_3.0_1725580441280.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("unspsc_product_category","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("unspsc_product_category", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|unspsc_product_category| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|260.9 MB| + +## References + +https://huggingface.co/woland2k/unspsc-product-category \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-unspsc_product_category_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-unspsc_product_category_pipeline_en.md new file mode 100644 index 00000000000000..7f677471ba5370 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-unspsc_product_category_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English unspsc_product_category_pipeline pipeline DistilBertForSequenceClassification from woland2k +author: John Snow Labs +name: unspsc_product_category_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`unspsc_product_category_pipeline` is a English model originally trained by woland2k. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/unspsc_product_category_pipeline_en_5.5.0_3.0_1725580454744.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/unspsc_product_category_pipeline_en_5.5.0_3.0_1725580454744.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("unspsc_product_category_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("unspsc_product_category_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|unspsc_product_category_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|260.9 MB| + +## References + +https://huggingface.co/woland2k/unspsc-product-category + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-usclm_robrta_base_mk1_en.md b/docs/_posts/ahmedlone127/2024-09-05-usclm_robrta_base_mk1_en.md new file mode 100644 index 00000000000000..2c44db5c1b79bb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-usclm_robrta_base_mk1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English usclm_robrta_base_mk1 RoBertaEmbeddings from hyperdemocracy +author: John Snow Labs +name: usclm_robrta_base_mk1 +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`usclm_robrta_base_mk1` is a English model originally trained by hyperdemocracy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/usclm_robrta_base_mk1_en_5.5.0_3.0_1725577867687.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/usclm_robrta_base_mk1_en_5.5.0_3.0_1725577867687.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("usclm_robrta_base_mk1","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("usclm_robrta_base_mk1","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
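+
+Each annotation in the `embeddings` output column carries the token text in its `result` field and the vector in its `embeddings` field; a minimal sketch for inspecting them:
+
+```python
+# one row per token after the explode
+pipelineDF.selectExpr("explode(embeddings) as emb") \
+    .selectExpr("emb.result as token", "emb.embeddings as vector") \
+    .show()
+```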
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|usclm_robrta_base_mk1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|467.0 MB| + +## References + +https://huggingface.co/hyperdemocracy/usclm-robrta-base-mk1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-usclm_robrta_base_mk1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-usclm_robrta_base_mk1_pipeline_en.md new file mode 100644 index 00000000000000..4d58a655bbcfd9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-usclm_robrta_base_mk1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English usclm_robrta_base_mk1_pipeline pipeline RoBertaEmbeddings from hyperdemocracy +author: John Snow Labs +name: usclm_robrta_base_mk1_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`usclm_robrta_base_mk1_pipeline` is a English model originally trained by hyperdemocracy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/usclm_robrta_base_mk1_pipeline_en_5.5.0_3.0_1725577891498.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/usclm_robrta_base_mk1_pipeline_en_5.5.0_3.0_1725577891498.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("usclm_robrta_base_mk1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("usclm_robrta_base_mk1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|usclm_robrta_base_mk1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|467.0 MB| + +## References + +https://huggingface.co/hyperdemocracy/usclm-robrta-base-mk1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-vidula_multilanguage_finetune_en.md b/docs/_posts/ahmedlone127/2024-09-05-vidula_multilanguage_finetune_en.md new file mode 100644 index 00000000000000..77fd41490e57ba --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-vidula_multilanguage_finetune_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English vidula_multilanguage_finetune MarianTransformer from vidula123 +author: John Snow Labs +name: vidula_multilanguage_finetune +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`vidula_multilanguage_finetune` is a English model originally trained by vidula123. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/vidula_multilanguage_finetune_en_5.5.0_3.0_1725545788547.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/vidula_multilanguage_finetune_en_5.5.0_3.0_1725545788547.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("vidula_multilanguage_finetune","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("vidula_multilanguage_finetune","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
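+
+The translated sentences land in the `result` field of the `translation` column set above; a minimal sketch for reading them:
+
+```python
+pipelineDF.select("translation.result").show(truncate=False)
+```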
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|vidula_multilanguage_finetune| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|523.2 MB| + +## References + +https://huggingface.co/vidula123/Vidula_Multilanguage_Finetune \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-vidula_multilanguage_finetune_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-vidula_multilanguage_finetune_pipeline_en.md new file mode 100644 index 00000000000000..6e4f2f674ac163 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-vidula_multilanguage_finetune_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English vidula_multilanguage_finetune_pipeline pipeline MarianTransformer from vidula123 +author: John Snow Labs +name: vidula_multilanguage_finetune_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`vidula_multilanguage_finetune_pipeline` is a English model originally trained by vidula123. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/vidula_multilanguage_finetune_pipeline_en_5.5.0_3.0_1725545819661.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/vidula_multilanguage_finetune_pipeline_en_5.5.0_3.0_1725545819661.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("vidula_multilanguage_finetune_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("vidula_multilanguage_finetune_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|vidula_multilanguage_finetune_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|523.7 MB| + +## References + +https://huggingface.co/vidula123/Vidula_Multilanguage_Finetune + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-vietnews_roberta_base_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-vietnews_roberta_base_pipeline_en.md new file mode 100644 index 00000000000000..5d8c6d37229b17 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-vietnews_roberta_base_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English vietnews_roberta_base_pipeline pipeline RoBertaEmbeddings from truongpdd +author: John Snow Labs +name: vietnews_roberta_base_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`vietnews_roberta_base_pipeline` is a English model originally trained by truongpdd. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/vietnews_roberta_base_pipeline_en_5.5.0_3.0_1725572397481.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/vietnews_roberta_base_pipeline_en_5.5.0_3.0_1725572397481.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("vietnews_roberta_base_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("vietnews_roberta_base_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|vietnews_roberta_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/truongpdd/vietnews-roberta-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-vit_l_14_336_en.md b/docs/_posts/ahmedlone127/2024-09-05-vit_l_14_336_en.md new file mode 100644 index 00000000000000..6ed702fa574ef2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-vit_l_14_336_en.md @@ -0,0 +1,120 @@ +--- +layout: model +title: English vit_l_14_336 CLIPForZeroShotClassification from asakhare +author: John Snow Labs +name: vit_l_14_336 +date: 2024-09-05 +tags: [en, open_source, onnx, zero_shot, clip, image] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CLIPForZeroShotClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`vit_l_14_336` is a English model originally trained by asakhare. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/vit_l_14_336_en_5.5.0_3.0_1725540597685.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/vit_l_14_336_en_5.5.0_3.0_1725540597685.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+imageDF = spark.read \
+    .format("image") \
+    .option("dropInvalid", value = True) \
+    .load("src/test/resources/image/")
+
+candidateLabels = [
+    "a photo of a bird",
+    "a photo of a cat",
+    "a photo of a dog",
+    "a photo of a hen",
+    "a photo of a hippo",
+    "a photo of a room",
+    "a photo of a tractor",
+    "a photo of an ostrich",
+    "a photo of an ox"]
+
+imageAssembler = ImageAssembler() \
+    .setInputCol("image") \
+    .setOutputCol("image_assembler")
+
+imageClassifier = CLIPForZeroShotClassification.pretrained("vit_l_14_336","en") \
+    .setInputCols(["image_assembler"]) \
+    .setOutputCol("label") \
+    .setCandidateLabels(candidateLabels)
+
+pipeline = Pipeline().setStages([imageAssembler, imageClassifier])
+pipelineModel = pipeline.fit(imageDF)
+pipelineDF = pipelineModel.transform(imageDF)
+
+```
+```scala
+
+val imageDF = ResourceHelper.spark.read
+    .format("image")
+    .option("dropInvalid", value = true)
+    .load("src/test/resources/image/")
+
+val candidateLabels = Array(
+    "a photo of a bird",
+    "a photo of a cat",
+    "a photo of a dog",
+    "a photo of a hen",
+    "a photo of a hippo",
+    "a photo of a room",
+    "a photo of a tractor",
+    "a photo of an ostrich",
+    "a photo of an ox")
+
+val imageAssembler = new ImageAssembler()
+    .setInputCol("image")
+    .setOutputCol("image_assembler")
+
+val imageClassifier = CLIPForZeroShotClassification.pretrained("vit_l_14_336","en")
+    .setInputCols(Array("image_assembler"))
+    .setOutputCol("label")
+    .setCandidateLabels(candidateLabels)
+
+val pipeline = new Pipeline().setStages(Array(imageAssembler, imageClassifier))
+val pipelineModel = pipeline.fit(imageDF)
+val pipelineDF = pipelineModel.transform(imageDF)
+
+```
+
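+
+The predicted candidate label for each image ends up in the `label` column; a minimal sketch for pairing it with the image path (the `image.origin` field comes from Spark's image data source):
+
+```python
+pipelineDF.select("image.origin", "label.result").show(truncate=False)
+```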
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|vit_l_14_336| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[label]| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/asakhare/vit-l-14-336 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-vit_l_14_336_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-vit_l_14_336_pipeline_en.md new file mode 100644 index 00000000000000..f410ca280e22a0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-vit_l_14_336_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English vit_l_14_336_pipeline pipeline CLIPForZeroShotClassification from asakhare +author: John Snow Labs +name: vit_l_14_336_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`vit_l_14_336_pipeline` is a English model originally trained by asakhare. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/vit_l_14_336_pipeline_en_5.5.0_3.0_1725540677364.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/vit_l_14_336_pipeline_en_5.5.0_3.0_1725540677364.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("vit_l_14_336_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("vit_l_14_336_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|vit_l_14_336_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/asakhare/vit-l-14-336 + +## Included Models + +- ImageAssembler +- CLIPForZeroShotClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-whisper_sinhala_audio_tonga_tonga_islands_text_en.md b/docs/_posts/ahmedlone127/2024-09-05-whisper_sinhala_audio_tonga_tonga_islands_text_en.md new file mode 100644 index 00000000000000..f5e0e0caed9916 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-whisper_sinhala_audio_tonga_tonga_islands_text_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_sinhala_audio_tonga_tonga_islands_text WhisperForCTC from AqeelShafy7 +author: John Snow Labs +name: whisper_sinhala_audio_tonga_tonga_islands_text +date: 2024-09-05 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_sinhala_audio_tonga_tonga_islands_text` is a English model originally trained by AqeelShafy7. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_sinhala_audio_tonga_tonga_islands_text_en_5.5.0_3.0_1725547474068.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_sinhala_audio_tonga_tonga_islands_text_en_5.5.0_3.0_1725547474068.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_sinhala_audio_tonga_tonga_islands_text","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+
+# `data` is assumed to be a DataFrame with an "audio_content" column of float audio samples
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_sinhala_audio_tonga_tonga_islands_text", "en")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+
+// `data` is assumed to be a DataFrame with an "audio_content" column of float audio samples
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
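+
+The `data` DataFrame above is assumed to already hold raw audio. A minimal sketch of building it from a local file; the file path and the use of librosa are illustrative assumptions, not part of this model card:
+
+```python
+import librosa
+
+# Whisper checkpoints expect 16 kHz mono float samples
+samples, _ = librosa.load("path/to/audio.wav", sr=16000)
+data = spark.createDataFrame([[samples.tolist()]]).toDF("audio_content")
+```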
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_sinhala_audio_tonga_tonga_islands_text| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/AqeelShafy7/Whisper-Sinhala_Audio_to_Text \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-whisper_sinhala_audio_tonga_tonga_islands_text_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-whisper_sinhala_audio_tonga_tonga_islands_text_pipeline_en.md new file mode 100644 index 00000000000000..43b37ec4f96044 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-whisper_sinhala_audio_tonga_tonga_islands_text_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_sinhala_audio_tonga_tonga_islands_text_pipeline pipeline WhisperForCTC from AqeelShafy7 +author: John Snow Labs +name: whisper_sinhala_audio_tonga_tonga_islands_text_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_sinhala_audio_tonga_tonga_islands_text_pipeline` is a English model originally trained by AqeelShafy7. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_sinhala_audio_tonga_tonga_islands_text_pipeline_en_5.5.0_3.0_1725547563320.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_sinhala_audio_tonga_tonga_islands_text_pipeline_en_5.5.0_3.0_1725547563320.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_sinhala_audio_tonga_tonga_islands_text_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_sinhala_audio_tonga_tonga_islands_text_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_sinhala_audio_tonga_tonga_islands_text_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/AqeelShafy7/Whisper-Sinhala_Audio_to_Text + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-whisper_small_arabic_arbml_en.md b/docs/_posts/ahmedlone127/2024-09-05-whisper_small_arabic_arbml_en.md new file mode 100644 index 00000000000000..7f5491a80fbb92 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-whisper_small_arabic_arbml_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_small_arabic_arbml WhisperForCTC from arbml +author: John Snow Labs +name: whisper_small_arabic_arbml +date: 2024-09-05 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_arabic_arbml` is a English model originally trained by arbml. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_arabic_arbml_en_5.5.0_3.0_1725548908962.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_arabic_arbml_en_5.5.0_3.0_1725548908962.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_arabic_arbml","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+
+# `data` is assumed to be a DataFrame with an "audio_content" column of float audio samples
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_arabic_arbml", "en")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+
+// `data` is assumed to be a DataFrame with an "audio_content" column of float audio samples
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
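+
+Once fitted and transformed, the transcription is available in the `text` output column; a minimal sketch for reading it:
+
+```python
+pipelineDF.select("text.result").show(truncate=False)
+```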
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_arabic_arbml| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/arbml/whisper-small-ar \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-whisper_small_arabic_arbml_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-whisper_small_arabic_arbml_pipeline_en.md new file mode 100644 index 00000000000000..e697caaac94c36 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-whisper_small_arabic_arbml_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_small_arabic_arbml_pipeline pipeline WhisperForCTC from arbml +author: John Snow Labs +name: whisper_small_arabic_arbml_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_arabic_arbml_pipeline` is a English model originally trained by arbml. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_arabic_arbml_pipeline_en_5.5.0_3.0_1725548998676.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_arabic_arbml_pipeline_en_5.5.0_3.0_1725548998676.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_arabic_arbml_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_arabic_arbml_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_arabic_arbml_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/arbml/whisper-small-ar + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-whisper_small_bengali_anuragshas_bn.md b/docs/_posts/ahmedlone127/2024-09-05-whisper_small_bengali_anuragshas_bn.md new file mode 100644 index 00000000000000..832d63e9a0a953 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-whisper_small_bengali_anuragshas_bn.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Bengali whisper_small_bengali_anuragshas WhisperForCTC from anuragshas +author: John Snow Labs +name: whisper_small_bengali_anuragshas +date: 2024-09-05 +tags: [bn, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: bn +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_bengali_anuragshas` is a Bengali model originally trained by anuragshas. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_bengali_anuragshas_bn_5.5.0_3.0_1725549796908.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_bengali_anuragshas_bn_5.5.0_3.0_1725549796908.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_bengali_anuragshas","bn") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+
+# `data` is assumed to be a DataFrame with an "audio_content" column of float audio samples
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_bengali_anuragshas", "bn")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+
+// `data` is assumed to be a DataFrame with an "audio_content" column of float audio samples
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_bengali_anuragshas| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|bn| +|Size:|1.7 GB| + +## References + +https://huggingface.co/anuragshas/whisper-small-bn \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-whisper_small_bengali_anuragshas_pipeline_bn.md b/docs/_posts/ahmedlone127/2024-09-05-whisper_small_bengali_anuragshas_pipeline_bn.md new file mode 100644 index 00000000000000..4ebf281ff34855 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-whisper_small_bengali_anuragshas_pipeline_bn.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Bengali whisper_small_bengali_anuragshas_pipeline pipeline WhisperForCTC from anuragshas +author: John Snow Labs +name: whisper_small_bengali_anuragshas_pipeline +date: 2024-09-05 +tags: [bn, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: bn +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_bengali_anuragshas_pipeline` is a Bengali model originally trained by anuragshas. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_bengali_anuragshas_pipeline_bn_5.5.0_3.0_1725549890937.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_bengali_anuragshas_pipeline_bn_5.5.0_3.0_1725549890937.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_bengali_anuragshas_pipeline", lang = "bn") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_bengali_anuragshas_pipeline", lang = "bn") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_bengali_anuragshas_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|bn| +|Size:|1.7 GB| + +## References + +https://huggingface.co/anuragshas/whisper-small-bn + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-whisper_small_egyptian_asr_v2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-whisper_small_egyptian_asr_v2_pipeline_en.md new file mode 100644 index 00000000000000..9e4b1bbe015852 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-whisper_small_egyptian_asr_v2_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_small_egyptian_asr_v2_pipeline pipeline WhisperForCTC from dreahim +author: John Snow Labs +name: whisper_small_egyptian_asr_v2_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_egyptian_asr_v2_pipeline` is a English model originally trained by dreahim. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_egyptian_asr_v2_pipeline_en_5.5.0_3.0_1725548504717.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_egyptian_asr_v2_pipeline_en_5.5.0_3.0_1725548504717.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_egyptian_asr_v2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_egyptian_asr_v2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_egyptian_asr_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/dreahim/whisper-small-Egyptian_ASR_v2 + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-whisper_small_hindi_abhiramk6_pipeline_hi.md b/docs/_posts/ahmedlone127/2024-09-05-whisper_small_hindi_abhiramk6_pipeline_hi.md new file mode 100644 index 00000000000000..4ad47a5ccd168b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-whisper_small_hindi_abhiramk6_pipeline_hi.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Hindi whisper_small_hindi_abhiramk6_pipeline pipeline WhisperForCTC from abhiramk6 +author: John Snow Labs +name: whisper_small_hindi_abhiramk6_pipeline +date: 2024-09-05 +tags: [hi, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: hi +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_hindi_abhiramk6_pipeline` is a Hindi model originally trained by abhiramk6. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_hindi_abhiramk6_pipeline_hi_5.5.0_3.0_1725547792978.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_hindi_abhiramk6_pipeline_hi_5.5.0_3.0_1725547792978.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_hindi_abhiramk6_pipeline", lang = "hi") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_hindi_abhiramk6_pipeline", lang = "hi") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_hindi_abhiramk6_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|hi| +|Size:|1.7 GB| + +## References + +https://huggingface.co/abhiramk6/whisper-small-hi + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-whisper_small_hre4_2_en.md b/docs/_posts/ahmedlone127/2024-09-05-whisper_small_hre4_2_en.md new file mode 100644 index 00000000000000..952864fb9ada4d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-whisper_small_hre4_2_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_small_hre4_2 WhisperForCTC from ntviet +author: John Snow Labs +name: whisper_small_hre4_2 +date: 2024-09-05 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_hre4_2` is a English model originally trained by ntviet. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_hre4_2_en_5.5.0_3.0_1725550878452.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_hre4_2_en_5.5.0_3.0_1725550878452.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_hre4_2","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_hre4_2", "en")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_hre4_2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/ntviet/whisper-small-hre4.2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-whisper_small_hre4_2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-whisper_small_hre4_2_pipeline_en.md new file mode 100644 index 00000000000000..3d0a21283e5d0f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-whisper_small_hre4_2_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_small_hre4_2_pipeline pipeline WhisperForCTC from ntviet +author: John Snow Labs +name: whisper_small_hre4_2_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_hre4_2_pipeline` is a English model originally trained by ntviet. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_hre4_2_pipeline_en_5.5.0_3.0_1725550972369.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_hre4_2_pipeline_en_5.5.0_3.0_1725550972369.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_hre4_2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_hre4_2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_hre4_2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/ntviet/whisper-small-hre4.2 + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-whisper_test_quant_smokxy_en.md b/docs/_posts/ahmedlone127/2024-09-05-whisper_test_quant_smokxy_en.md new file mode 100644 index 00000000000000..adb68850878317 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-whisper_test_quant_smokxy_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_test_quant_smokxy WhisperForCTC from smokxy +author: John Snow Labs +name: whisper_test_quant_smokxy +date: 2024-09-05 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_test_quant_smokxy` is a English model originally trained by smokxy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_test_quant_smokxy_en_5.5.0_3.0_1725548810280.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_test_quant_smokxy_en_5.5.0_3.0_1725548810280.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_test_quant_smokxy","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_test_quant_smokxy", "en")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_test_quant_smokxy| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|389.9 MB| + +## References + +https://huggingface.co/smokxy/whisper-test-quant \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-whisper_test_quant_smokxy_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-whisper_test_quant_smokxy_pipeline_en.md new file mode 100644 index 00000000000000..865ca0d13909ea --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-whisper_test_quant_smokxy_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_test_quant_smokxy_pipeline pipeline WhisperForCTC from smokxy +author: John Snow Labs +name: whisper_test_quant_smokxy_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_test_quant_smokxy_pipeline` is a English model originally trained by smokxy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_test_quant_smokxy_pipeline_en_5.5.0_3.0_1725548832787.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_test_quant_smokxy_pipeline_en_5.5.0_3.0_1725548832787.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_test_quant_smokxy_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_test_quant_smokxy_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_test_quant_smokxy_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|389.9 MB| + +## References + +https://huggingface.co/smokxy/whisper-test-quant + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-wmdp_classifier_en.md b/docs/_posts/ahmedlone127/2024-09-05-wmdp_classifier_en.md new file mode 100644 index 00000000000000..eedf5a10ab9b3b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-wmdp_classifier_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English wmdp_classifier RoBertaForSequenceClassification from chrisliu298 +author: John Snow Labs +name: wmdp_classifier +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`wmdp_classifier` is a English model originally trained by chrisliu298. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/wmdp_classifier_en_5.5.0_3.0_1725541528116.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/wmdp_classifier_en_5.5.0_3.0_1725541528116.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("wmdp_classifier","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("wmdp_classifier", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
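+
+To see which labels this checkpoint predicts and to read the results, the fitted model can be queried as sketched below; this is a hypothetical continuation of the Python example above, not part of the original card.
+
+```python
+# Label set shipped with the model.
+print(sequenceClassifier.getClasses())
+
+# Predicted class per input row.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```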
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|wmdp_classifier| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|453.9 MB| + +## References + +https://huggingface.co/chrisliu298/wmdp_classifier \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-wordwizard_masked_lm_en.md b/docs/_posts/ahmedlone127/2024-09-05-wordwizard_masked_lm_en.md new file mode 100644 index 00000000000000..e8742e47f66638 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-wordwizard_masked_lm_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English wordwizard_masked_lm DistilBertEmbeddings from Priyanshuchaudhary2425 +author: John Snow Labs +name: wordwizard_masked_lm +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`wordwizard_masked_lm` is a English model originally trained by Priyanshuchaudhary2425. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/wordwizard_masked_lm_en_5.5.0_3.0_1725524041423.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/wordwizard_masked_lm_en_5.5.0_3.0_1725524041423.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("wordwizard_masked_lm","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("wordwizard_masked_lm","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
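+
+If the token embeddings are needed as plain Spark ML vectors rather than Spark NLP annotations, an `EmbeddingsFinisher` stage can be appended to the pipeline. The following brief sketch reuses the Python stages defined in the example above; the output column names are illustrative.
+
+```python
+from sparknlp.base import EmbeddingsFinisher
+
+# Convert the "embeddings" annotation column into one vector per token.
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["finished_embeddings"]) \
+    .setOutputAsVector(True)
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings, finisher])
+result = pipeline.fit(data).transform(data)
+result.selectExpr("explode(finished_embeddings) as token_vector").show(5)
+```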
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|wordwizard_masked_lm| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Priyanshuchaudhary2425/WordWizard-masked-LM \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-wordwizard_masked_lm_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-wordwizard_masked_lm_pipeline_en.md new file mode 100644 index 00000000000000..ed95a14f6a9191 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-wordwizard_masked_lm_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English wordwizard_masked_lm_pipeline pipeline DistilBertEmbeddings from Priyanshuchaudhary2425 +author: John Snow Labs +name: wordwizard_masked_lm_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`wordwizard_masked_lm_pipeline` is a English model originally trained by Priyanshuchaudhary2425. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/wordwizard_masked_lm_pipeline_en_5.5.0_3.0_1725524055735.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/wordwizard_masked_lm_pipeline_en_5.5.0_3.0_1725524055735.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("wordwizard_masked_lm_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("wordwizard_masked_lm_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|wordwizard_masked_lm_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Priyanshuchaudhary2425/WordWizard-masked-LM + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_emo_t_maryamfp_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_emo_t_maryamfp_en.md new file mode 100644 index 00000000000000..82192d4e72dc22 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_emo_t_maryamfp_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_emo_t_maryamfp XlmRoBertaForSequenceClassification from maryamfp +author: John Snow Labs +name: xlm_emo_t_maryamfp +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_emo_t_maryamfp` is a English model originally trained by maryamfp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_emo_t_maryamfp_en_5.5.0_3.0_1725537096453.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_emo_t_maryamfp_en_5.5.0_3.0_1725537096453.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_emo_t_maryamfp","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_emo_t_maryamfp", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_emo_t_maryamfp| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/maryamfp/xlm-emo-t \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_emo_t_maryamfp_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_emo_t_maryamfp_pipeline_en.md new file mode 100644 index 00000000000000..0eef377fb171ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_emo_t_maryamfp_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_emo_t_maryamfp_pipeline pipeline XlmRoBertaForSequenceClassification from maryamfp +author: John Snow Labs +name: xlm_emo_t_maryamfp_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_emo_t_maryamfp_pipeline` is a English model originally trained by maryamfp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_emo_t_maryamfp_pipeline_en_5.5.0_3.0_1725537149483.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_emo_t_maryamfp_pipeline_en_5.5.0_3.0_1725537149483.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_emo_t_maryamfp_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_emo_t_maryamfp_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
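+
+As in the other pretrained-pipeline examples on this page, `df` is assumed to already exist. A short, self-contained sketch is given below; the `text` input column and the `class` output column mirror the companion annotator card and are assumptions for this pipeline.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+pipeline = PretrainedPipeline("xlm_emo_t_maryamfp_pipeline", lang = "en")
+
+# The pipeline starts with a DocumentAssembler, so it expects a "text" column.
+df = spark.createDataFrame([["I love spark-nlp"], ["This is upsetting"]]).toDF("text")
+
+result = pipeline.transform(df)
+result.select("text", "class.result").show(truncate=False)
+```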
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_emo_t_maryamfp_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/maryamfp/xlm-emo-t + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_nli_m_korsemeval_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_nli_m_korsemeval_en.md new file mode 100644 index 00000000000000..e1e610d5e79889 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_nli_m_korsemeval_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_nli_m_korsemeval XlmRoBertaForSequenceClassification from KorABSA +author: John Snow Labs +name: xlm_nli_m_korsemeval +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_nli_m_korsemeval` is a English model originally trained by KorABSA. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_nli_m_korsemeval_en_5.5.0_3.0_1725526355741.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_nli_m_korsemeval_en_5.5.0_3.0_1725526355741.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_nli_m_korsemeval","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_nli_m_korsemeval", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_nli_m_korsemeval| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|834.4 MB| + +## References + +https://huggingface.co/KorABSA/XLM-NLI-M-KorSemEval \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_nli_m_korsemeval_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_nli_m_korsemeval_pipeline_en.md new file mode 100644 index 00000000000000..df1253779feade --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_nli_m_korsemeval_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_nli_m_korsemeval_pipeline pipeline XlmRoBertaForSequenceClassification from KorABSA +author: John Snow Labs +name: xlm_nli_m_korsemeval_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_nli_m_korsemeval_pipeline` is a English model originally trained by KorABSA. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_nli_m_korsemeval_pipeline_en_5.5.0_3.0_1725526445629.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_nli_m_korsemeval_pipeline_en_5.5.0_3.0_1725526445629.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_nli_m_korsemeval_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_nli_m_korsemeval_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_nli_m_korsemeval_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|834.4 MB| + +## References + +https://huggingface.co/KorABSA/XLM-NLI-M-KorSemEval + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_pretrain_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_pretrain_en.md new file mode 100644 index 00000000000000..8136ba25026753 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_pretrain_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_pretrain XlmRoBertaEmbeddings from hadifar +author: John Snow Labs +name: xlm_pretrain +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_pretrain` is a English model originally trained by hadifar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_pretrain_en_5.5.0_3.0_1725531664915.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_pretrain_en_5.5.0_3.0_1725531664915.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("xlm_pretrain","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("xlm_pretrain","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_pretrain| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[xlm_roberta]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/hadifar/xlm_pretrain \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_1024_256_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_1024_256_en.md new file mode 100644 index 00000000000000..d30bef329c0f1e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_1024_256_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_1024_256 XlmRoBertaEmbeddings from kdercksen +author: John Snow Labs +name: xlm_roberta_base_1024_256 +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_1024_256` is a English model originally trained by kdercksen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_1024_256_en_5.5.0_3.0_1725531937998.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_1024_256_en_5.5.0_3.0_1725531937998.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("xlm_roberta_base_1024_256","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("xlm_roberta_base_1024_256","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_1024_256| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[xlm_roberta]| +|Language:|en| +|Size:|653.9 MB| + +## References + +https://huggingface.co/kdercksen/xlm-roberta-base-1024-256 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_balance_mixed_aug_swap_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_balance_mixed_aug_swap_en.md new file mode 100644 index 00000000000000..9bbaaf538babdd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_balance_mixed_aug_swap_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_balance_mixed_aug_swap XlmRoBertaForSequenceClassification from ThuyNT03 +author: John Snow Labs +name: xlm_roberta_base_balance_mixed_aug_swap +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_balance_mixed_aug_swap` is a English model originally trained by ThuyNT03. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_balance_mixed_aug_swap_en_5.5.0_3.0_1725527775621.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_balance_mixed_aug_swap_en_5.5.0_3.0_1725527775621.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_balance_mixed_aug_swap","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_balance_mixed_aug_swap", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_balance_mixed_aug_swap| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|798.0 MB| + +## References + +https://huggingface.co/ThuyNT03/xlm-roberta-base-Balance_Mixed-aug_swap \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_balance_mixed_aug_swap_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_balance_mixed_aug_swap_pipeline_en.md new file mode 100644 index 00000000000000..c83ea7e6ed2938 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_balance_mixed_aug_swap_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_balance_mixed_aug_swap_pipeline pipeline XlmRoBertaForSequenceClassification from ThuyNT03 +author: John Snow Labs +name: xlm_roberta_base_balance_mixed_aug_swap_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_balance_mixed_aug_swap_pipeline` is a English model originally trained by ThuyNT03. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_balance_mixed_aug_swap_pipeline_en_5.5.0_3.0_1725527896255.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_balance_mixed_aug_swap_pipeline_en_5.5.0_3.0_1725527896255.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_balance_mixed_aug_swap_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_balance_mixed_aug_swap_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_balance_mixed_aug_swap_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|798.0 MB| + +## References + +https://huggingface.co/ThuyNT03/xlm-roberta-base-Balance_Mixed-aug_swap + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_csfd_slovak_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_csfd_slovak_en.md new file mode 100644 index 00000000000000..92098908dafccc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_csfd_slovak_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_csfd_slovak XlmRoBertaForSequenceClassification from gaussalgo +author: John Snow Labs +name: xlm_roberta_base_csfd_slovak +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_csfd_slovak` is a English model originally trained by gaussalgo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_csfd_slovak_en_5.5.0_3.0_1725536521914.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_csfd_slovak_en_5.5.0_3.0_1725536521914.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_csfd_slovak","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_csfd_slovak", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_csfd_slovak| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|815.2 MB| + +## References + +https://huggingface.co/gaussalgo/xlm-roberta-base_CSFD-sk \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_csfd_slovak_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_csfd_slovak_pipeline_en.md new file mode 100644 index 00000000000000..ecb60dc46f2274 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_csfd_slovak_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_csfd_slovak_pipeline pipeline XlmRoBertaForSequenceClassification from gaussalgo +author: John Snow Labs +name: xlm_roberta_base_csfd_slovak_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_csfd_slovak_pipeline` is a English model originally trained by gaussalgo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_csfd_slovak_pipeline_en_5.5.0_3.0_1725536649949.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_csfd_slovak_pipeline_en_5.5.0_3.0_1725536649949.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_csfd_slovak_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_csfd_slovak_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_csfd_slovak_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|815.2 MB| + +## References + +https://huggingface.co/gaussalgo/xlm-roberta-base_CSFD-sk + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_final_mixed_aug_replace_tfidf_1_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_final_mixed_aug_replace_tfidf_1_en.md new file mode 100644 index 00000000000000..4f46633f2c9a81 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_final_mixed_aug_replace_tfidf_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_final_mixed_aug_replace_tfidf_1 XlmRoBertaForSequenceClassification from ThuyNT03 +author: John Snow Labs +name: xlm_roberta_base_final_mixed_aug_replace_tfidf_1 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_final_mixed_aug_replace_tfidf_1` is a English model originally trained by ThuyNT03. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_final_mixed_aug_replace_tfidf_1_en_5.5.0_3.0_1725537864254.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_final_mixed_aug_replace_tfidf_1_en_5.5.0_3.0_1725537864254.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_final_mixed_aug_replace_tfidf_1","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_final_mixed_aug_replace_tfidf_1", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_final_mixed_aug_replace_tfidf_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|794.9 MB| + +## References + +https://huggingface.co/ThuyNT03/xlm-roberta-base-Final_Mixed-aug_replace_tfidf-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_final_mixed_aug_replace_tfidf_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_final_mixed_aug_replace_tfidf_1_pipeline_en.md new file mode 100644 index 00000000000000..5c3d773d3d9434 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_final_mixed_aug_replace_tfidf_1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_final_mixed_aug_replace_tfidf_1_pipeline pipeline XlmRoBertaForSequenceClassification from ThuyNT03 +author: John Snow Labs +name: xlm_roberta_base_final_mixed_aug_replace_tfidf_1_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_final_mixed_aug_replace_tfidf_1_pipeline` is a English model originally trained by ThuyNT03. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_final_mixed_aug_replace_tfidf_1_pipeline_en_5.5.0_3.0_1725537989256.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_final_mixed_aug_replace_tfidf_1_pipeline_en_5.5.0_3.0_1725537989256.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_final_mixed_aug_replace_tfidf_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_final_mixed_aug_replace_tfidf_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_final_mixed_aug_replace_tfidf_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|794.9 MB| + +## References + +https://huggingface.co/ThuyNT03/xlm-roberta-base-Final_Mixed-aug_replace_tfidf-1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_final_vietnam_train_1_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_final_vietnam_train_1_en.md new file mode 100644 index 00000000000000..e7e85193321f93 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_final_vietnam_train_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_final_vietnam_train_1 XlmRoBertaForSequenceClassification from ThuyNT03 +author: John Snow Labs +name: xlm_roberta_base_final_vietnam_train_1 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_final_vietnam_train_1` is a English model originally trained by ThuyNT03. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_final_vietnam_train_1_en_5.5.0_3.0_1725530692575.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_final_vietnam_train_1_en_5.5.0_3.0_1725530692575.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_final_vietnam_train_1","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_final_vietnam_train_1", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_final_vietnam_train_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|794.3 MB| + +## References + +https://huggingface.co/ThuyNT03/xlm-roberta-base-Final_VietNam-train-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_final_vietnam_train_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_final_vietnam_train_1_pipeline_en.md new file mode 100644 index 00000000000000..a0deab21407814 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_final_vietnam_train_1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_final_vietnam_train_1_pipeline pipeline XlmRoBertaForSequenceClassification from ThuyNT03 +author: John Snow Labs +name: xlm_roberta_base_final_vietnam_train_1_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_final_vietnam_train_1_pipeline` is a English model originally trained by ThuyNT03. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_final_vietnam_train_1_pipeline_en_5.5.0_3.0_1725530822221.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_final_vietnam_train_1_pipeline_en_5.5.0_3.0_1725530822221.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_final_vietnam_train_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_final_vietnam_train_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_final_vietnam_train_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|794.3 MB| + +## References + +https://huggingface.co/ThuyNT03/xlm-roberta-base-Final_VietNam-train-1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_augument_visquad2_15_3_2023_3_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_augument_visquad2_15_3_2023_3_en.md new file mode 100644 index 00000000000000..7632c0ca252cdf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_augument_visquad2_15_3_2023_3_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_augument_visquad2_15_3_2023_3 XlmRoBertaForQuestionAnswering from jluckyboyj +author: John Snow Labs +name: xlm_roberta_base_finetuned_augument_visquad2_15_3_2023_3 +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_augument_visquad2_15_3_2023_3` is a English model originally trained by jluckyboyj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_augument_visquad2_15_3_2023_3_en_5.5.0_3.0_1725499211733.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_augument_visquad2_15_3_2023_3_en_5.5.0_3.0_1725499211733.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_finetuned_augument_visquad2_15_3_2023_3", "en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_finetuned_augument_visquad2_15_3_2023_3", "en")
+  .setInputCols(Array("document_question", "document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
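+
+Once the pipeline above has been fit and applied, the predicted answer span can be read from the `answer` annotation column listed in the model information below. A short follow-up sketch, assuming the `pipelineDF` produced by the Python example above:
+
+```python
+# Show each question next to the answer text extracted from its context.
+pipelineDF.select("document_question.result", "answer.result").show(truncate=False)
+```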
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_augument_visquad2_15_3_2023_3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|853.0 MB| + +## References + +https://huggingface.co/jluckyboyj/xlm-roberta-base-finetuned-augument-visquad2-15-3-2023-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_covidqa_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_covidqa_en.md new file mode 100644 index 00000000000000..066c77dd58e2b3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_covidqa_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_covidqa XlmRoBertaForQuestionAnswering from chiendvhust +author: John Snow Labs +name: xlm_roberta_base_finetuned_covidqa +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_covidqa` is a English model originally trained by chiendvhust. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_covidqa_en_5.5.0_3.0_1725567222941.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_covidqa_en_5.5.0_3.0_1725567222941.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_finetuned_covidqa", "en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_finetuned_covidqa", "en")
+  .setInputCols(Array("document_question", "document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_covidqa| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|855.6 MB| + +## References + +https://huggingface.co/chiendvhust/xlm-roberta-base-finetuned-covidQA \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_covidqa_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_covidqa_pipeline_en.md new file mode 100644 index 00000000000000..54981eede05d2c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_covidqa_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_covidqa_pipeline pipeline XlmRoBertaForQuestionAnswering from chiendvhust +author: John Snow Labs +name: xlm_roberta_base_finetuned_covidqa_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_covidqa_pipeline` is a English model originally trained by chiendvhust. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_covidqa_pipeline_en_5.5.0_3.0_1725567298191.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_covidqa_pipeline_en_5.5.0_3.0_1725567298191.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_covidqa_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_covidqa_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_covidqa_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|855.6 MB| + +## References + +https://huggingface.co/chiendvhust/xlm-roberta-base-finetuned-covidQA + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_english_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_english_en.md new file mode 100644 index 00000000000000..31d080a9097eca --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_english_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_english XlmRoBertaEmbeddings from Davlan +author: John Snow Labs +name: xlm_roberta_base_finetuned_english +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_english` is a English model originally trained by Davlan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_english_en_5.5.0_3.0_1725531475478.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_english_en_5.5.0_3.0_1725531475478.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("xlm_roberta_base_finetuned_english","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("xlm_roberta_base_finetuned_english","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
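+
+The embeddings annotator emits one annotation per token, with the token string in its `result` field and the vector in its `embeddings` field. A brief sketch of unpacking them from the `pipelineDF` produced by the Python example above; the exact column handling here is an illustrative assumption, not part of the original card:
+
+```python
+from pyspark.sql.functions import explode
+
+# One row per token: the token text and its XLM-RoBERTa vector.
+pipelineDF.select(explode("embeddings").alias("emb")) \
+    .selectExpr("emb.result AS token", "emb.embeddings AS vector") \
+    .show(truncate=80)
+```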
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_english| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[xlm_roberta]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Davlan/xlm-roberta-base-finetuned-english \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_english_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_english_pipeline_en.md new file mode 100644 index 00000000000000..38c6ebe0266a44 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_english_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_english_pipeline pipeline XlmRoBertaEmbeddings from Davlan +author: John Snow Labs +name: xlm_roberta_base_finetuned_english_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_english_pipeline` is a English model originally trained by Davlan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_english_pipeline_en_5.5.0_3.0_1725531527784.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_english_pipeline_en_5.5.0_3.0_1725531527784.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_english_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_english_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Davlan/xlm-roberta-base-finetuned-english + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_iberautextification2024_9010_task2_v1_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_iberautextification2024_9010_task2_v1_en.md new file mode 100644 index 00000000000000..11bc1a227ac169 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_iberautextification2024_9010_task2_v1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_iberautextification2024_9010_task2_v1 XlmRoBertaForSequenceClassification from vg055 +author: John Snow Labs +name: xlm_roberta_base_finetuned_iberautextification2024_9010_task2_v1 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_iberautextification2024_9010_task2_v1` is a English model originally trained by vg055. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_iberautextification2024_9010_task2_v1_en_5.5.0_3.0_1725526460102.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_iberautextification2024_9010_task2_v1_en_5.5.0_3.0_1725526460102.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_finetuned_iberautextification2024_9010_task2_v1", "en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_finetuned_iberautextification2024_9010_task2_v1", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
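+
+After fitting and transforming as above, the predicted label for each row is available in the `class` annotation column listed in the model information below. A short follow-up sketch, assuming the `pipelineDF` from the Python example above:
+
+```python
+# Show the input text next to the predicted class label.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```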
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_iberautextification2024_9010_task2_v1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|886.6 MB| + +## References + +https://huggingface.co/vg055/xlm-roberta-base-finetuned-IberAuTexTification2024-9010-task2-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_iberautextification2024_9010_task2_v1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_iberautextification2024_9010_task2_v1_pipeline_en.md new file mode 100644 index 00000000000000..4cfcf57a02cef1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_iberautextification2024_9010_task2_v1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_iberautextification2024_9010_task2_v1_pipeline pipeline XlmRoBertaForSequenceClassification from vg055 +author: John Snow Labs +name: xlm_roberta_base_finetuned_iberautextification2024_9010_task2_v1_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_iberautextification2024_9010_task2_v1_pipeline` is a English model originally trained by vg055. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_iberautextification2024_9010_task2_v1_pipeline_en_5.5.0_3.0_1725526523743.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_iberautextification2024_9010_task2_v1_pipeline_en_5.5.0_3.0_1725526523743.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_iberautextification2024_9010_task2_v1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_iberautextification2024_9010_task2_v1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_iberautextification2024_9010_task2_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|886.6 MB| + +## References + +https://huggingface.co/vg055/xlm-roberta-base-finetuned-IberAuTexTification2024-9010-task2-v1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_kintweetse_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_kintweetse_en.md new file mode 100644 index 00000000000000..9dab1c7cca1cd2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_kintweetse_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_kintweetse XlmRoBertaEmbeddings from RogerB +author: John Snow Labs +name: xlm_roberta_base_finetuned_kintweetse +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_kintweetse` is a English model originally trained by RogerB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_kintweetse_en_5.5.0_3.0_1725532396965.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_kintweetse_en_5.5.0_3.0_1725532396965.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("xlm_roberta_base_finetuned_kintweetse","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("xlm_roberta_base_finetuned_kintweetse","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_kintweetse| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[xlm_roberta]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/RogerB/xlm-roberta-base-finetuned-kintweetsE \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_kintweetse_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_kintweetse_pipeline_en.md new file mode 100644 index 00000000000000..24f7a12e74b7d1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_kintweetse_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_kintweetse_pipeline pipeline XlmRoBertaEmbeddings from RogerB +author: John Snow Labs +name: xlm_roberta_base_finetuned_kintweetse_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_kintweetse_pipeline` is a English model originally trained by RogerB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_kintweetse_pipeline_en_5.5.0_3.0_1725532451630.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_kintweetse_pipeline_en_5.5.0_3.0_1725532451630.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_kintweetse_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_kintweetse_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_kintweetse_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/RogerB/xlm-roberta-base-finetuned-kintweetsE + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_sent3_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_sent3_en.md new file mode 100644 index 00000000000000..b74c2ed8df46e1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_sent3_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_sent3 XlmRoBertaForSequenceClassification from RogerB +author: John Snow Labs +name: xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_sent3 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_sent3` is a English model originally trained by RogerB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_sent3_en_5.5.0_3.0_1725536944359.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_sent3_en_5.5.0_3.0_1725536944359.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_sent3", "en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_sent3", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_sent3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/RogerB/xlm-roberta-base-finetuned-kinyarwanda-kin-finetuned-kin-sent3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_sent3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_sent3_pipeline_en.md new file mode 100644 index 00000000000000..47574086f9fb3a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_sent3_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_sent3_pipeline pipeline XlmRoBertaForSequenceClassification from RogerB +author: John Snow Labs +name: xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_sent3_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_sent3_pipeline` is a English model originally trained by RogerB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_sent3_pipeline_en_5.5.0_3.0_1725537000448.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_sent3_pipeline_en_5.5.0_3.0_1725537000448.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_sent3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_sent3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_sent3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/RogerB/xlm-roberta-base-finetuned-kinyarwanda-kin-finetuned-kin-sent3 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_tweets_finetuned_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_tweets_finetuned_en.md new file mode 100644 index 00000000000000..be9f5f8af9a7f7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_tweets_finetuned_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_tweets_finetuned XlmRoBertaEmbeddings from RogerB +author: John Snow Labs +name: xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_tweets_finetuned +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_tweets_finetuned` is a English model originally trained by RogerB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_tweets_finetuned_en_5.5.0_3.0_1725531774774.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_tweets_finetuned_en_5.5.0_3.0_1725531774774.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_tweets_finetuned","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_tweets_finetuned","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_tweets_finetuned| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[xlm_roberta]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/RogerB/xlm-roberta-base-finetuned-kinyarwanda-kin-finetuned-kin-tweets-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_tweets_finetuned_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_tweets_finetuned_pipeline_en.md new file mode 100644 index 00000000000000..3fc5a9a7266fbf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_tweets_finetuned_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_tweets_finetuned_pipeline pipeline XlmRoBertaEmbeddings from RogerB +author: John Snow Labs +name: xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_tweets_finetuned_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_tweets_finetuned_pipeline` is a English model originally trained by RogerB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_tweets_finetuned_pipeline_en_5.5.0_3.0_1725531827196.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_tweets_finetuned_pipeline_en_5.5.0_3.0_1725531827196.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_tweets_finetuned_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_tweets_finetuned_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_kinyarwanda_kinyarwanda_finetuned_kinyarwanda_tweets_finetuned_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/RogerB/xlm-roberta-base-finetuned-kinyarwanda-kin-finetuned-kin-tweets-finetuned + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_marc_english_anditya_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_marc_english_anditya_en.md new file mode 100644 index 00000000000000..d2d0b4bed87597 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_marc_english_anditya_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_marc_english_anditya XlmRoBertaForSequenceClassification from anditya +author: John Snow Labs +name: xlm_roberta_base_finetuned_marc_english_anditya +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_marc_english_anditya` is a English model originally trained by anditya. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_marc_english_anditya_en_5.5.0_3.0_1725525763168.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_marc_english_anditya_en_5.5.0_3.0_1725525763168.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_finetuned_marc_english_anditya", "en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_finetuned_marc_english_anditya", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_marc_english_anditya| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|833.5 MB| + +## References + +https://huggingface.co/anditya/xlm-roberta-base-finetuned-marc-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_marc_english_anditya_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_marc_english_anditya_pipeline_en.md new file mode 100644 index 00000000000000..d4981b9a832a8c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_marc_english_anditya_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_marc_english_anditya_pipeline pipeline XlmRoBertaForSequenceClassification from anditya +author: John Snow Labs +name: xlm_roberta_base_finetuned_marc_english_anditya_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_marc_english_anditya_pipeline` is a English model originally trained by anditya. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_marc_english_anditya_pipeline_en_5.5.0_3.0_1725525852663.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_marc_english_anditya_pipeline_en_5.5.0_3.0_1725525852663.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_marc_english_anditya_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_marc_english_anditya_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_marc_english_anditya_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|833.5 MB| + +## References + +https://huggingface.co/anditya/xlm-roberta-base-finetuned-marc-en + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_marc_english_danwilbury_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_marc_english_danwilbury_en.md new file mode 100644 index 00000000000000..abfde8f11d9160 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_marc_english_danwilbury_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_marc_english_danwilbury XlmRoBertaForSequenceClassification from danwilbury +author: John Snow Labs +name: xlm_roberta_base_finetuned_marc_english_danwilbury +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_marc_english_danwilbury` is a English model originally trained by danwilbury. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_marc_english_danwilbury_en_5.5.0_3.0_1725513518239.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_marc_english_danwilbury_en_5.5.0_3.0_1725513518239.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_finetuned_marc_english_danwilbury", "en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_finetuned_marc_english_danwilbury", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_marc_english_danwilbury| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|833.4 MB| + +## References + +https://huggingface.co/danwilbury/xlm-roberta-base-finetuned-marc-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_marc_english_dummy_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_marc_english_dummy_en.md new file mode 100644 index 00000000000000..b0620e42d24cf8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_marc_english_dummy_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_marc_english_dummy XlmRoBertaForSequenceClassification from lewtun +author: John Snow Labs +name: xlm_roberta_base_finetuned_marc_english_dummy +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_marc_english_dummy` is a English model originally trained by lewtun. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_marc_english_dummy_en_5.5.0_3.0_1725537290576.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_marc_english_dummy_en_5.5.0_3.0_1725537290576.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_finetuned_marc_english_dummy", "en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_finetuned_marc_english_dummy", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_marc_english_dummy| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|833.5 MB| + +## References + +https://huggingface.co/lewtun/xlm-roberta-base-finetuned-marc-en-dummy \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_marc_english_dummy_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_marc_english_dummy_pipeline_en.md new file mode 100644 index 00000000000000..dd3b58c7000812 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_marc_english_dummy_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_marc_english_dummy_pipeline pipeline XlmRoBertaForSequenceClassification from lewtun +author: John Snow Labs +name: xlm_roberta_base_finetuned_marc_english_dummy_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_marc_english_dummy_pipeline` is a English model originally trained by lewtun. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_marc_english_dummy_pipeline_en_5.5.0_3.0_1725537386785.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_marc_english_dummy_pipeline_en_5.5.0_3.0_1725537386785.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_marc_english_dummy_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_marc_english_dummy_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_marc_english_dummy_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|833.5 MB| + +## References + +https://huggingface.co/lewtun/xlm-roberta-base-finetuned-marc-en-dummy + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_nace_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_nace_pipeline_en.md new file mode 100644 index 00000000000000..818e4e3543922c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_nace_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_nace_pipeline pipeline XlmRoBertaForSequenceClassification from erst +author: John Snow Labs +name: xlm_roberta_base_finetuned_nace_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_nace_pipeline` is a English model originally trained by erst. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_nace_pipeline_en_5.5.0_3.0_1725514248601.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_nace_pipeline_en_5.5.0_3.0_1725514248601.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_nace_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_nace_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_nace_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|857.4 MB| + +## References + +https://huggingface.co/erst/xlm-roberta-base-finetuned-nace + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_shona_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_shona_en.md new file mode 100644 index 00000000000000..a851a48167c6d1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_shona_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_shona XlmRoBertaEmbeddings from Davlan +author: John Snow Labs +name: xlm_roberta_base_finetuned_shona +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_shona` is a English model originally trained by Davlan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_shona_en_5.5.0_3.0_1725555878329.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_shona_en_5.5.0_3.0_1725555878329.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("xlm_roberta_base_finetuned_shona","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("xlm_roberta_base_finetuned_shona","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
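+
+To pull the token vectors out of the annotation column produced above, one option is Spark NLP's `EmbeddingsFinisher`; this sketch reuses `pipelineModel` and `data` from the Python example and assumes the standard `sparknlp.base` import location:
+
+```python
+from sparknlp.base import EmbeddingsFinisher
+
+# Convert the `embeddings` annotations into plain Spark vectors.
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["finished_embeddings"]) \
+    .setOutputAsVector(True)
+
+result = finisher.transform(pipelineModel.transform(data))
+result.selectExpr("explode(finished_embeddings) as token_vector").show(truncate=80)
+```
+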
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_shona| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[xlm_roberta]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Davlan/xlm-roberta-base-finetuned-shona \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_shona_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_shona_pipeline_en.md new file mode 100644 index 00000000000000..c8e05b3bc1562e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_shona_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_shona_pipeline pipeline XlmRoBertaEmbeddings from Davlan +author: John Snow Labs +name: xlm_roberta_base_finetuned_shona_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_shona_pipeline` is a English model originally trained by Davlan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_shona_pipeline_en_5.5.0_3.0_1725555932221.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_shona_pipeline_en_5.5.0_3.0_1725555932221.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_shona_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_shona_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_shona_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Davlan/xlm-roberta-base-finetuned-shona + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_sinquad_v1_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_sinquad_v1_en.md new file mode 100644 index 00000000000000..f821fcdf7fad95 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_sinquad_v1_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_sinquad_v1 XlmRoBertaForQuestionAnswering from 9wimu9 +author: John Snow Labs +name: xlm_roberta_base_finetuned_sinquad_v1 +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_sinquad_v1` is a English model originally trained by 9wimu9. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_sinquad_v1_en_5.5.0_3.0_1725557918102.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_sinquad_v1_en_5.5.0_3.0_1725557918102.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_finetuned_sinquad_v1","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_finetuned_sinquad_v1", "en")
+    .setInputCols(Array("document_question", "document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
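+
+Once the pipeline has run, the predicted span lives in the `answer` column set on the span classifier above; a short sketch reusing `pipelineDF` from the Python example:
+
+```python
+# Each row carries the extracted answer annotations; `result` holds the answer text.
+pipelineDF.selectExpr("explode(answer.result) as predicted_answer").show(truncate=False)
+```
+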
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_sinquad_v1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|832.3 MB| + +## References + +https://huggingface.co/9wimu9/xlm-roberta-base-finetuned-sinquad-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_somali_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_somali_en.md new file mode 100644 index 00000000000000..13d6863d9014f9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_somali_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_somali XlmRoBertaEmbeddings from Davlan +author: John Snow Labs +name: xlm_roberta_base_finetuned_somali +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_somali` is a English model originally trained by Davlan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_somali_en_5.5.0_3.0_1725532035295.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_somali_en_5.5.0_3.0_1725532035295.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("xlm_roberta_base_finetuned_somali","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("xlm_roberta_base_finetuned_somali","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_somali| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[xlm_roberta]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Davlan/xlm-roberta-base-finetuned-somali \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_somali_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_somali_pipeline_en.md new file mode 100644 index 00000000000000..e82dea87c703d1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_somali_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_somali_pipeline pipeline XlmRoBertaEmbeddings from Davlan +author: John Snow Labs +name: xlm_roberta_base_finetuned_somali_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_somali_pipeline` is a English model originally trained by Davlan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_somali_pipeline_en_5.5.0_3.0_1725532086368.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_somali_pipeline_en_5.5.0_3.0_1725532086368.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_somali_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_somali_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_somali_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Davlan/xlm-roberta-base-finetuned-somali + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_squad_1_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_squad_1_en.md new file mode 100644 index 00000000000000..9c2e539b43a078 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuned_squad_1_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_squad_1 XlmRoBertaForQuestionAnswering from kianshokraneh +author: John Snow Labs +name: xlm_roberta_base_finetuned_squad_1 +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_squad_1` is a English model originally trained by kianshokraneh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_squad_1_en_5.5.0_3.0_1725499025478.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_squad_1_en_5.5.0_3.0_1725499025478.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_finetuned_squad_1","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_finetuned_squad_1", "en")
+    .setInputCols(Array("document_question", "document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_squad_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|812.2 MB| + +## References + +https://huggingface.co/kianshokraneh/xlm-roberta-base-finetuned-squad-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuning_semeval_all_then_wrime_all_first_epoch3_test01_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuning_semeval_all_then_wrime_all_first_epoch3_test01_en.md new file mode 100644 index 00000000000000..e3505e9019b053 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuning_semeval_all_then_wrime_all_first_epoch3_test01_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuning_semeval_all_then_wrime_all_first_epoch3_test01 XlmRoBertaForSequenceClassification from grrfdghebsz +author: John Snow Labs +name: xlm_roberta_base_finetuning_semeval_all_then_wrime_all_first_epoch3_test01 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuning_semeval_all_then_wrime_all_first_epoch3_test01` is a English model originally trained by grrfdghebsz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuning_semeval_all_then_wrime_all_first_epoch3_test01_en_5.5.0_3.0_1725536630699.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuning_semeval_all_then_wrime_all_first_epoch3_test01_en_5.5.0_3.0_1725536630699.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_finetuning_semeval_all_then_wrime_all_first_epoch3_test01","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_finetuning_semeval_all_then_wrime_all_first_epoch3_test01", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
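+
+For ad-hoc strings it can be more convenient to wrap the fitted model in a `LightPipeline` than to build a DataFrame; a small sketch reusing `pipelineModel` from the Python example (the `class` key mirrors the classifier's output column):
+
+```python
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+
+# annotate() returns a dict keyed by each stage's output column.
+print(light.annotate("I love spark-nlp")["class"])
+```
+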
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuning_semeval_all_then_wrime_all_first_epoch3_test01| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|812.5 MB| + +## References + +https://huggingface.co/grrfdghebsz/xlm-roberta-base-finetuning-semeval-all-then-wrime-all-first-epoch3-test01 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuning_semeval_all_then_wrime_all_first_epoch3_test01_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuning_semeval_all_then_wrime_all_first_epoch3_test01_pipeline_en.md new file mode 100644 index 00000000000000..80950d456cf942 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_finetuning_semeval_all_then_wrime_all_first_epoch3_test01_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuning_semeval_all_then_wrime_all_first_epoch3_test01_pipeline pipeline XlmRoBertaForSequenceClassification from grrfdghebsz +author: John Snow Labs +name: xlm_roberta_base_finetuning_semeval_all_then_wrime_all_first_epoch3_test01_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuning_semeval_all_then_wrime_all_first_epoch3_test01_pipeline` is a English model originally trained by grrfdghebsz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuning_semeval_all_then_wrime_all_first_epoch3_test01_pipeline_en_5.5.0_3.0_1725536751126.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuning_semeval_all_then_wrime_all_first_epoch3_test01_pipeline_en_5.5.0_3.0_1725536751126.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuning_semeval_all_then_wrime_all_first_epoch3_test01_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuning_semeval_all_then_wrime_all_first_epoch3_test01_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuning_semeval_all_then_wrime_all_first_epoch3_test01_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|812.6 MB| + +## References + +https://huggingface.co/grrfdghebsz/xlm-roberta-base-finetuning-semeval-all-then-wrime-all-first-epoch3-test01 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_indonesian_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_indonesian_en.md new file mode 100644 index 00000000000000..1a977571cdfc73 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_indonesian_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_indonesian XlmRoBertaEmbeddings from harish +author: John Snow Labs +name: xlm_roberta_base_indonesian +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_indonesian` is a English model originally trained by harish. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_indonesian_en_5.5.0_3.0_1725555285395.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_indonesian_en_5.5.0_3.0_1725555285395.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("xlm_roberta_base_indonesian","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("xlm_roberta_base_indonesian","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_indonesian| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[xlm_roberta]| +|Language:|en| +|Size:|653.1 MB| + +## References + +https://huggingface.co/harish/xlm-roberta-base-ID \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_indonesian_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_indonesian_pipeline_en.md new file mode 100644 index 00000000000000..43be2a387c24cd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_indonesian_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_indonesian_pipeline pipeline XlmRoBertaEmbeddings from harish +author: John Snow Labs +name: xlm_roberta_base_indonesian_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_indonesian_pipeline` is a English model originally trained by harish. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_indonesian_pipeline_en_5.5.0_3.0_1725555477936.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_indonesian_pipeline_en_5.5.0_3.0_1725555477936.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_indonesian_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_indonesian_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_indonesian_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|653.1 MB| + +## References + +https://huggingface.co/harish/xlm-roberta-base-ID + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_lr0_001_seed42_amh_hau_eng_train_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_lr0_001_seed42_amh_hau_eng_train_en.md new file mode 100644 index 00000000000000..23bdbe7cfa6197 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_lr0_001_seed42_amh_hau_eng_train_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_lr0_001_seed42_amh_hau_eng_train XlmRoBertaForSequenceClassification from shanhy +author: John Snow Labs +name: xlm_roberta_base_lr0_001_seed42_amh_hau_eng_train +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_lr0_001_seed42_amh_hau_eng_train` is a English model originally trained by shanhy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_lr0_001_seed42_amh_hau_eng_train_en_5.5.0_3.0_1725530019937.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_lr0_001_seed42_amh_hau_eng_train_en_5.5.0_3.0_1725530019937.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_lr0_001_seed42_amh_hau_eng_train","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_lr0_001_seed42_amh_hau_eng_train", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_lr0_001_seed42_amh_hau_eng_train| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|830.4 MB| + +## References + +https://huggingface.co/shanhy/xlm-roberta-base_lr0.001_seed42_amh-hau-eng_train \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_lr0_001_seed42_amh_hau_eng_train_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_lr0_001_seed42_amh_hau_eng_train_pipeline_en.md new file mode 100644 index 00000000000000..24989e1fbff0cb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_lr0_001_seed42_amh_hau_eng_train_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_lr0_001_seed42_amh_hau_eng_train_pipeline pipeline XlmRoBertaForSequenceClassification from shanhy +author: John Snow Labs +name: xlm_roberta_base_lr0_001_seed42_amh_hau_eng_train_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_lr0_001_seed42_amh_hau_eng_train_pipeline` is a English model originally trained by shanhy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_lr0_001_seed42_amh_hau_eng_train_pipeline_en_5.5.0_3.0_1725530105915.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_lr0_001_seed42_amh_hau_eng_train_pipeline_en_5.5.0_3.0_1725530105915.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_lr0_001_seed42_amh_hau_eng_train_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_lr0_001_seed42_amh_hau_eng_train_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_lr0_001_seed42_amh_hau_eng_train_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|830.4 MB| + +## References + +https://huggingface.co/shanhy/xlm-roberta-base_lr0.001_seed42_amh-hau-eng_train + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_amh_eng_train_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_amh_eng_train_en.md new file mode 100644 index 00000000000000..8ea2822501775d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_amh_eng_train_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_amh_eng_train XlmRoBertaForSequenceClassification from shanhy +author: John Snow Labs +name: xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_amh_eng_train +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_amh_eng_train` is a English model originally trained by shanhy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_amh_eng_train_en_5.5.0_3.0_1725537423602.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_amh_eng_train_en_5.5.0_3.0_1725537423602.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_amh_eng_train","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_amh_eng_train", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_amh_eng_train| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|799.1 MB| + +## References + +https://huggingface.co/shanhy/xlm-roberta-base_lr2e-05_seed42_basic_original_kin-amh-eng_train \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_amh_eng_train_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_amh_eng_train_pipeline_en.md new file mode 100644 index 00000000000000..cdd22034e819f0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_amh_eng_train_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_amh_eng_train_pipeline pipeline XlmRoBertaForSequenceClassification from shanhy +author: John Snow Labs +name: xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_amh_eng_train_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_amh_eng_train_pipeline` is a English model originally trained by shanhy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_amh_eng_train_pipeline_en_5.5.0_3.0_1725537562525.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_amh_eng_train_pipeline_en_5.5.0_3.0_1725537562525.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_amh_eng_train_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_amh_eng_train_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_amh_eng_train_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|799.1 MB| + +## References + +https://huggingface.co/shanhy/xlm-roberta-base_lr2e-05_seed42_basic_original_kin-amh-eng_train + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_hau_eng_train_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_hau_eng_train_en.md new file mode 100644 index 00000000000000..55c9fff4130b55 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_hau_eng_train_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_hau_eng_train XlmRoBertaForSequenceClassification from shanhy +author: John Snow Labs +name: xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_hau_eng_train +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_hau_eng_train` is a English model originally trained by shanhy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_hau_eng_train_en_5.5.0_3.0_1725536780552.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_hau_eng_train_en_5.5.0_3.0_1725536780552.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_hau_eng_train","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_hau_eng_train", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_hau_eng_train| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|797.4 MB| + +## References + +https://huggingface.co/shanhy/xlm-roberta-base_lr2e-05_seed42_basic_original_kin-hau-eng_train \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_hau_eng_train_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_hau_eng_train_pipeline_en.md new file mode 100644 index 00000000000000..e8160a95b2b0e7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_hau_eng_train_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_hau_eng_train_pipeline pipeline XlmRoBertaForSequenceClassification from shanhy +author: John Snow Labs +name: xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_hau_eng_train_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_hau_eng_train_pipeline` is a English model originally trained by shanhy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_hau_eng_train_pipeline_en_5.5.0_3.0_1725536921354.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_hau_eng_train_pipeline_en_5.5.0_3.0_1725536921354.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_hau_eng_train_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_hau_eng_train_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_lr2e_05_seed42_basic_original_kinyarwanda_hau_eng_train_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|797.4 MB| + +## References + +https://huggingface.co/shanhy/xlm-roberta-base_lr2e-05_seed42_basic_original_kin-hau-eng_train + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_mixed_replace_vietnamese_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_mixed_replace_vietnamese_en.md new file mode 100644 index 00000000000000..1f5bb1512d22e0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_mixed_replace_vietnamese_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_mixed_replace_vietnamese XlmRoBertaForSequenceClassification from ThuyNT03 +author: John Snow Labs +name: xlm_roberta_base_mixed_replace_vietnamese +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_mixed_replace_vietnamese` is a English model originally trained by ThuyNT03. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_mixed_replace_vietnamese_en_5.5.0_3.0_1725526230018.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_mixed_replace_vietnamese_en_5.5.0_3.0_1725526230018.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_mixed_replace_vietnamese","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_mixed_replace_vietnamese", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_mixed_replace_vietnamese| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|838.1 MB| + +## References + +https://huggingface.co/ThuyNT03/xlm-roberta-base-Mixed-replace-vi \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_mixed_replace_vietnamese_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_mixed_replace_vietnamese_pipeline_en.md new file mode 100644 index 00000000000000..6073103eb7928a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_mixed_replace_vietnamese_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_mixed_replace_vietnamese_pipeline pipeline XlmRoBertaForSequenceClassification from ThuyNT03 +author: John Snow Labs +name: xlm_roberta_base_mixed_replace_vietnamese_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_mixed_replace_vietnamese_pipeline` is a English model originally trained by ThuyNT03. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_mixed_replace_vietnamese_pipeline_en_5.5.0_3.0_1725526297179.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_mixed_replace_vietnamese_pipeline_en_5.5.0_3.0_1725526297179.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_mixed_replace_vietnamese_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_mixed_replace_vietnamese_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_mixed_replace_vietnamese_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|838.2 MB| + +## References + +https://huggingface.co/ThuyNT03/xlm-roberta-base-Mixed-replace-vi + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad2_finetuned_squad2_covidqa_v2_all_data_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad2_finetuned_squad2_covidqa_v2_all_data_en.md new file mode 100644 index 00000000000000..10f702640f7491 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad2_finetuned_squad2_covidqa_v2_all_data_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English xlm_roberta_base_squad2_finetuned_squad2_covidqa_v2_all_data XlmRoBertaForQuestionAnswering from chiendvhust +author: John Snow Labs +name: xlm_roberta_base_squad2_finetuned_squad2_covidqa_v2_all_data +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_squad2_finetuned_squad2_covidqa_v2_all_data` is a English model originally trained by chiendvhust. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_squad2_finetuned_squad2_covidqa_v2_all_data_en_5.5.0_3.0_1725570903408.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_squad2_finetuned_squad2_covidqa_v2_all_data_en_5.5.0_3.0_1725570903408.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_squad2_finetuned_squad2_covidqa_v2_all_data","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_squad2_finetuned_squad2_covidqa_v2_all_data", "en")
+    .setInputCols(Array("document_question", "document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_squad2_finetuned_squad2_covidqa_v2_all_data| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|876.4 MB| + +## References + +https://huggingface.co/chiendvhust/xlm-roberta-base-squad2-finetuned-squad2-covidQA-V2-all-data \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad2_finetuned_squad2_covidqa_v2_all_data_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad2_finetuned_squad2_covidqa_v2_all_data_pipeline_en.md new file mode 100644 index 00000000000000..2c5a80afe252d9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad2_finetuned_squad2_covidqa_v2_all_data_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlm_roberta_base_squad2_finetuned_squad2_covidqa_v2_all_data_pipeline pipeline XlmRoBertaForQuestionAnswering from chiendvhust +author: John Snow Labs +name: xlm_roberta_base_squad2_finetuned_squad2_covidqa_v2_all_data_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_squad2_finetuned_squad2_covidqa_v2_all_data_pipeline` is a English model originally trained by chiendvhust. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_squad2_finetuned_squad2_covidqa_v2_all_data_pipeline_en_5.5.0_3.0_1725570974518.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_squad2_finetuned_squad2_covidqa_v2_all_data_pipeline_en_5.5.0_3.0_1725570974518.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_squad2_finetuned_squad2_covidqa_v2_all_data_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_squad2_finetuned_squad2_covidqa_v2_all_data_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
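+
+In the snippet above, `df` is an input Spark DataFrame that the card leaves undefined; since the pipeline bundles a MultiDocumentAssembler, it expects a question and a context column. A minimal end-to-end sketch under that assumption (the column names are assumed, not taken from the original card):
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# Hypothetical input DataFrame; column names assumed to match the bundled MultiDocumentAssembler
+df = spark.createDataFrame(
+    [("What framework do I use?", "I use spark-nlp.")],
+    ["question", "context"]
+)
+
+pipeline = PretrainedPipeline("xlm_roberta_base_squad2_finetuned_squad2_covidqa_v2_all_data_pipeline", lang="en")
+pipeline.transform(df).select("answer.result").show(truncate=False)
+```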
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_squad2_finetuned_squad2_covidqa_v2_all_data_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|876.4 MB| + +## References + +https://huggingface.co/chiendvhust/xlm-roberta-base-squad2-finetuned-squad2-covidQA-V2-all-data + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad2_finetuned_squad_vnktrmnb_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad2_finetuned_squad_vnktrmnb_en.md new file mode 100644 index 00000000000000..b438c81d894379 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad2_finetuned_squad_vnktrmnb_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English xlm_roberta_base_squad2_finetuned_squad_vnktrmnb XlmRoBertaForQuestionAnswering from vnktrmnb +author: John Snow Labs +name: xlm_roberta_base_squad2_finetuned_squad_vnktrmnb +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_squad2_finetuned_squad_vnktrmnb` is a English model originally trained by vnktrmnb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_squad2_finetuned_squad_vnktrmnb_en_5.5.0_3.0_1725567869292.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_squad2_finetuned_squad_vnktrmnb_en_5.5.0_3.0_1725567869292.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import MultiDocumentAssembler
+from sparknlp.annotator import XlmRoBertaForQuestionAnswering
+from pyspark.ml import Pipeline
+
+# Assemble the raw question and context columns into document annotations
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+# Load the pretrained span classifier for extractive question answering
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_squad2_finetuned_squad_vnktrmnb","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// Assemble the raw question and context columns into document annotations
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+// Load the pretrained span classifier for extractive question answering
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_squad2_finetuned_squad_vnktrmnb", "en")
+  .setInputCols(Array("document_question", "document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_squad2_finetuned_squad_vnktrmnb| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|878.3 MB| + +## References + +https://huggingface.co/vnktrmnb/xlm-roberta-base-squad2-finetuned-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad_dutch_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad_dutch_en.md new file mode 100644 index 00000000000000..7ba1783b89fd18 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad_dutch_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English xlm_roberta_base_squad_dutch XlmRoBertaForQuestionAnswering from Nadav +author: John Snow Labs +name: xlm_roberta_base_squad_dutch +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_squad_dutch` is a English model originally trained by Nadav. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_squad_dutch_en_5.5.0_3.0_1725557237564.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_squad_dutch_en_5.5.0_3.0_1725557237564.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import MultiDocumentAssembler
+from sparknlp.annotator import XlmRoBertaForQuestionAnswering
+from pyspark.ml import Pipeline
+
+# Assemble the raw question and context columns into document annotations
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+# Load the pretrained span classifier for extractive question answering
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_squad_dutch","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// Assemble the raw question and context columns into document annotations
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+// Load the pretrained span classifier for extractive question answering
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_squad_dutch", "en")
+  .setInputCols(Array("document_question", "document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_squad_dutch| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|876.2 MB| + +## References + +https://huggingface.co/Nadav/xlm-roberta-base-squad-nl \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad_dutch_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad_dutch_pipeline_en.md new file mode 100644 index 00000000000000..443778f2aa2168 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad_dutch_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlm_roberta_base_squad_dutch_pipeline pipeline XlmRoBertaForQuestionAnswering from Nadav +author: John Snow Labs +name: xlm_roberta_base_squad_dutch_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_squad_dutch_pipeline` is a English model originally trained by Nadav. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_squad_dutch_pipeline_en_5.5.0_3.0_1725557302921.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_squad_dutch_pipeline_en_5.5.0_3.0_1725557302921.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_squad_dutch_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_squad_dutch_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_squad_dutch_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|876.2 MB| + +## References + +https://huggingface.co/Nadav/xlm-roberta-base-squad-nl + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad_finetuned_on_runaways_dutch_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad_finetuned_on_runaways_dutch_en.md new file mode 100644 index 00000000000000..7252cb4b041ee0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad_finetuned_on_runaways_dutch_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English xlm_roberta_base_squad_finetuned_on_runaways_dutch XlmRoBertaForQuestionAnswering from Nadav +author: John Snow Labs +name: xlm_roberta_base_squad_finetuned_on_runaways_dutch +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_squad_finetuned_on_runaways_dutch` is a English model originally trained by Nadav. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_squad_finetuned_on_runaways_dutch_en_5.5.0_3.0_1725499364603.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_squad_finetuned_on_runaways_dutch_en_5.5.0_3.0_1725499364603.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import MultiDocumentAssembler
+from sparknlp.annotator import XlmRoBertaForQuestionAnswering
+from pyspark.ml import Pipeline
+
+# Assemble the raw question and context columns into document annotations
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+# Load the pretrained span classifier for extractive question answering
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_squad_finetuned_on_runaways_dutch","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// Assemble the raw question and context columns into document annotations
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+// Load the pretrained span classifier for extractive question answering
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_squad_finetuned_on_runaways_dutch", "en")
+  .setInputCols(Array("document_question", "document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_squad_finetuned_on_runaways_dutch| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Nadav/xlm-roberta-base-squad-finetuned-on-runaways-nl \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad_finetuned_on_runaways_english_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad_finetuned_on_runaways_english_en.md new file mode 100644 index 00000000000000..12fd589d28e7b7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad_finetuned_on_runaways_english_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English xlm_roberta_base_squad_finetuned_on_runaways_english XlmRoBertaForQuestionAnswering from Nadav +author: John Snow Labs +name: xlm_roberta_base_squad_finetuned_on_runaways_english +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_squad_finetuned_on_runaways_english` is a English model originally trained by Nadav. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_squad_finetuned_on_runaways_english_en_5.5.0_3.0_1725573739469.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_squad_finetuned_on_runaways_english_en_5.5.0_3.0_1725573739469.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import MultiDocumentAssembler
+from sparknlp.annotator import XlmRoBertaForQuestionAnswering
+from pyspark.ml import Pipeline
+
+# Assemble the raw question and context columns into document annotations
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+# Load the pretrained span classifier for extractive question answering
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_squad_finetuned_on_runaways_english","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// Assemble the raw question and context columns into document annotations
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+// Load the pretrained span classifier for extractive question answering
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_squad_finetuned_on_runaways_english", "en")
+  .setInputCols(Array("document_question", "document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_squad_finetuned_on_runaways_english| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Nadav/xlm-roberta-base-squad-finetuned-on-runaways-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad_finetuned_on_runaways_english_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad_finetuned_on_runaways_english_pipeline_en.md new file mode 100644 index 00000000000000..d5f93a518108db --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad_finetuned_on_runaways_english_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlm_roberta_base_squad_finetuned_on_runaways_english_pipeline pipeline XlmRoBertaForQuestionAnswering from Nadav +author: John Snow Labs +name: xlm_roberta_base_squad_finetuned_on_runaways_english_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_squad_finetuned_on_runaways_english_pipeline` is a English model originally trained by Nadav. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_squad_finetuned_on_runaways_english_pipeline_en_5.5.0_3.0_1725573794513.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_squad_finetuned_on_runaways_english_pipeline_en_5.5.0_3.0_1725573794513.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_squad_finetuned_on_runaways_english_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_squad_finetuned_on_runaways_english_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_squad_finetuned_on_runaways_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Nadav/xlm-roberta-base-squad-finetuned-on-runaways-en + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad_finetuned_on_runaways_french_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad_finetuned_on_runaways_french_en.md new file mode 100644 index 00000000000000..90f6900cff9dd7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad_finetuned_on_runaways_french_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English xlm_roberta_base_squad_finetuned_on_runaways_french XlmRoBertaForQuestionAnswering from Nadav +author: John Snow Labs +name: xlm_roberta_base_squad_finetuned_on_runaways_french +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_squad_finetuned_on_runaways_french` is a English model originally trained by Nadav. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_squad_finetuned_on_runaways_french_en_5.5.0_3.0_1725570459286.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_squad_finetuned_on_runaways_french_en_5.5.0_3.0_1725570459286.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import MultiDocumentAssembler
+from sparknlp.annotator import XlmRoBertaForQuestionAnswering
+from pyspark.ml import Pipeline
+
+# Assemble the raw question and context columns into document annotations
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+# Load the pretrained span classifier for extractive question answering
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_squad_finetuned_on_runaways_french","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// Assemble the raw question and context columns into document annotations
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+// Load the pretrained span classifier for extractive question answering
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_squad_finetuned_on_runaways_french", "en")
+  .setInputCols(Array("document_question", "document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_squad_finetuned_on_runaways_french| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Nadav/xlm-roberta-base-squad-finetuned-on-runaways-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad_finetuned_on_runaways_french_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad_finetuned_on_runaways_french_pipeline_en.md new file mode 100644 index 00000000000000..fe2035e2a0f3c2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_squad_finetuned_on_runaways_french_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlm_roberta_base_squad_finetuned_on_runaways_french_pipeline pipeline XlmRoBertaForQuestionAnswering from Nadav +author: John Snow Labs +name: xlm_roberta_base_squad_finetuned_on_runaways_french_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_squad_finetuned_on_runaways_french_pipeline` is a English model originally trained by Nadav. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_squad_finetuned_on_runaways_french_pipeline_en_5.5.0_3.0_1725570515523.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_squad_finetuned_on_runaways_french_pipeline_en_5.5.0_3.0_1725570515523.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_squad_finetuned_on_runaways_french_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_squad_finetuned_on_runaways_french_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_squad_finetuned_on_runaways_french_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Nadav/xlm-roberta-base-squad-finetuned-on-runaways-fr + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_trimmed_french_60000_tweet_sentiment_french_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_trimmed_french_60000_tweet_sentiment_french_en.md new file mode 100644 index 00000000000000..5f1d502c84f831 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_trimmed_french_60000_tweet_sentiment_french_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_trimmed_french_60000_tweet_sentiment_french XlmRoBertaForSequenceClassification from vocabtrimmer +author: John Snow Labs +name: xlm_roberta_base_trimmed_french_60000_tweet_sentiment_french +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_trimmed_french_60000_tweet_sentiment_french` is a English model originally trained by vocabtrimmer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_trimmed_french_60000_tweet_sentiment_french_en_5.5.0_3.0_1725529174746.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_trimmed_french_60000_tweet_sentiment_french_en_5.5.0_3.0_1725529174746.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+# Turn the raw text column into document annotations
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# Load the pretrained sequence classifier
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_trimmed_french_60000_tweet_sentiment_french","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// Turn the raw text column into document annotations
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+// Load the pretrained sequence classifier
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_trimmed_french_60000_tweet_sentiment_french", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
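+
+The predicted label is written to the `class` output column of the transformed DataFrame. A minimal sketch of reading it back, assuming `pipelineDF` from the example above:
+
+```python
+# Inspect the predicted sentiment label for each input row (illustrative sketch, not part of the original card)
+pipelineDF.select("text", "class.result").show(truncate=False)
+```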
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_trimmed_french_60000_tweet_sentiment_french| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|442.6 MB| + +## References + +https://huggingface.co/vocabtrimmer/xlm-roberta-base-trimmed-fr-60000-tweet-sentiment-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_arabic_trimmed_arabic_15000_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_arabic_trimmed_arabic_15000_en.md new file mode 100644 index 00000000000000..c17a6da7f37df4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_arabic_trimmed_arabic_15000_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_tweet_sentiment_arabic_trimmed_arabic_15000 XlmRoBertaForSequenceClassification from vocabtrimmer +author: John Snow Labs +name: xlm_roberta_base_tweet_sentiment_arabic_trimmed_arabic_15000 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_tweet_sentiment_arabic_trimmed_arabic_15000` is a English model originally trained by vocabtrimmer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_tweet_sentiment_arabic_trimmed_arabic_15000_en_5.5.0_3.0_1725526553494.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_tweet_sentiment_arabic_trimmed_arabic_15000_en_5.5.0_3.0_1725526553494.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+# Turn the raw text column into document annotations
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# Load the pretrained sequence classifier
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_tweet_sentiment_arabic_trimmed_arabic_15000","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// Turn the raw text column into document annotations
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+// Load the pretrained sequence classifier
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_tweet_sentiment_arabic_trimmed_arabic_15000", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_tweet_sentiment_arabic_trimmed_arabic_15000| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|359.6 MB| + +## References + +https://huggingface.co/vocabtrimmer/xlm-roberta-base-tweet-sentiment-ar-trimmed-ar-15000 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_arabic_trimmed_arabic_15000_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_arabic_trimmed_arabic_15000_pipeline_en.md new file mode 100644 index 00000000000000..f63dded9d35206 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_arabic_trimmed_arabic_15000_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_tweet_sentiment_arabic_trimmed_arabic_15000_pipeline pipeline XlmRoBertaForSequenceClassification from vocabtrimmer +author: John Snow Labs +name: xlm_roberta_base_tweet_sentiment_arabic_trimmed_arabic_15000_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_tweet_sentiment_arabic_trimmed_arabic_15000_pipeline` is a English model originally trained by vocabtrimmer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_tweet_sentiment_arabic_trimmed_arabic_15000_pipeline_en_5.5.0_3.0_1725526574979.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_tweet_sentiment_arabic_trimmed_arabic_15000_pipeline_en_5.5.0_3.0_1725526574979.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_tweet_sentiment_arabic_trimmed_arabic_15000_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_tweet_sentiment_arabic_trimmed_arabic_15000_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
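+
+As in the other pipeline examples, `df` is an assumed input DataFrame; since this pipeline starts with a DocumentAssembler, a single text column is the natural shape. A minimal sketch under that assumption (the `text` column name is not taken from the original card):
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# Hypothetical single-column input; the "text" column name is an assumption
+df = spark.createDataFrame([("I love spark-nlp",)], ["text"])
+
+pipeline = PretrainedPipeline("xlm_roberta_base_tweet_sentiment_arabic_trimmed_arabic_15000_pipeline", lang="en")
+pipeline.transform(df).select("class.result").show(truncate=False)
+```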
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_tweet_sentiment_arabic_trimmed_arabic_15000_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|359.6 MB| + +## References + +https://huggingface.co/vocabtrimmer/xlm-roberta-base-tweet-sentiment-ar-trimmed-ar-15000 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_english_trimmed_english_30000_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_english_trimmed_english_30000_en.md new file mode 100644 index 00000000000000..5a105a59821d59 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_english_trimmed_english_30000_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_tweet_sentiment_english_trimmed_english_30000 XlmRoBertaForSequenceClassification from vocabtrimmer +author: John Snow Labs +name: xlm_roberta_base_tweet_sentiment_english_trimmed_english_30000 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_tweet_sentiment_english_trimmed_english_30000` is a English model originally trained by vocabtrimmer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_tweet_sentiment_english_trimmed_english_30000_en_5.5.0_3.0_1725530063769.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_tweet_sentiment_english_trimmed_english_30000_en_5.5.0_3.0_1725530063769.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+# Turn the raw text column into document annotations
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# Load the pretrained sequence classifier
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_tweet_sentiment_english_trimmed_english_30000","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// Turn the raw text column into document annotations
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+// Load the pretrained sequence classifier
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_tweet_sentiment_english_trimmed_english_30000", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_tweet_sentiment_english_trimmed_english_30000| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|389.9 MB| + +## References + +https://huggingface.co/vocabtrimmer/xlm-roberta-base-tweet-sentiment-en-trimmed-en-30000 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_english_trimmed_english_30000_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_english_trimmed_english_30000_pipeline_en.md new file mode 100644 index 00000000000000..d13961ab6e98b6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_english_trimmed_english_30000_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_tweet_sentiment_english_trimmed_english_30000_pipeline pipeline XlmRoBertaForSequenceClassification from vocabtrimmer +author: John Snow Labs +name: xlm_roberta_base_tweet_sentiment_english_trimmed_english_30000_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_tweet_sentiment_english_trimmed_english_30000_pipeline` is a English model originally trained by vocabtrimmer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_tweet_sentiment_english_trimmed_english_30000_pipeline_en_5.5.0_3.0_1725530091143.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_tweet_sentiment_english_trimmed_english_30000_pipeline_en_5.5.0_3.0_1725530091143.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_tweet_sentiment_english_trimmed_english_30000_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_tweet_sentiment_english_trimmed_english_30000_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_tweet_sentiment_english_trimmed_english_30000_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|389.9 MB| + +## References + +https://huggingface.co/vocabtrimmer/xlm-roberta-base-tweet-sentiment-en-trimmed-en-30000 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_french_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_french_en.md new file mode 100644 index 00000000000000..afe4a7fe889ccf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_french_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_tweet_sentiment_french XlmRoBertaForSequenceClassification from cardiffnlp +author: John Snow Labs +name: xlm_roberta_base_tweet_sentiment_french +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_tweet_sentiment_french` is a English model originally trained by cardiffnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_tweet_sentiment_french_en_5.5.0_3.0_1725514140254.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_tweet_sentiment_french_en_5.5.0_3.0_1725514140254.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+# Turn the raw text column into document annotations
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# Load the pretrained sequence classifier
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_tweet_sentiment_french","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// Turn the raw text column into document annotations
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+// Load the pretrained sequence classifier
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_tweet_sentiment_french", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_tweet_sentiment_french| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|780.6 MB| + +## References + +https://huggingface.co/cardiffnlp/xlm-roberta-base-tweet-sentiment-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_italian_trimmed_italian_10000_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_italian_trimmed_italian_10000_en.md new file mode 100644 index 00000000000000..5a905235fd7c79 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_italian_trimmed_italian_10000_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_tweet_sentiment_italian_trimmed_italian_10000 XlmRoBertaForSequenceClassification from vocabtrimmer +author: John Snow Labs +name: xlm_roberta_base_tweet_sentiment_italian_trimmed_italian_10000 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_tweet_sentiment_italian_trimmed_italian_10000` is a English model originally trained by vocabtrimmer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_tweet_sentiment_italian_trimmed_italian_10000_en_5.5.0_3.0_1725529031348.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_tweet_sentiment_italian_trimmed_italian_10000_en_5.5.0_3.0_1725529031348.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+# Turn the raw text column into document annotations
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# Load the pretrained sequence classifier
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_tweet_sentiment_italian_trimmed_italian_10000","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// Turn the raw text column into document annotations
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+// Load the pretrained sequence classifier
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_tweet_sentiment_italian_trimmed_italian_10000", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_tweet_sentiment_italian_trimmed_italian_10000| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|349.8 MB| + +## References + +https://huggingface.co/vocabtrimmer/xlm-roberta-base-tweet-sentiment-it-trimmed-it-10000 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_italian_trimmed_italian_10000_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_italian_trimmed_italian_10000_pipeline_en.md new file mode 100644 index 00000000000000..2b43b8226db813 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_italian_trimmed_italian_10000_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_tweet_sentiment_italian_trimmed_italian_10000_pipeline pipeline XlmRoBertaForSequenceClassification from vocabtrimmer +author: John Snow Labs +name: xlm_roberta_base_tweet_sentiment_italian_trimmed_italian_10000_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_tweet_sentiment_italian_trimmed_italian_10000_pipeline` is a English model originally trained by vocabtrimmer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_tweet_sentiment_italian_trimmed_italian_10000_pipeline_en_5.5.0_3.0_1725529052140.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_tweet_sentiment_italian_trimmed_italian_10000_pipeline_en_5.5.0_3.0_1725529052140.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_tweet_sentiment_italian_trimmed_italian_10000_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_tweet_sentiment_italian_trimmed_italian_10000_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_tweet_sentiment_italian_trimmed_italian_10000_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|349.8 MB| + +## References + +https://huggingface.co/vocabtrimmer/xlm-roberta-base-tweet-sentiment-it-trimmed-it-10000 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_30000_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_30000_en.md new file mode 100644 index 00000000000000..af834fd8c49de8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_30000_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_30000 XlmRoBertaForSequenceClassification from vocabtrimmer +author: John Snow Labs +name: xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_30000 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_30000` is a English model originally trained by vocabtrimmer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_30000_en_5.5.0_3.0_1725527164197.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_30000_en_5.5.0_3.0_1725527164197.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+# Turn the raw text column into document annotations
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# Load the pretrained sequence classifier
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_30000","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// Turn the raw text column into document annotations
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+// Load the pretrained sequence classifier
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_30000", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_30000| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|387.6 MB| + +## References + +https://huggingface.co/vocabtrimmer/xlm-roberta-base-tweet-sentiment-es-trimmed-es-30000 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_30000_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_30000_pipeline_en.md new file mode 100644 index 00000000000000..5ebb7ec23cf3c2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_30000_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_30000_pipeline pipeline XlmRoBertaForSequenceClassification from vocabtrimmer +author: John Snow Labs +name: xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_30000_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_30000_pipeline` is a English model originally trained by vocabtrimmer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_30000_pipeline_en_5.5.0_3.0_1725527193514.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_30000_pipeline_en_5.5.0_3.0_1725527193514.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_30000_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_30000_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
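+
+The `df` referenced above is assumed to be a Spark DataFrame with a `text` column. A minimal, self-contained sketch of running this pretrained pipeline (the Spanish example sentence and the `class` output column name are illustrative assumptions) could look like:
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()  # start Spark with Spark NLP on the default CPU profile
+
+pipeline = PretrainedPipeline("xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_30000_pipeline", lang="en")
+
+# Batch usage: transform a DataFrame that has a "text" column.
+df = spark.createDataFrame([["Me encanta esta pelicula"]]).toDF("text")
+pipeline.transform(df).select("class.result").show(truncate=False)
+
+# Single-string usage: annotate() returns a dict keyed by the pipeline's output columns.
+print(pipeline.annotate("Me encanta esta pelicula").get("class"))
+```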
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_30000_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|387.6 MB| + +## References + +https://huggingface.co/vocabtrimmer/xlm-roberta-base-tweet-sentiment-es-trimmed-es-30000 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_verdict_classification_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_verdict_classification_en.md new file mode 100644 index 00000000000000..a842ad03c6dbd6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_verdict_classification_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_verdict_classification XlmRoBertaForSequenceClassification from tt1225 +author: John Snow Labs +name: xlm_roberta_base_verdict_classification +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_verdict_classification` is a English model originally trained by tt1225. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_verdict_classification_en_5.5.0_3.0_1725536713731.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_verdict_classification_en_5.5.0_3.0_1725536713731.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_verdict_classification","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_verdict_classification", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_verdict_classification| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|883.2 MB| + +## References + +https://huggingface.co/tt1225/xlm-roberta-base-verdict-classification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_verdict_classification_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_verdict_classification_pipeline_en.md new file mode 100644 index 00000000000000..73739a760271e1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_verdict_classification_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_verdict_classification_pipeline pipeline XlmRoBertaForSequenceClassification from tt1225 +author: John Snow Labs +name: xlm_roberta_base_verdict_classification_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_verdict_classification_pipeline` is a English model originally trained by tt1225. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_verdict_classification_pipeline_en_5.5.0_3.0_1725536774911.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_verdict_classification_pipeline_en_5.5.0_3.0_1725536774911.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_verdict_classification_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_verdict_classification_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_verdict_classification_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|883.2 MB| + +## References + +https://huggingface.co/tt1225/xlm-roberta-base-verdict-classification + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_vietnamese_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_vietnamese_en.md new file mode 100644 index 00000000000000..4bf57346c28a44 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_vietnamese_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English xlm_roberta_base_vietnamese XlmRoBertaForQuestionAnswering from SalmonAI123 +author: John Snow Labs +name: xlm_roberta_base_vietnamese +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_vietnamese` is a English model originally trained by SalmonAI123. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_vietnamese_en_5.5.0_3.0_1725498768051.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_vietnamese_en_5.5.0_3.0_1725498768051.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_vietnamese","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_vietnamese", "en")
+  .setInputCols(Array("document_question", "document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("document_question", "document_context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
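+
+As a brief follow-up sketch (assuming the Python example above has run and `pipelineDF` is available), the extracted answer span can be pulled out of the `answer` annotation column like this:
+
+```python
+import pyspark.sql.functions as F
+
+# "result" on the answer annotations holds the predicted answer text for each question/context pair.
+pipelineDF.select(
+    F.col("document_question.result").alias("question"),
+    F.col("answer.result").alias("answer")
+).show(truncate=False)
+```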
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_vietnamese| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|881.3 MB| + +## References + +https://huggingface.co/SalmonAI123/xlm-roberta-base-vietnamese \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_vietnamese_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_vietnamese_pipeline_en.md new file mode 100644 index 00000000000000..58e87735b2e80b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_vietnamese_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlm_roberta_base_vietnamese_pipeline pipeline XlmRoBertaForQuestionAnswering from SalmonAI123 +author: John Snow Labs +name: xlm_roberta_base_vietnamese_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_vietnamese_pipeline` is a English model originally trained by SalmonAI123. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_vietnamese_pipeline_en_5.5.0_3.0_1725498825546.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_vietnamese_pipeline_en_5.5.0_3.0_1725498825546.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_vietnamese_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_vietnamese_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_vietnamese_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|881.3 MB| + +## References + +https://huggingface.co/SalmonAI123/xlm-roberta-base-vietnamese + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_yelp_mlm_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_yelp_mlm_pipeline_en.md new file mode 100644 index 00000000000000..f7accd7b4092e7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_base_yelp_mlm_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_yelp_mlm_pipeline pipeline XlmRoBertaEmbeddings from Yaxin +author: John Snow Labs +name: xlm_roberta_base_yelp_mlm_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_yelp_mlm_pipeline` is a English model originally trained by Yaxin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_yelp_mlm_pipeline_en_5.5.0_3.0_1725531941109.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_yelp_mlm_pipeline_en_5.5.0_3.0_1725531941109.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_yelp_mlm_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_yelp_mlm_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
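+
+A hedged usage sketch for this embeddings pipeline (the review sentence is illustrative, and the name of the embeddings output column is an assumption; printSchema() shows the actual columns the pipeline adds):
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+pipeline = PretrainedPipeline("xlm_roberta_base_yelp_mlm_pipeline", lang="en")
+df = spark.createDataFrame([["The food was great and the service even better."]]).toDF("text")
+result = pipeline.transform(df)
+
+result.printSchema()  # inspect which annotation columns the pipeline produced
+
+# Assuming the XlmRoBertaEmbeddings stage writes to a column named "embeddings":
+result.selectExpr("explode(embeddings) as emb") \
+      .selectExpr("emb.result as token", "emb.embeddings as vector") \
+      .show(truncate=80)
+```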
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_yelp_mlm_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Yaxin/xlm-roberta-base-yelp-mlm + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_clickbait_detection_nepal_bhasa_data_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_clickbait_detection_nepal_bhasa_data_en.md new file mode 100644 index 00000000000000..c6d5aaf2dec47b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_clickbait_detection_nepal_bhasa_data_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_clickbait_detection_nepal_bhasa_data XlmRoBertaForSequenceClassification from christinacdl +author: John Snow Labs +name: xlm_roberta_clickbait_detection_nepal_bhasa_data +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_clickbait_detection_nepal_bhasa_data` is a English model originally trained by christinacdl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_clickbait_detection_nepal_bhasa_data_en_5.5.0_3.0_1725526137031.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_clickbait_detection_nepal_bhasa_data_en_5.5.0_3.0_1725526137031.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_clickbait_detection_nepal_bhasa_data","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_clickbait_detection_nepal_bhasa_data", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_clickbait_detection_nepal_bhasa_data| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|820.3 MB| + +## References + +https://huggingface.co/christinacdl/XLM_RoBERTa-Clickbait-Detection-NEW-Data \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_clickbait_detection_nepal_bhasa_data_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_clickbait_detection_nepal_bhasa_data_pipeline_en.md new file mode 100644 index 00000000000000..865cd0776e51f9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_clickbait_detection_nepal_bhasa_data_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_clickbait_detection_nepal_bhasa_data_pipeline pipeline XlmRoBertaForSequenceClassification from christinacdl +author: John Snow Labs +name: xlm_roberta_clickbait_detection_nepal_bhasa_data_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_clickbait_detection_nepal_bhasa_data_pipeline` is a English model originally trained by christinacdl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_clickbait_detection_nepal_bhasa_data_pipeline_en_5.5.0_3.0_1725526251422.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_clickbait_detection_nepal_bhasa_data_pipeline_en_5.5.0_3.0_1725526251422.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_clickbait_detection_nepal_bhasa_data_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_clickbait_detection_nepal_bhasa_data_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_clickbait_detection_nepal_bhasa_data_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|820.3 MB| + +## References + +https://huggingface.co/christinacdl/XLM_RoBERTa-Clickbait-Detection-NEW-Data + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_clickbait_spoiling_2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_clickbait_spoiling_2_pipeline_en.md new file mode 100644 index 00000000000000..f47fb5dce05815 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_clickbait_spoiling_2_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlm_roberta_clickbait_spoiling_2_pipeline pipeline XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: xlm_roberta_clickbait_spoiling_2_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_clickbait_spoiling_2_pipeline` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_clickbait_spoiling_2_pipeline_en_5.5.0_3.0_1725574331605.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_clickbait_spoiling_2_pipeline_en_5.5.0_3.0_1725574331605.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_clickbait_spoiling_2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_clickbait_spoiling_2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_clickbait_spoiling_2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|876.8 MB| + +## References + +https://huggingface.co/intanm/xlm-roberta-clickbait-spoiling-2 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_emotion_unmolb_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_emotion_unmolb_en.md new file mode 100644 index 00000000000000..76cbe73b5bec74 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_emotion_unmolb_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_emotion_unmolb XlmRoBertaForSequenceClassification from unmolb +author: John Snow Labs +name: xlm_roberta_emotion_unmolb +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_emotion_unmolb` is a English model originally trained by unmolb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_emotion_unmolb_en_5.5.0_3.0_1725537146839.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_emotion_unmolb_en_5.5.0_3.0_1725537146839.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_emotion_unmolb","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_emotion_unmolb", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_emotion_unmolb| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|783.8 MB| + +## References + +https://huggingface.co/unmolb/xlm-roberta-emotion \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_emotion_unmolb_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_emotion_unmolb_pipeline_en.md new file mode 100644 index 00000000000000..afbfd61e4802ba --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_emotion_unmolb_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_emotion_unmolb_pipeline pipeline XlmRoBertaForSequenceClassification from unmolb +author: John Snow Labs +name: xlm_roberta_emotion_unmolb_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_emotion_unmolb_pipeline` is a English model originally trained by unmolb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_emotion_unmolb_pipeline_en_5.5.0_3.0_1725537289166.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_emotion_unmolb_pipeline_en_5.5.0_3.0_1725537289166.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_emotion_unmolb_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_emotion_unmolb_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_emotion_unmolb_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|783.8 MB| + +## References + +https://huggingface.co/unmolb/xlm-roberta-emotion + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_europarl_language_detection_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_europarl_language_detection_pipeline_xx.md new file mode 100644 index 00000000000000..34b2a51833a804 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_europarl_language_detection_pipeline_xx.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Multilingual xlm_roberta_europarl_language_detection_pipeline pipeline XlmRoBertaForSequenceClassification from simoneteglia +author: John Snow Labs +name: xlm_roberta_europarl_language_detection_pipeline +date: 2024-09-05 +tags: [xx, open_source, pipeline, onnx] +task: Text Classification +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_europarl_language_detection_pipeline` is a Multilingual model originally trained by simoneteglia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_europarl_language_detection_pipeline_xx_5.5.0_3.0_1725514283484.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_europarl_language_detection_pipeline_xx_5.5.0_3.0_1725514283484.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_europarl_language_detection_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_europarl_language_detection_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
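+
+A minimal sketch of detecting languages with this pipeline (the sentences and the `class` output key are illustrative assumptions; an active Spark session with Spark NLP started is also assumed):
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("xlm_roberta_europarl_language_detection_pipeline", lang="xx")
+
+# annotate() returns a dict keyed by the pipeline's output columns.
+for sentence in ["The sitting is resumed.", "Die Sitzung ist eroeffnet.", "La seance est reprise."]:
+    print(sentence, "->", pipeline.annotate(sentence).get("class"))
+```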
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_europarl_language_detection_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|882.5 MB| + +## References + +https://huggingface.co/simoneteglia/xlm-roberta-europarl-language-detection + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_europarl_language_detection_xx.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_europarl_language_detection_xx.md new file mode 100644 index 00000000000000..9bfae2164d2634 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_europarl_language_detection_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual xlm_roberta_europarl_language_detection XlmRoBertaForSequenceClassification from simoneteglia +author: John Snow Labs +name: xlm_roberta_europarl_language_detection +date: 2024-09-05 +tags: [xx, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_europarl_language_detection` is a Multilingual model originally trained by simoneteglia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_europarl_language_detection_xx_5.5.0_3.0_1725514181074.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_europarl_language_detection_xx_5.5.0_3.0_1725514181074.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_europarl_language_detection","xx") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_europarl_language_detection", "xx")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_europarl_language_detection| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|xx| +|Size:|882.5 MB| + +## References + +https://huggingface.co/simoneteglia/xlm-roberta-europarl-language-detection \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_finetuned_augument_visquad2_24_3_2023_1_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_finetuned_augument_visquad2_24_3_2023_1_en.md new file mode 100644 index 00000000000000..c91c68bf7d0a69 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_finetuned_augument_visquad2_24_3_2023_1_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English xlm_roberta_finetuned_augument_visquad2_24_3_2023_1 XlmRoBertaForQuestionAnswering from jluckyboyj +author: John Snow Labs +name: xlm_roberta_finetuned_augument_visquad2_24_3_2023_1 +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_finetuned_augument_visquad2_24_3_2023_1` is a English model originally trained by jluckyboyj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_finetuned_augument_visquad2_24_3_2023_1_en_5.5.0_3.0_1725571045947.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_finetuned_augument_visquad2_24_3_2023_1_en_5.5.0_3.0_1725571045947.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_finetuned_augument_visquad2_24_3_2023_1","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_finetuned_augument_visquad2_24_3_2023_1", "en")
+  .setInputCols(Array("document_question", "document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("document_question", "document_context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_finetuned_augument_visquad2_24_3_2023_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|852.1 MB| + +## References + +https://huggingface.co/jluckyboyj/xlm-roberta-finetuned-augument-visquad2-24-3-2023-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_finetuned_augument_visquad2_24_3_2023_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_finetuned_augument_visquad2_24_3_2023_1_pipeline_en.md new file mode 100644 index 00000000000000..5327d3c974ab65 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_finetuned_augument_visquad2_24_3_2023_1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlm_roberta_finetuned_augument_visquad2_24_3_2023_1_pipeline pipeline XlmRoBertaForQuestionAnswering from jluckyboyj +author: John Snow Labs +name: xlm_roberta_finetuned_augument_visquad2_24_3_2023_1_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_finetuned_augument_visquad2_24_3_2023_1_pipeline` is a English model originally trained by jluckyboyj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_finetuned_augument_visquad2_24_3_2023_1_pipeline_en_5.5.0_3.0_1725571108927.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_finetuned_augument_visquad2_24_3_2023_1_pipeline_en_5.5.0_3.0_1725571108927.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_finetuned_augument_visquad2_24_3_2023_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_finetuned_augument_visquad2_24_3_2023_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_finetuned_augument_visquad2_24_3_2023_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|852.1 MB| + +## References + +https://huggingface.co/jluckyboyj/xlm-roberta-finetuned-augument-visquad2-24-3-2023-1 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_indosquadv2_1693993829_8_2e_05_0_01_5_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_indosquadv2_1693993829_8_2e_05_0_01_5_en.md new file mode 100644 index 00000000000000..3128c8a9d300b3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_indosquadv2_1693993829_8_2e_05_0_01_5_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English xlm_roberta_indosquadv2_1693993829_8_2e_05_0_01_5 XlmRoBertaForQuestionAnswering from rizquuula +author: John Snow Labs +name: xlm_roberta_indosquadv2_1693993829_8_2e_05_0_01_5 +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_indosquadv2_1693993829_8_2e_05_0_01_5` is a English model originally trained by rizquuula. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_indosquadv2_1693993829_8_2e_05_0_01_5_en_5.5.0_3.0_1725566989952.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_indosquadv2_1693993829_8_2e_05_0_01_5_en_5.5.0_3.0_1725566989952.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_indosquadv2_1693993829_8_2e_05_0_01_5","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_indosquadv2_1693993829_8_2e_05_0_01_5", "en")
+  .setInputCols(Array("document_question", "document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("document_question", "document_context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_indosquadv2_1693993829_8_2e_05_0_01_5| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|874.8 MB| + +## References + +https://huggingface.co/rizquuula/XLM-RoBERTa-IndoSQuADv2_1693993829-8-2e-05-0.01-5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_indosquadv2_1693993829_8_2e_05_0_01_5_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_indosquadv2_1693993829_8_2e_05_0_01_5_pipeline_en.md new file mode 100644 index 00000000000000..1aed55b67c0de3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_indosquadv2_1693993829_8_2e_05_0_01_5_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlm_roberta_indosquadv2_1693993829_8_2e_05_0_01_5_pipeline pipeline XlmRoBertaForQuestionAnswering from rizquuula +author: John Snow Labs +name: xlm_roberta_indosquadv2_1693993829_8_2e_05_0_01_5_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_indosquadv2_1693993829_8_2e_05_0_01_5_pipeline` is a English model originally trained by rizquuula. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_indosquadv2_1693993829_8_2e_05_0_01_5_pipeline_en_5.5.0_3.0_1725567053048.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_indosquadv2_1693993829_8_2e_05_0_01_5_pipeline_en_5.5.0_3.0_1725567053048.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_indosquadv2_1693993829_8_2e_05_0_01_5_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_indosquadv2_1693993829_8_2e_05_0_01_5_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_indosquadv2_1693993829_8_2e_05_0_01_5_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|874.8 MB| + +## References + +https://huggingface.co/rizquuula/XLM-RoBERTa-IndoSQuADv2_1693993829-8-2e-05-0.01-5 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_XLMr_ENIS_QA_IsQ_EnA_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_XLMr_ENIS_QA_IsQ_EnA_pipeline_en.md new file mode 100644 index 00000000000000..7f28345ff56d80 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_XLMr_ENIS_QA_IsQ_EnA_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlm_roberta_qa_XLMr_ENIS_QA_IsQ_EnA_pipeline pipeline XlmRoBertaForQuestionAnswering from vesteinn +author: John Snow Labs +name: xlm_roberta_qa_XLMr_ENIS_QA_IsQ_EnA_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_qa_XLMr_ENIS_QA_IsQ_EnA_pipeline` is a English model originally trained by vesteinn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_XLMr_ENIS_QA_IsQ_EnA_pipeline_en_5.5.0_3.0_1725559036637.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_XLMr_ENIS_QA_IsQ_EnA_pipeline_en_5.5.0_3.0_1725559036637.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_qa_XLMr_ENIS_QA_IsQ_EnA_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_qa_XLMr_ENIS_QA_IsQ_EnA_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_qa_XLMr_ENIS_QA_IsQ_EnA_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|456.9 MB| + +## References + +https://huggingface.co/vesteinn/XLMr-ENIS-QA-IsQ-EnA + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265902_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265902_pipeline_en.md new file mode 100644 index 00000000000000..1d5b94bf002e32 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265902_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265902_pipeline pipeline XlmRoBertaForQuestionAnswering from teacookies +author: John Snow Labs +name: xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265902_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265902_pipeline` is a English model originally trained by teacookies. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265902_pipeline_en_5.5.0_3.0_1725558006003.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265902_pipeline_en_5.5.0_3.0_1725558006003.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265902_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265902_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265902_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|888.2 MB| + +## References + +https://huggingface.co/teacookies/autonlp-more_fine_tune_24465520-26265902 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265908_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265908_en.md new file mode 100644 index 00000000000000..d4a61a9abc8a63 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265908_en.md @@ -0,0 +1,106 @@ +--- +layout: model +title: English XlmRoBertaForQuestionAnswering (from teacookies) +author: John Snow Labs +name: xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265908 +date: 2024-09-05 +tags: [en, open_source, question_answering, xlmroberta, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `autonlp-more_fine_tune_24465520-26265908` is a English model originally trained by `teacookies`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265908_en_5.5.0_3.0_1725571053557.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265908_en_5.5.0_3.0_1725571053557.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265908","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer") \ +.setCaseSensitive(True) + +pipeline = Pipeline().setStages([ +document_assembler, +spanClassifier +]) + +example = spark.createDataFrame([["What's my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(example).transform(example) +``` +```scala +val document = new MultiDocumentAssembler() +.setInputCols(Array("question", "context")) +.setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = XlmRoBertaForQuestionAnswering +.pretrained("xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265908","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) +.setMaxSentenceLength(512) + +val pipeline = new Pipeline().setStages(Array(document, spanClassifier)) + +val example = Seq( +("Where was John Lenon born?", "John Lenon was born in London and lived in Paris. My name is Sarah and I live in London."), +("What's my name?", "My name is Clara and I live in Berkeley.")) +.toDF("question", "context") + +val result = pipeline.fit(example).transform(example) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.xlm_roberta.fine_tune_24465520_26265908").predict("""What's my name?|||"My name is Clara and I live in Berkeley.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265908| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|888.2 MB| + +## References + +References + +- https://huggingface.co/teacookies/autonlp-more_fine_tune_24465520-26265908 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265909_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265909_pipeline_en.md new file mode 100644 index 00000000000000..f49795db56e286 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265909_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265909_pipeline pipeline XlmRoBertaForQuestionAnswering from teacookies +author: John Snow Labs +name: xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265909_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265909_pipeline` is a English model originally trained by teacookies. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265909_pipeline_en_5.5.0_3.0_1725499142593.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265909_pipeline_en_5.5.0_3.0_1725499142593.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265909_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265909_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
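+The `df` referenced above is assumed to be a Spark DataFrame holding the question/context pairs; a minimal sketch of how it could be built (the column names are an assumption and should match what the pipeline's first stage expects):
+
+```python
+# Hypothetical input DataFrame for the pretrained pipeline above;
+# adjust the column names to whatever its MultiDocumentAssembler expects.
+df = spark.createDataFrame(
+    [["What's my name?", "My name is Clara and I live in Berkeley."]]
+).toDF("question", "context")
+```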
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265909_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|887.8 MB| + +## References + +https://huggingface.co/teacookies/autonlp-more_fine_tune_24465520-26265909 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265911_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265911_en.md new file mode 100644 index 00000000000000..35e41a158ff790 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265911_en.md @@ -0,0 +1,106 @@ +--- +layout: model +title: English XlmRoBertaForQuestionAnswering (from teacookies) +author: John Snow Labs +name: xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265911 +date: 2024-09-05 +tags: [en, open_source, question_answering, xlmroberta, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `autonlp-more_fine_tune_24465520-26265911` is a English model originally trained by `teacookies`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265911_en_5.5.0_3.0_1725558713935.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265911_en_5.5.0_3.0_1725558713935.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265911","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer") \ +.setCaseSensitive(True) + +pipeline = Pipeline().setStages([ +document_assembler, +spanClassifier +]) + +example = spark.createDataFrame([["What's my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(example).transform(example) +``` +```scala +val document = new MultiDocumentAssembler() +.setInputCols(Array("question", "context")) +.setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = XlmRoBertaForQuestionAnswering +.pretrained("xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265911","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) +.setMaxSentenceLength(512) + +val pipeline = new Pipeline().setStages(Array(document, spanClassifier)) + +val example = Seq( +("Where was John Lenon born?", "John Lenon was born in London and lived in Paris. My name is Sarah and I live in London."), +("What's my name?", "My name is Clara and I live in Berkeley.")) +.toDF("question", "context") + +val result = pipeline.fit(example).transform(example) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.xlm_roberta.fine_tune_24465520_26265911").predict("""What's my name?|||"My name is Clara and I live in Berkeley.""") +``` +
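+The Python snippet above scores a single row while the Scala one scores two; the same multi-row input can be built on the Python side as well (a sketch reusing the example's column names):
+
+```python
+# Two question/context pairs, mirroring the Scala example above.
+example = spark.createDataFrame([
+    ("Where was John Lenon born?",
+     "John Lenon was born in London and lived in Paris. My name is Sarah and I live in London."),
+    ("What's my name?", "My name is Clara and I live in Berkeley."),
+]).toDF("question", "context")
+```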
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265911| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|887.8 MB| + +## References + +References + +- https://huggingface.co/teacookies/autonlp-more_fine_tune_24465520-26265911 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265911_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265911_pipeline_en.md new file mode 100644 index 00000000000000..1763f67f0331a8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265911_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265911_pipeline pipeline XlmRoBertaForQuestionAnswering from teacookies +author: John Snow Labs +name: xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265911_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265911_pipeline` is a English model originally trained by teacookies. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265911_pipeline_en_5.5.0_3.0_1725558793558.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265911_pipeline_en_5.5.0_3.0_1725558793558.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265911_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265911_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265911_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|887.8 MB| + +## References + +https://huggingface.co/teacookies/autonlp-more_fine_tune_24465520-26265911 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_autonlp_roberta_base_squad2_24465519_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_autonlp_roberta_base_squad2_24465519_pipeline_en.md new file mode 100644 index 00000000000000..db94e84318cba7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_autonlp_roberta_base_squad2_24465519_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlm_roberta_qa_autonlp_roberta_base_squad2_24465519_pipeline pipeline XlmRoBertaForQuestionAnswering from teacookies +author: John Snow Labs +name: xlm_roberta_qa_autonlp_roberta_base_squad2_24465519_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_qa_autonlp_roberta_base_squad2_24465519_pipeline` is a English model originally trained by teacookies. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_autonlp_roberta_base_squad2_24465519_pipeline_en_5.5.0_3.0_1725571504685.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_autonlp_roberta_base_squad2_24465519_pipeline_en_5.5.0_3.0_1725571504685.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_qa_autonlp_roberta_base_squad2_24465519_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_qa_autonlp_roberta_base_squad2_24465519_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_qa_autonlp_roberta_base_squad2_24465519_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|887.3 MB| + +## References + +https://huggingface.co/teacookies/autonlp-roberta-base-squad2-24465519 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_autonlp_roberta_base_squad2_24465521_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_autonlp_roberta_base_squad2_24465521_en.md new file mode 100644 index 00000000000000..255769ec599128 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_autonlp_roberta_base_squad2_24465521_en.md @@ -0,0 +1,106 @@ +--- +layout: model +title: English XlmRoBertaForQuestionAnswering (from teacookies) +author: John Snow Labs +name: xlm_roberta_qa_autonlp_roberta_base_squad2_24465521 +date: 2024-09-05 +tags: [en, open_source, question_answering, xlmroberta, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `autonlp-roberta-base-squad2-24465521` is a English model originally trained by `teacookies`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_autonlp_roberta_base_squad2_24465521_en_5.5.0_3.0_1725557259465.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_autonlp_roberta_base_squad2_24465521_en_5.5.0_3.0_1725557259465.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_qa_autonlp_roberta_base_squad2_24465521","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer") \ +.setCaseSensitive(True) + +pipeline = Pipeline().setStages([ +document_assembler, +spanClassifier +]) + +example = spark.createDataFrame([["What's my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(example).transform(example) +``` +```scala +val document = new MultiDocumentAssembler() +.setInputCols(Array("question", "context")) +.setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = XlmRoBertaForQuestionAnswering +.pretrained("xlm_roberta_qa_autonlp_roberta_base_squad2_24465521","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) +.setMaxSentenceLength(512) + +val pipeline = new Pipeline().setStages(Array(document, spanClassifier)) + +val example = Seq( +("Where was John Lenon born?", "John Lenon was born in London and lived in Paris. My name is Sarah and I live in London."), +("What's my name?", "My name is Clara and I live in Berkeley.")) +.toDF("question", "context") + +val result = pipeline.fit(example).transform(example) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.squadv2.xlm_roberta.base_24465521.by_teacookies").predict("""What's my name?|||"My name is Clara and I live in Berkeley.""") +``` +
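+For quick, driver-side checks the fitted pipeline above can also be wrapped in a LightPipeline; this is a sketch and assumes a Spark NLP version whose fullAnnotate accepts a separate question and context:
+
+```python
+from sparknlp.base import LightPipeline
+
+# Wrap the fitted pipeline from the example above for single-example inference.
+light = LightPipeline(pipeline.fit(example))
+annotations = light.fullAnnotate("What's my name?", "My name is Clara and I live in Berkeley.")
+print(annotations[0]["answer"])  # answer annotations for the first input
+```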
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_qa_autonlp_roberta_base_squad2_24465521| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|887.4 MB| + +## References + +References + +- https://huggingface.co/teacookies/autonlp-roberta-base-squad2-24465521 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_autonlp_roberta_base_squad2_24465521_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_autonlp_roberta_base_squad2_24465521_pipeline_en.md new file mode 100644 index 00000000000000..be56cf9309428b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_autonlp_roberta_base_squad2_24465521_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlm_roberta_qa_autonlp_roberta_base_squad2_24465521_pipeline pipeline XlmRoBertaForQuestionAnswering from teacookies +author: John Snow Labs +name: xlm_roberta_qa_autonlp_roberta_base_squad2_24465521_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_qa_autonlp_roberta_base_squad2_24465521_pipeline` is a English model originally trained by teacookies. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_autonlp_roberta_base_squad2_24465521_pipeline_en_5.5.0_3.0_1725557330437.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_autonlp_roberta_base_squad2_24465521_pipeline_en_5.5.0_3.0_1725557330437.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_qa_autonlp_roberta_base_squad2_24465521_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_qa_autonlp_roberta_base_squad2_24465521_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_qa_autonlp_roberta_base_squad2_24465521_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|887.4 MB| + +## References + +https://huggingface.co/teacookies/autonlp-roberta-base-squad2-24465521 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_thai_xlm_roberta_base_squad2_pipeline_th.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_thai_xlm_roberta_base_squad2_pipeline_th.md new file mode 100644 index 00000000000000..7fc205a234bd6c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_thai_xlm_roberta_base_squad2_pipeline_th.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Thai xlm_roberta_qa_thai_xlm_roberta_base_squad2_pipeline pipeline XlmRoBertaForQuestionAnswering from wicharnkeisei +author: John Snow Labs +name: xlm_roberta_qa_thai_xlm_roberta_base_squad2_pipeline +date: 2024-09-05 +tags: [th, open_source, pipeline, onnx] +task: Question Answering +language: th +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_qa_thai_xlm_roberta_base_squad2_pipeline` is a Thai model originally trained by wicharnkeisei. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_thai_xlm_roberta_base_squad2_pipeline_th_5.5.0_3.0_1725558623410.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_thai_xlm_roberta_base_squad2_pipeline_th_5.5.0_3.0_1725558623410.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_qa_thai_xlm_roberta_base_squad2_pipeline", lang = "th") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_qa_thai_xlm_roberta_base_squad2_pipeline", lang = "th") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_qa_thai_xlm_roberta_base_squad2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|th| +|Size:|881.1 MB| + +## References + +https://huggingface.co/wicharnkeisei/thai-xlm-roberta-base-squad2 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_thai_xlm_roberta_base_squad2_th.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_thai_xlm_roberta_base_squad2_th.md new file mode 100644 index 00000000000000..5de53cd8cbfec3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_thai_xlm_roberta_base_squad2_th.md @@ -0,0 +1,107 @@ +--- +layout: model +title: Thai XlmRoBertaForQuestionAnswering (from wicharnkeisei) +author: John Snow Labs +name: xlm_roberta_qa_thai_xlm_roberta_base_squad2 +date: 2024-09-05 +tags: [th, open_source, question_answering, xlmroberta, onnx] +task: Question Answering +language: th +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `thai-xlm-roberta-base-squad2` is a Thai model originally trained by `wicharnkeisei`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_thai_xlm_roberta_base_squad2_th_5.5.0_3.0_1725558556348.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_thai_xlm_roberta_base_squad2_th_5.5.0_3.0_1725558556348.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_qa_thai_xlm_roberta_base_squad2","th") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer") \ +.setCaseSensitive(True) + +pipeline = Pipeline().setStages([ +document_assembler, +spanClassifier +]) + +example = spark.createDataFrame([["What's my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(example).transform(example) +``` +```scala +val document = new MultiDocumentAssembler() +.setInputCols(Array("question", "context")) +.setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = XlmRoBertaForQuestionAnswering +.pretrained("xlm_roberta_qa_thai_xlm_roberta_base_squad2","th") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) +.setMaxSentenceLength(512) + +val pipeline = new Pipeline().setStages(Array(document, spanClassifier)) + +val example = Seq( +("Where was John Lenon born?", "John Lenon was born in London and lived in Paris. My name is Sarah and I live in London."), +("What's my name?", "My name is Clara and I live in Berkeley.")) +.toDF("question", "context") + +val result = pipeline.fit(example).transform(example) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("th.answer_question.squadv2.xlm_roberta.base").predict("""What's my name?|||"My name is Clara and I live in Berkeley.""") +``` +
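+The Scala snippet above caps inputs at 512 tokens via setMaxSentenceLength; the same guard can be added on the Python side (a sketch using the model name from this page):
+
+```python
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_qa_thai_xlm_roberta_base_squad2", "th") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer") \
+    .setCaseSensitive(True) \
+    .setMaxSentenceLength(512)  # 512 is the usual XLM-R sequence limit
+```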
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_qa_thai_xlm_roberta_base_squad2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|th| +|Size:|881.1 MB| + +## References + +References + +- https://huggingface.co/wicharnkeisei/thai-xlm-roberta-base-squad2 +- https://github.com/iapp-technology/iapp-wiki-qa-dataset \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlm_all_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlm_all_en.md new file mode 100644 index 00000000000000..cf0b56b641a6f3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlm_all_en.md @@ -0,0 +1,106 @@ +--- +layout: model +title: English XlmRoBertaForQuestionAnswering (from krinal214) +author: John Snow Labs +name: xlm_roberta_qa_xlm_all +date: 2024-09-05 +tags: [en, open_source, question_answering, xlmroberta, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm-all` is a English model originally trained by `krinal214`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_xlm_all_en_5.5.0_3.0_1725558458580.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_xlm_all_en_5.5.0_3.0_1725558458580.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_qa_xlm_all","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer") \ +.setCaseSensitive(True) + +pipeline = Pipeline().setStages([ +document_assembler, +spanClassifier +]) + +example = spark.createDataFrame([["What's my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(example).transform(example) +``` +```scala +val document = new MultiDocumentAssembler() +.setInputCols(Array("question", "context")) +.setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = XlmRoBertaForQuestionAnswering +.pretrained("xlm_roberta_qa_xlm_all","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) +.setMaxSentenceLength(512) + +val pipeline = new Pipeline().setStages(Array(document, spanClassifier)) + +val example = Seq( +("Where was John Lenon born?", "John Lenon was born in London and lived in Paris. My name is Sarah and I live in London."), +("What's my name?", "My name is Clara and I live in Berkeley.")) +.toDF("question", "context") + +val result = pipeline.fit(example).transform(example) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.tydiqa.xlm_roberta").predict("""What's my name?|||"My name is Clara and I live in Berkeley.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_qa_xlm_all| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|923.8 MB| + +## References + +References + +- https://huggingface.co/krinal214/xlm-all \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlm_all_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlm_all_pipeline_en.md new file mode 100644 index 00000000000000..fa1939e5da2465 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlm_all_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlm_roberta_qa_xlm_all_pipeline pipeline XlmRoBertaForQuestionAnswering from krinal214 +author: John Snow Labs +name: xlm_roberta_qa_xlm_all_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_qa_xlm_all_pipeline` is a English model originally trained by krinal214. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_xlm_all_pipeline_en_5.5.0_3.0_1725558524694.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_xlm_all_pipeline_en_5.5.0_3.0_1725558524694.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_qa_xlm_all_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_qa_xlm_all_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
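+To confirm which annotators the downloaded pipeline actually contains (see "Included Models" below), the wrapped PipelineModel can be inspected; a small sketch, assuming the Python PretrainedPipeline exposes it through its `model` attribute:
+
+```python
+pipeline = PretrainedPipeline("xlm_roberta_qa_xlm_all_pipeline", lang = "en")
+# Print the class name of each stage, e.g. MultiDocumentAssembler,
+# XlmRoBertaForQuestionAnswering.
+print([type(stage).__name__ for stage in pipeline.model.stages])
+```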
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_qa_xlm_all_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|923.8 MB| + +## References + +https://huggingface.co/krinal214/xlm-all + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlm_roberta_base_chaii_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlm_roberta_base_chaii_en.md new file mode 100644 index 00000000000000..c200729d00beb0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlm_roberta_base_chaii_en.md @@ -0,0 +1,106 @@ +--- +layout: model +title: English XlmRoBertaForQuestionAnswering (from SauravMaheshkar) +author: John Snow Labs +name: xlm_roberta_qa_xlm_roberta_base_chaii +date: 2024-09-05 +tags: [en, open_source, question_answering, xlmroberta, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm-roberta-base-chaii` is a English model originally trained by `SauravMaheshkar`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_xlm_roberta_base_chaii_en_5.5.0_3.0_1725498623288.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_xlm_roberta_base_chaii_en_5.5.0_3.0_1725498623288.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_qa_xlm_roberta_base_chaii","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer") \ +.setCaseSensitive(True) + +pipeline = Pipeline().setStages([ +document_assembler, +spanClassifier +]) + +example = spark.createDataFrame([["What's my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(example).transform(example) +``` +```scala +val document = new MultiDocumentAssembler() +.setInputCols(Array("question", "context")) +.setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = XlmRoBertaForQuestionAnswering +.pretrained("xlm_roberta_qa_xlm_roberta_base_chaii","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) +.setMaxSentenceLength(512) + +val pipeline = new Pipeline().setStages(Array(document, spanClassifier)) + +val example = Seq( +("Where was John Lenon born?", "John Lenon was born in London and lived in Paris. My name is Sarah and I live in London."), +("What's my name?", "My name is Clara and I live in Berkeley.")) +.toDF("question", "context") + +val result = pipeline.fit(example).transform(example) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.chaii.xlm_roberta.base.by_SauravMaheshkar").predict("""What's my name?|||"My name is Clara and I live in Berkeley.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_qa_xlm_roberta_base_chaii| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|888.3 MB| + +## References + +References + +- https://huggingface.co/SauravMaheshkar/xlm-roberta-base-chaii \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlm_roberta_base_vietnamese_pipeline_vn.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlm_roberta_base_vietnamese_pipeline_vn.md new file mode 100644 index 00000000000000..648f439d368fc6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlm_roberta_base_vietnamese_pipeline_vn.md @@ -0,0 +1,69 @@ +--- +layout: model +title: None xlm_roberta_qa_xlm_roberta_base_vietnamese_pipeline pipeline XlmRoBertaForQuestionAnswering from bhavikardeshna +author: John Snow Labs +name: xlm_roberta_qa_xlm_roberta_base_vietnamese_pipeline +date: 2024-09-05 +tags: [vn, open_source, pipeline, onnx] +task: Question Answering +language: vn +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_qa_xlm_roberta_base_vietnamese_pipeline` is a None model originally trained by bhavikardeshna. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_xlm_roberta_base_vietnamese_pipeline_vn_5.5.0_3.0_1725557436584.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_xlm_roberta_base_vietnamese_pipeline_vn_5.5.0_3.0_1725557436584.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_qa_xlm_roberta_base_vietnamese_pipeline", lang = "vn") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_qa_xlm_roberta_base_vietnamese_pipeline", lang = "vn") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_qa_xlm_roberta_base_vietnamese_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|vn| +|Size:|880.3 MB| + +## References + +https://huggingface.co/bhavikardeshna/xlm-roberta-base-vietnamese + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlm_roberta_base_vietnamese_vn.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlm_roberta_base_vietnamese_vn.md new file mode 100644 index 00000000000000..7f8258ce2f0ef8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlm_roberta_base_vietnamese_vn.md @@ -0,0 +1,106 @@ +--- +layout: model +title: Vietnamese XlmRoBertaForQuestionAnswering (from bhavikardeshna) +author: John Snow Labs +name: xlm_roberta_qa_xlm_roberta_base_vietnamese +date: 2024-09-05 +tags: [vn, open_source, question_answering, xlmroberta, onnx] +task: Question Answering +language: vn +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm-roberta-base-vietnamese` is a Vietnamese model originally trained by `bhavikardeshna`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_xlm_roberta_base_vietnamese_vn_5.5.0_3.0_1725557374550.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_xlm_roberta_base_vietnamese_vn_5.5.0_3.0_1725557374550.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_qa_xlm_roberta_base_vietnamese","vn") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer") \ +.setCaseSensitive(True) + +pipeline = Pipeline().setStages([ +document_assembler, +spanClassifier +]) + +example = spark.createDataFrame([["What's my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(example).transform(example) +``` +```scala +val document = new MultiDocumentAssembler() +.setInputCols(Array("question", "context")) +.setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = XlmRoBertaForQuestionAnswering +.pretrained("xlm_roberta_qa_xlm_roberta_base_vietnamese","vn") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) +.setMaxSentenceLength(512) + +val pipeline = new Pipeline().setStages(Array(document, spanClassifier)) + +val example = Seq( +("Where was John Lenon born?", "John Lenon was born in London and lived in Paris. My name is Sarah and I live in London."), +("What's my name?", "My name is Clara and I live in Berkeley.")) +.toDF("question", "context") + +val result = pipeline.fit(example).transform(example) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("vn.answer_question.xlm_roberta.base").predict("""What's my name?|||"My name is Clara and I live in Berkeley.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_qa_xlm_roberta_base_vietnamese| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|vn| +|Size:|880.3 MB| + +## References + +References + +- https://huggingface.co/bhavikardeshna/xlm-roberta-base-vietnamese \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlm_roberta_squad_v1.1_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlm_roberta_squad_v1.1_en.md new file mode 100644 index 00000000000000..9b8976e8543a87 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlm_roberta_squad_v1.1_en.md @@ -0,0 +1,106 @@ +--- +layout: model +title: English XlmRoBertaForQuestionAnswering (from jakobwes) +author: John Snow Labs +name: xlm_roberta_qa_xlm_roberta_squad_v1.1 +date: 2024-09-05 +tags: [en, open_source, question_answering, xlmroberta, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm_roberta_squad_v1.1` is a English model originally trained by `jakobwes`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_xlm_roberta_squad_v1.1_en_5.5.0_3.0_1725570833912.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_xlm_roberta_squad_v1.1_en_5.5.0_3.0_1725570833912.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_qa_xlm_roberta_squad_v1.1","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer") \ +.setCaseSensitive(True) + +pipeline = Pipeline().setStages([ +document_assembler, +spanClassifier +]) + +example = spark.createDataFrame([["What's my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(example).transform(example) +``` +```scala +val document = new MultiDocumentAssembler() +.setInputCols(Array("question", "context")) +.setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = XlmRoBertaForQuestionAnswering +.pretrained("xlm_roberta_qa_xlm_roberta_squad_v1.1","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) +.setMaxSentenceLength(512) + +val pipeline = new Pipeline().setStages(Array(document, spanClassifier)) + +val example = Seq( +("Where was John Lenon born?", "John Lenon was born in London and lived in Paris. My name is Sarah and I live in London."), +("What's my name?", "My name is Clara and I live in Berkeley.")) +.toDF("question", "context") + +val result = pipeline.fit(example).transform(example) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.squad.xlm_roberta").predict("""What's my name?|||"My name is Clara and I live in Berkeley.""") +``` +
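+Each answer annotation also carries a metadata map (typically including a confidence score, though the exact keys depend on the annotator version); a sketch for inspecting it on the `result` DataFrame from the example above:
+
+```python
+# One row per answer annotation, showing the answer text and its metadata.
+result.selectExpr("explode(answer) as ans") \
+    .selectExpr("ans.result", "ans.metadata") \
+    .show(truncate=False)
+```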
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_qa_xlm_roberta_squad_v1.1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|818.5 MB| + +## References + +References + +- https://huggingface.co/jakobwes/xlm_roberta_squad_v1.1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlm_roberta_squad_v1.1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlm_roberta_squad_v1.1_pipeline_en.md new file mode 100644 index 00000000000000..a12446ebaf7e2d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlm_roberta_squad_v1.1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlm_roberta_qa_xlm_roberta_squad_v1.1_pipeline pipeline XlmRoBertaForQuestionAnswering from jakobwes +author: John Snow Labs +name: xlm_roberta_qa_xlm_roberta_squad_v1.1_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_qa_xlm_roberta_squad_v1.1_pipeline` is a English model originally trained by jakobwes. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_xlm_roberta_squad_v1.1_pipeline_en_5.5.0_3.0_1725570970948.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_xlm_roberta_squad_v1.1_pipeline_en_5.5.0_3.0_1725570970948.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_qa_xlm_roberta_squad_v1.1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_qa_xlm_roberta_squad_v1.1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_qa_xlm_roberta_squad_v1.1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|818.5 MB| + +## References + +https://huggingface.co/jakobwes/xlm_roberta_squad_v1.1 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlmr_base_texas_squad_spanish_spanish_saattrupdan_es.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlmr_base_texas_squad_spanish_spanish_saattrupdan_es.md new file mode 100644 index 00000000000000..5339aa14ced555 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlmr_base_texas_squad_spanish_spanish_saattrupdan_es.md @@ -0,0 +1,86 @@ +--- +layout: model +title: Castilian, Spanish xlm_roberta_qa_xlmr_base_texas_squad_spanish_spanish_saattrupdan XlmRoBertaForQuestionAnswering from saattrupdan +author: John Snow Labs +name: xlm_roberta_qa_xlmr_base_texas_squad_spanish_spanish_saattrupdan +date: 2024-09-05 +tags: [es, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_qa_xlmr_base_texas_squad_spanish_spanish_saattrupdan` is a Castilian, Spanish model originally trained by saattrupdan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_xlmr_base_texas_squad_spanish_spanish_saattrupdan_es_5.5.0_3.0_1725559442631.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_xlmr_base_texas_squad_spanish_spanish_saattrupdan_es_5.5.0_3.0_1725559442631.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_qa_xlmr_base_texas_squad_spanish_spanish_saattrupdan","es") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_qa_xlmr_base_texas_squad_spanish_spanish_saattrupdan", "es")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
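+Since the model is fine-tuned for Spanish, a Spanish question/context pair can be scored the same way (a sketch; the Spanish translation of the toy example is illustrative only):
+
+```python
+# Score a Spanish example with the pipeline fitted above.
+data = spark.createDataFrame(
+    [["¿Qué framework uso?", "Yo uso spark-nlp."]]
+).toDF("question", "context")
+pipelineDF = pipelineModel.transform(data)
+```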
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_qa_xlmr_base_texas_squad_spanish_spanish_saattrupdan| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|es| +|Size:|876.3 MB| + +## References + +https://huggingface.co/saattrupdan/xlmr-base-texas-squad-es \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlmr_base_texas_squad_spanish_spanish_saattrupdan_pipeline_es.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlmr_base_texas_squad_spanish_spanish_saattrupdan_pipeline_es.md new file mode 100644 index 00000000000000..6f66e17f10865e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlmr_base_texas_squad_spanish_spanish_saattrupdan_pipeline_es.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Castilian, Spanish xlm_roberta_qa_xlmr_base_texas_squad_spanish_spanish_saattrupdan_pipeline pipeline XlmRoBertaForQuestionAnswering from saattrupdan +author: John Snow Labs +name: xlm_roberta_qa_xlmr_base_texas_squad_spanish_spanish_saattrupdan_pipeline +date: 2024-09-05 +tags: [es, open_source, pipeline, onnx] +task: Question Answering +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_qa_xlmr_base_texas_squad_spanish_spanish_saattrupdan_pipeline` is a Castilian, Spanish model originally trained by saattrupdan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_xlmr_base_texas_squad_spanish_spanish_saattrupdan_pipeline_es_5.5.0_3.0_1725559507243.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_xlmr_base_texas_squad_spanish_spanish_saattrupdan_pipeline_es_5.5.0_3.0_1725559507243.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_qa_xlmr_base_texas_squad_spanish_spanish_saattrupdan_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_qa_xlmr_base_texas_squad_spanish_spanish_saattrupdan_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_qa_xlmr_base_texas_squad_spanish_spanish_saattrupdan_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|876.3 MB| + +## References + +https://huggingface.co/saattrupdan/xlmr-base-texas-squad-es + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlmr_enis_qa_icelandic_is.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlmr_enis_qa_icelandic_is.md new file mode 100644 index 00000000000000..11a1864bb2b207 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlmr_enis_qa_icelandic_is.md @@ -0,0 +1,86 @@ +--- +layout: model +title: Icelandic xlm_roberta_qa_xlmr_enis_qa_icelandic XlmRoBertaForQuestionAnswering from vesteinn +author: John Snow Labs +name: xlm_roberta_qa_xlmr_enis_qa_icelandic +date: 2024-09-05 +tags: [is, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: is +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_qa_xlmr_enis_qa_icelandic` is a Icelandic model originally trained by vesteinn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_xlmr_enis_qa_icelandic_is_5.5.0_3.0_1725556580595.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_xlmr_enis_qa_icelandic_is_5.5.0_3.0_1725556580595.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_qa_xlmr_enis_qa_icelandic","is") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_qa_xlmr_enis_qa_icelandic", "is")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_qa_xlmr_enis_qa_icelandic| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|is| +|Size:|452.6 MB| + +## References + +https://huggingface.co/vesteinn/XLMr-ENIS-QA-Is \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlmr_enis_qa_icelandic_pipeline_is.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlmr_enis_qa_icelandic_pipeline_is.md new file mode 100644 index 00000000000000..3b659fcc272646 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_roberta_qa_xlmr_enis_qa_icelandic_pipeline_is.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Icelandic xlm_roberta_qa_xlmr_enis_qa_icelandic_pipeline pipeline XlmRoBertaForQuestionAnswering from vesteinn +author: John Snow Labs +name: xlm_roberta_qa_xlmr_enis_qa_icelandic_pipeline +date: 2024-09-05 +tags: [is, open_source, pipeline, onnx] +task: Question Answering +language: is +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_qa_xlmr_enis_qa_icelandic_pipeline` is a Icelandic model originally trained by vesteinn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_xlmr_enis_qa_icelandic_pipeline_is_5.5.0_3.0_1725556609818.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_xlmr_enis_qa_icelandic_pipeline_is_5.5.0_3.0_1725556609818.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_qa_xlmr_enis_qa_icelandic_pipeline", lang = "is") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_qa_xlmr_enis_qa_icelandic_pipeline", lang = "is") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_qa_xlmr_enis_qa_icelandic_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|is| +|Size:|452.6 MB| + +## References + +https://huggingface.co/vesteinn/XLMr-ENIS-QA-Is + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_v_base_trimmed_english_xnli_english_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_v_base_trimmed_english_xnli_english_en.md new file mode 100644 index 00000000000000..fcc435cbd6b59d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_v_base_trimmed_english_xnli_english_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_v_base_trimmed_english_xnli_english XlmRoBertaForSequenceClassification from vocabtrimmer +author: John Snow Labs +name: xlm_v_base_trimmed_english_xnli_english +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_v_base_trimmed_english_xnli_english` is a English model originally trained by vocabtrimmer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_v_base_trimmed_english_xnli_english_en_5.5.0_3.0_1725527636912.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_v_base_trimmed_english_xnli_english_en_5.5.0_3.0_1725527636912.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# input columns must match the assembler/tokenizer outputs ("document", "token")
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_v_base_trimmed_english_xnli_english","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+// input columns must match the assembler/tokenizer outputs ("document", "token")
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_v_base_trimmed_english_xnli_english", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
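+
+Once the pipeline has been fitted and applied as above, the predicted labels can be read from the `class` output column; a short sketch using the `pipelineDF` produced above:
+
+```python
+# Show the input text next to the predicted class label(s)
+pipelineDF.select("text", "class.result").show(truncate = False)
+```
+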
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_v_base_trimmed_english_xnli_english| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/vocabtrimmer/xlm-v-base-trimmed-en-xnli-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlm_v_base_trimmed_english_xnli_english_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlm_v_base_trimmed_english_xnli_english_pipeline_en.md new file mode 100644 index 00000000000000..14695758d1a329 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlm_v_base_trimmed_english_xnli_english_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_v_base_trimmed_english_xnli_english_pipeline pipeline XlmRoBertaForSequenceClassification from vocabtrimmer +author: John Snow Labs +name: xlm_v_base_trimmed_english_xnli_english_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_v_base_trimmed_english_xnli_english_pipeline` is a English model originally trained by vocabtrimmer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_v_base_trimmed_english_xnli_english_pipeline_en_5.5.0_3.0_1725527863469.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_v_base_trimmed_english_xnli_english_pipeline_en_5.5.0_3.0_1725527863469.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_v_base_trimmed_english_xnli_english_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_v_base_trimmed_english_xnli_english_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_v_base_trimmed_english_xnli_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/vocabtrimmer/xlm-v-base-trimmed-en-xnli-en + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlmr_chatgptdetect_noisy_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlmr_chatgptdetect_noisy_pipeline_en.md new file mode 100644 index 00000000000000..1755713c2cb270 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlmr_chatgptdetect_noisy_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlmr_chatgptdetect_noisy_pipeline pipeline XlmRoBertaForSequenceClassification from almanach +author: John Snow Labs +name: xlmr_chatgptdetect_noisy_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_chatgptdetect_noisy_pipeline` is a English model originally trained by almanach. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_chatgptdetect_noisy_pipeline_en_5.5.0_3.0_1725514071707.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_chatgptdetect_noisy_pipeline_en_5.5.0_3.0_1725514071707.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmr_chatgptdetect_noisy_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmr_chatgptdetect_noisy_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
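+
+A minimal runnable sketch, assuming a session from `sparknlp.start()` and assuming the pipeline's `DocumentAssembler` reads a `text` column (the sample sentence is placeholder input only):
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+pipeline = PretrainedPipeline("xlmr_chatgptdetect_noisy_pipeline", lang = "en")
+
+# Placeholder input text; replace with the passage to be checked
+df = spark.createDataFrame([["This passage may or may not have been machine generated."]]).toDF("text")
+
+pipeline.transform(df).select("class.result").show(truncate = False)
+```
+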
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_chatgptdetect_noisy_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|822.5 MB| + +## References + +https://huggingface.co/almanach/xlmr-chatgptdetect-noisy + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlmr_english_german_all_shuffled_764_test1000_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlmr_english_german_all_shuffled_764_test1000_en.md new file mode 100644 index 00000000000000..2b9c7b9c9ec015 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlmr_english_german_all_shuffled_764_test1000_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlmr_english_german_all_shuffled_764_test1000 XlmRoBertaForSequenceClassification from patpizio +author: John Snow Labs +name: xlmr_english_german_all_shuffled_764_test1000 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_english_german_all_shuffled_764_test1000` is a English model originally trained by patpizio. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_english_german_all_shuffled_764_test1000_en_5.5.0_3.0_1725537566461.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_english_german_all_shuffled_764_test1000_en_5.5.0_3.0_1725537566461.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# input columns must match the assembler/tokenizer outputs ("document", "token")
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlmr_english_german_all_shuffled_764_test1000","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+// input columns must match the assembler/tokenizer outputs ("document", "token")
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlmr_english_german_all_shuffled_764_test1000", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_english_german_all_shuffled_764_test1000| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|826.3 MB| + +## References + +https://huggingface.co/patpizio/xlmr-en-de-all_shuffled-764-test1000 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlmr_english_german_train_shuffled_1986_test2000_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlmr_english_german_train_shuffled_1986_test2000_en.md new file mode 100644 index 00000000000000..eeed850a385649 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlmr_english_german_train_shuffled_1986_test2000_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlmr_english_german_train_shuffled_1986_test2000 XlmRoBertaForSequenceClassification from patpizio +author: John Snow Labs +name: xlmr_english_german_train_shuffled_1986_test2000 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_english_german_train_shuffled_1986_test2000` is a English model originally trained by patpizio. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_english_german_train_shuffled_1986_test2000_en_5.5.0_3.0_1725537875192.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_english_german_train_shuffled_1986_test2000_en_5.5.0_3.0_1725537875192.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# input columns must match the assembler/tokenizer outputs ("document", "token")
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlmr_english_german_train_shuffled_1986_test2000","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+// input columns must match the assembler/tokenizer outputs ("document", "token")
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlmr_english_german_train_shuffled_1986_test2000", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_english_german_train_shuffled_1986_test2000| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|824.5 MB| + +## References + +https://huggingface.co/patpizio/xlmr-en-de-train_shuffled-1986-test2000 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlmr_english_german_train_shuffled_1986_test2000_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlmr_english_german_train_shuffled_1986_test2000_pipeline_en.md new file mode 100644 index 00000000000000..5985285c864020 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlmr_english_german_train_shuffled_1986_test2000_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlmr_english_german_train_shuffled_1986_test2000_pipeline pipeline XlmRoBertaForSequenceClassification from patpizio +author: John Snow Labs +name: xlmr_english_german_train_shuffled_1986_test2000_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_english_german_train_shuffled_1986_test2000_pipeline` is a English model originally trained by patpizio. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_english_german_train_shuffled_1986_test2000_pipeline_en_5.5.0_3.0_1725537993760.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_english_german_train_shuffled_1986_test2000_pipeline_en_5.5.0_3.0_1725537993760.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmr_english_german_train_shuffled_1986_test2000_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmr_english_german_train_shuffled_1986_test2000_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_english_german_train_shuffled_1986_test2000_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|824.5 MB| + +## References + +https://huggingface.co/patpizio/xlmr-en-de-train_shuffled-1986-test2000 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlmr_estonian_english_train_shuffled_1986_test2000_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlmr_estonian_english_train_shuffled_1986_test2000_en.md new file mode 100644 index 00000000000000..bedce7cb3829b7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlmr_estonian_english_train_shuffled_1986_test2000_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlmr_estonian_english_train_shuffled_1986_test2000 XlmRoBertaForSequenceClassification from patpizio +author: John Snow Labs +name: xlmr_estonian_english_train_shuffled_1986_test2000 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_estonian_english_train_shuffled_1986_test2000` is a English model originally trained by patpizio. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_estonian_english_train_shuffled_1986_test2000_en_5.5.0_3.0_1725536043825.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_estonian_english_train_shuffled_1986_test2000_en_5.5.0_3.0_1725536043825.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# input columns must match the assembler/tokenizer outputs ("document", "token")
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlmr_estonian_english_train_shuffled_1986_test2000","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+// input columns must match the assembler/tokenizer outputs ("document", "token")
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlmr_estonian_english_train_shuffled_1986_test2000", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_estonian_english_train_shuffled_1986_test2000| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|818.5 MB| + +## References + +https://huggingface.co/patpizio/xlmr-et-en-train_shuffled-1986-test2000 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlmr_estonian_english_train_shuffled_1986_test2000_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlmr_estonian_english_train_shuffled_1986_test2000_pipeline_en.md new file mode 100644 index 00000000000000..776ba357b44c73 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlmr_estonian_english_train_shuffled_1986_test2000_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlmr_estonian_english_train_shuffled_1986_test2000_pipeline pipeline XlmRoBertaForSequenceClassification from patpizio +author: John Snow Labs +name: xlmr_estonian_english_train_shuffled_1986_test2000_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_estonian_english_train_shuffled_1986_test2000_pipeline` is a English model originally trained by patpizio. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_estonian_english_train_shuffled_1986_test2000_pipeline_en_5.5.0_3.0_1725536159513.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_estonian_english_train_shuffled_1986_test2000_pipeline_en_5.5.0_3.0_1725536159513.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmr_estonian_english_train_shuffled_1986_test2000_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmr_estonian_english_train_shuffled_1986_test2000_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_estonian_english_train_shuffled_1986_test2000_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|818.5 MB| + +## References + +https://huggingface.co/patpizio/xlmr-et-en-train_shuffled-1986-test2000 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlmr_finetuned_qamr_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlmr_finetuned_qamr_en.md new file mode 100644 index 00000000000000..687586f65a7d5f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlmr_finetuned_qamr_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English xlmr_finetuned_qamr XlmRoBertaForQuestionAnswering from lielbin +author: John Snow Labs +name: xlmr_finetuned_qamr +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_finetuned_qamr` is a English model originally trained by lielbin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_finetuned_qamr_en_5.5.0_3.0_1725559372637.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_finetuned_qamr_en_5.5.0_3.0_1725559372637.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import MultiDocumentAssembler
+from sparknlp.annotator import XlmRoBertaForQuestionAnswering
+from pyspark.ml import Pipeline
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlmr_finetuned_qamr","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+# column names must match the MultiDocumentAssembler input columns
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlmr_finetuned_qamr", "en")
+  .setInputCols(Array("document_question", "document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+// column names must match the MultiDocumentAssembler input columns
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
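+
+After the transform above, the extracted answer span for each question/context pair is available in the `answer` column; a brief sketch using the `pipelineDF` created above:
+
+```python
+# Show each question together with the predicted answer span
+pipelineDF.select("document_question.result", "answer.result").show(truncate = False)
+```
+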
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_finetuned_qamr| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|846.9 MB| + +## References + +https://huggingface.co/lielbin/XLMR-finetuned-QAMR \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlmr_finetuned_qamr_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlmr_finetuned_qamr_pipeline_en.md new file mode 100644 index 00000000000000..be8a6f21d6ad4d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlmr_finetuned_qamr_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlmr_finetuned_qamr_pipeline pipeline XlmRoBertaForQuestionAnswering from lielbin +author: John Snow Labs +name: xlmr_finetuned_qamr_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_finetuned_qamr_pipeline` is a English model originally trained by lielbin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_finetuned_qamr_pipeline_en_5.5.0_3.0_1725559456935.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_finetuned_qamr_pipeline_en_5.5.0_3.0_1725559456935.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmr_finetuned_qamr_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmr_finetuned_qamr_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_finetuned_qamr_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|846.9 MB| + +## References + +https://huggingface.co/lielbin/XLMR-finetuned-QAMR + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlmr_finetuned_squad1_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlmr_finetuned_squad1_en.md new file mode 100644 index 00000000000000..3cf044c6c62d64 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlmr_finetuned_squad1_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English xlmr_finetuned_squad1 XlmRoBertaForQuestionAnswering from lielbin +author: John Snow Labs +name: xlmr_finetuned_squad1 +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_finetuned_squad1` is a English model originally trained by lielbin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_finetuned_squad1_en_5.5.0_3.0_1725556482319.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_finetuned_squad1_en_5.5.0_3.0_1725556482319.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import MultiDocumentAssembler
+from sparknlp.annotator import XlmRoBertaForQuestionAnswering
+from pyspark.ml import Pipeline
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlmr_finetuned_squad1","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+# column names must match the MultiDocumentAssembler input columns
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlmr_finetuned_squad1", "en")
+  .setInputCols(Array("document_question", "document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+// column names must match the MultiDocumentAssembler input columns
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_finetuned_squad1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|871.9 MB| + +## References + +https://huggingface.co/lielbin/XLMR-finetuned-SQuAD1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlmr_finetuned_squad1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlmr_finetuned_squad1_pipeline_en.md new file mode 100644 index 00000000000000..eae31ac07d39f1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlmr_finetuned_squad1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlmr_finetuned_squad1_pipeline pipeline XlmRoBertaForQuestionAnswering from lielbin +author: John Snow Labs +name: xlmr_finetuned_squad1_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_finetuned_squad1_pipeline` is a English model originally trained by lielbin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_finetuned_squad1_pipeline_en_5.5.0_3.0_1725556553154.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_finetuned_squad1_pipeline_en_5.5.0_3.0_1725556553154.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmr_finetuned_squad1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmr_finetuned_squad1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_finetuned_squad1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|871.9 MB| + +## References + +https://huggingface.co/lielbin/XLMR-finetuned-SQuAD1 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlmr_qa_register_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlmr_qa_register_en.md new file mode 100644 index 00000000000000..5dd1f7bfd5923a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlmr_qa_register_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlmr_qa_register XlmRoBertaForSequenceClassification from TurkuNLP +author: John Snow Labs +name: xlmr_qa_register +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_qa_register` is a English model originally trained by TurkuNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_qa_register_en_5.5.0_3.0_1725529287360.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_qa_register_en_5.5.0_3.0_1725529287360.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# input columns must match the assembler/tokenizer outputs ("document", "token")
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlmr_qa_register","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+// input columns must match the assembler/tokenizer outputs ("document", "token")
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlmr_qa_register", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_qa_register| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|843.3 MB| + +## References + +https://huggingface.co/TurkuNLP/xlmr-qa-register \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlmr_qa_register_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlmr_qa_register_pipeline_en.md new file mode 100644 index 00000000000000..fa6360f894a1f5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlmr_qa_register_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlmr_qa_register_pipeline pipeline XlmRoBertaForSequenceClassification from TurkuNLP +author: John Snow Labs +name: xlmr_qa_register_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_qa_register_pipeline` is a English model originally trained by TurkuNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_qa_register_pipeline_en_5.5.0_3.0_1725529406009.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_qa_register_pipeline_en_5.5.0_3.0_1725529406009.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmr_qa_register_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmr_qa_register_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_qa_register_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|843.3 MB| + +## References + +https://huggingface.co/TurkuNLP/xlmr-qa-register + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlmr_sinhalese_english_all_shuffled_42_test1000_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlmr_sinhalese_english_all_shuffled_42_test1000_en.md new file mode 100644 index 00000000000000..3a8ed9311f1032 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlmr_sinhalese_english_all_shuffled_42_test1000_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlmr_sinhalese_english_all_shuffled_42_test1000 XlmRoBertaForSequenceClassification from patpizio +author: John Snow Labs +name: xlmr_sinhalese_english_all_shuffled_42_test1000 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_sinhalese_english_all_shuffled_42_test1000` is a English model originally trained by patpizio. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_sinhalese_english_all_shuffled_42_test1000_en_5.5.0_3.0_1725525990002.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_sinhalese_english_all_shuffled_42_test1000_en_5.5.0_3.0_1725525990002.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# input columns must match the assembler/tokenizer outputs ("document", "token")
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlmr_sinhalese_english_all_shuffled_42_test1000","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+// input columns must match the assembler/tokenizer outputs ("document", "token")
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlmr_sinhalese_english_all_shuffled_42_test1000", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_sinhalese_english_all_shuffled_42_test1000| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|814.2 MB| + +## References + +https://huggingface.co/patpizio/xlmr-si-en-all_shuffled-42-test1000 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlmr_sinhalese_english_all_shuffled_42_test1000_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlmr_sinhalese_english_all_shuffled_42_test1000_pipeline_en.md new file mode 100644 index 00000000000000..6ea3b2014cc584 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlmr_sinhalese_english_all_shuffled_42_test1000_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlmr_sinhalese_english_all_shuffled_42_test1000_pipeline pipeline XlmRoBertaForSequenceClassification from patpizio +author: John Snow Labs +name: xlmr_sinhalese_english_all_shuffled_42_test1000_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_sinhalese_english_all_shuffled_42_test1000_pipeline` is a English model originally trained by patpizio. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_sinhalese_english_all_shuffled_42_test1000_pipeline_en_5.5.0_3.0_1725526111436.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_sinhalese_english_all_shuffled_42_test1000_pipeline_en_5.5.0_3.0_1725526111436.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmr_sinhalese_english_all_shuffled_42_test1000_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmr_sinhalese_english_all_shuffled_42_test1000_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_sinhalese_english_all_shuffled_42_test1000_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|814.2 MB| + +## References + +https://huggingface.co/patpizio/xlmr-si-en-all_shuffled-42-test1000 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlmr_tatoeba_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlmr_tatoeba_en.md new file mode 100644 index 00000000000000..cb2a16c95b6a78 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlmr_tatoeba_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlmr_tatoeba XlmRoBertaForSequenceClassification from bigpang +author: John Snow Labs +name: xlmr_tatoeba +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_tatoeba` is a English model originally trained by bigpang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_tatoeba_en_5.5.0_3.0_1725525837483.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_tatoeba_en_5.5.0_3.0_1725525837483.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# input columns must match the assembler/tokenizer outputs ("document", "token")
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlmr_tatoeba","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+// input columns must match the assembler/tokenizer outputs ("document", "token")
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlmr_tatoeba", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_tatoeba| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|990.5 MB| + +## References + +https://huggingface.co/bigpang/xlmr-tatoeba \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlmr_tatoeba_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlmr_tatoeba_pipeline_en.md new file mode 100644 index 00000000000000..777b3bc2aad605 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlmr_tatoeba_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlmr_tatoeba_pipeline pipeline XlmRoBertaForSequenceClassification from bigpang +author: John Snow Labs +name: xlmr_tatoeba_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_tatoeba_pipeline` is a English model originally trained by bigpang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_tatoeba_pipeline_en_5.5.0_3.0_1725525902189.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_tatoeba_pipeline_en_5.5.0_3.0_1725525902189.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmr_tatoeba_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmr_tatoeba_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_tatoeba_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|990.5 MB| + +## References + +https://huggingface.co/bigpang/xlmr-tatoeba + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlmrbase_finetuned_squad_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlmrbase_finetuned_squad_en.md new file mode 100644 index 00000000000000..9559f8f23e65f6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlmrbase_finetuned_squad_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English xlmrbase_finetuned_squad XlmRoBertaForQuestionAnswering from PJM124 +author: John Snow Labs +name: xlmrbase_finetuned_squad +date: 2024-09-05 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmrbase_finetuned_squad` is a English model originally trained by PJM124. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmrbase_finetuned_squad_en_5.5.0_3.0_1725567596635.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmrbase_finetuned_squad_en_5.5.0_3.0_1725567596635.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import MultiDocumentAssembler
+from sparknlp.annotator import XlmRoBertaForQuestionAnswering
+from pyspark.ml import Pipeline
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlmrbase_finetuned_squad","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+# column names must match the MultiDocumentAssembler input columns
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlmrbase_finetuned_squad", "en")
+  .setInputCols(Array("document_question", "document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+// column names must match the MultiDocumentAssembler input columns
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmrbase_finetuned_squad| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|818.3 MB| + +## References + +https://huggingface.co/PJM124/xlmrbase-finetuned-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlmrbase_finetuned_squad_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlmrbase_finetuned_squad_pipeline_en.md new file mode 100644 index 00000000000000..e6b17871a1d71a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlmrbase_finetuned_squad_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlmrbase_finetuned_squad_pipeline pipeline XlmRoBertaForQuestionAnswering from PJM124 +author: John Snow Labs +name: xlmrbase_finetuned_squad_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmrbase_finetuned_squad_pipeline` is a English model originally trained by PJM124. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmrbase_finetuned_squad_pipeline_en_5.5.0_3.0_1725567729751.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmrbase_finetuned_squad_pipeline_en_5.5.0_3.0_1725567729751.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmrbase_finetuned_squad_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmrbase_finetuned_squad_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmrbase_finetuned_squad_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|818.3 MB| + +## References + +https://huggingface.co/PJM124/xlmrbase-finetuned-squad + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlmroberta_finetuned_squadv2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlmroberta_finetuned_squadv2_pipeline_en.md new file mode 100644 index 00000000000000..11cca5c99af90b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlmroberta_finetuned_squadv2_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlmroberta_finetuned_squadv2_pipeline pipeline XlmRoBertaForQuestionAnswering from quocviethere +author: John Snow Labs +name: xlmroberta_finetuned_squadv2_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmroberta_finetuned_squadv2_pipeline` is a English model originally trained by quocviethere. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_finetuned_squadv2_pipeline_en_5.5.0_3.0_1725497835303.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_finetuned_squadv2_pipeline_en_5.5.0_3.0_1725497835303.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmroberta_finetuned_squadv2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmroberta_finetuned_squadv2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_finetuned_squadv2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|839.9 MB| + +## References + +https://huggingface.co/quocviethere/xlmroberta-finetuned-squadv2 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlmroberta_finetuned_tydiqa_tel_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlmroberta_finetuned_tydiqa_tel_pipeline_en.md new file mode 100644 index 00000000000000..37925e7cf423ee --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlmroberta_finetuned_tydiqa_tel_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlmroberta_finetuned_tydiqa_tel_pipeline pipeline XlmRoBertaForQuestionAnswering from Auracle7 +author: John Snow Labs +name: xlmroberta_finetuned_tydiqa_tel_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmroberta_finetuned_tydiqa_tel_pipeline` is a English model originally trained by Auracle7. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_finetuned_tydiqa_tel_pipeline_en_5.5.0_3.0_1725498178809.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_finetuned_tydiqa_tel_pipeline_en_5.5.0_3.0_1725498178809.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmroberta_finetuned_tydiqa_tel_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmroberta_finetuned_tydiqa_tel_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_finetuned_tydiqa_tel_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|842.1 MB| + +## References + +https://huggingface.co/Auracle7/XLMRoberta-finetuned-TyDIQA-Tel + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlmrobertabaseft_areaspopscience_mxtweets_multilabel_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlmrobertabaseft_areaspopscience_mxtweets_multilabel_en.md new file mode 100644 index 00000000000000..d1481a7eebd88a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlmrobertabaseft_areaspopscience_mxtweets_multilabel_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlmrobertabaseft_areaspopscience_mxtweets_multilabel XlmRoBertaForSequenceClassification from alecmontero +author: John Snow Labs +name: xlmrobertabaseft_areaspopscience_mxtweets_multilabel +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmrobertabaseft_areaspopscience_mxtweets_multilabel` is a English model originally trained by alecmontero. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmrobertabaseft_areaspopscience_mxtweets_multilabel_en_5.5.0_3.0_1725535791317.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmrobertabaseft_areaspopscience_mxtweets_multilabel_en_5.5.0_3.0_1725535791317.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlmrobertabaseft_areaspopscience_mxtweets_multilabel","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlmrobertabaseft_areaspopscience_mxtweets_multilabel", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
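+
+Continuing from the Python example above, the predicted label for each row lands in the `result` field of the `class` annotation column; the short sketch below (not part of the original example) shows how to inspect it.
+
+```python
+# Show each input text next to its predicted label(s).
+pipelineDF.select("text", "class.result").show(truncate=False)
+```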
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmrobertabaseft_areaspopscience_mxtweets_multilabel| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/alecmontero/xlmrobertabaseft-areaspopscience-mxtweets-multilabel \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xlmrobertabaseft_areaspopscience_mxtweets_multilabel_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xlmrobertabaseft_areaspopscience_mxtweets_multilabel_pipeline_en.md new file mode 100644 index 00000000000000..8c699c82d30c3a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xlmrobertabaseft_areaspopscience_mxtweets_multilabel_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlmrobertabaseft_areaspopscience_mxtweets_multilabel_pipeline pipeline XlmRoBertaForSequenceClassification from alecmontero +author: John Snow Labs +name: xlmrobertabaseft_areaspopscience_mxtweets_multilabel_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmrobertabaseft_areaspopscience_mxtweets_multilabel_pipeline` is a English model originally trained by alecmontero. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmrobertabaseft_areaspopscience_mxtweets_multilabel_pipeline_en_5.5.0_3.0_1725535844255.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmrobertabaseft_areaspopscience_mxtweets_multilabel_pipeline_en_5.5.0_3.0_1725535844255.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmrobertabaseft_areaspopscience_mxtweets_multilabel_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmrobertabaseft_areaspopscience_mxtweets_multilabel_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmrobertabaseft_areaspopscience_mxtweets_multilabel_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/alecmontero/xlmrobertabaseft-areaspopscience-mxtweets-multilabel + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xml_roberta_science_subject_text_classification_en.md b/docs/_posts/ahmedlone127/2024-09-05-xml_roberta_science_subject_text_classification_en.md new file mode 100644 index 00000000000000..76dc0505568bfc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xml_roberta_science_subject_text_classification_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xml_roberta_science_subject_text_classification XlmRoBertaForSequenceClassification from mominah +author: John Snow Labs +name: xml_roberta_science_subject_text_classification +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xml_roberta_science_subject_text_classification` is a English model originally trained by mominah. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xml_roberta_science_subject_text_classification_en_5.5.0_3.0_1725515127157.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xml_roberta_science_subject_text_classification_en_5.5.0_3.0_1725515127157.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xml_roberta_science_subject_text_classification","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xml_roberta_science_subject_text_classification", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xml_roberta_science_subject_text_classification| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|655.1 MB| + +## References + +https://huggingface.co/mominah/xml-roberta-science-subject-text-classification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xnli_xlm_r_only_thai_en.md b/docs/_posts/ahmedlone127/2024-09-05-xnli_xlm_r_only_thai_en.md new file mode 100644 index 00000000000000..35a0a57e9e9efe --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xnli_xlm_r_only_thai_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xnli_xlm_r_only_thai XlmRoBertaForSequenceClassification from semindan +author: John Snow Labs +name: xnli_xlm_r_only_thai +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xnli_xlm_r_only_thai` is a English model originally trained by semindan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xnli_xlm_r_only_thai_en_5.5.0_3.0_1725525893337.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xnli_xlm_r_only_thai_en_5.5.0_3.0_1725525893337.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xnli_xlm_r_only_thai","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xnli_xlm_r_only_thai", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
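+
+For quick, single-sentence predictions it is often handier to wrap the fitted pipeline in a `LightPipeline` instead of building a DataFrame; a minimal sketch, assuming the `pipelineModel` from the example above.
+
+```python
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+# Returns a dict keyed by output column names (e.g. "class"), each holding a list of results.
+print(light.annotate("I love spark-nlp"))
+```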
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xnli_xlm_r_only_thai| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|804.3 MB| + +## References + +https://huggingface.co/semindan/xnli_xlm_r_only_th \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-xnli_xlm_r_only_thai_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-xnli_xlm_r_only_thai_pipeline_en.md new file mode 100644 index 00000000000000..833be083650fa4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-xnli_xlm_r_only_thai_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xnli_xlm_r_only_thai_pipeline pipeline XlmRoBertaForSequenceClassification from semindan +author: John Snow Labs +name: xnli_xlm_r_only_thai_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xnli_xlm_r_only_thai_pipeline` is a English model originally trained by semindan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xnli_xlm_r_only_thai_pipeline_en_5.5.0_3.0_1725526025546.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xnli_xlm_r_only_thai_pipeline_en_5.5.0_3.0_1725526025546.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xnli_xlm_r_only_thai_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xnli_xlm_r_only_thai_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xnli_xlm_r_only_thai_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|804.3 MB| + +## References + +https://huggingface.co/semindan/xnli_xlm_r_only_th + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-yelp_polarity_microsoft_deberta_v3_base_seed_3_en.md b/docs/_posts/ahmedlone127/2024-09-05-yelp_polarity_microsoft_deberta_v3_base_seed_3_en.md new file mode 100644 index 00000000000000..6b2546efc6eaa0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-yelp_polarity_microsoft_deberta_v3_base_seed_3_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English yelp_polarity_microsoft_deberta_v3_base_seed_3 DeBertaForSequenceClassification from utahnlp +author: John Snow Labs +name: yelp_polarity_microsoft_deberta_v3_base_seed_3 +date: 2024-09-05 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`yelp_polarity_microsoft_deberta_v3_base_seed_3` is a English model originally trained by utahnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/yelp_polarity_microsoft_deberta_v3_base_seed_3_en_5.5.0_3.0_1725561462815.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/yelp_polarity_microsoft_deberta_v3_base_seed_3_en_5.5.0_3.0_1725561462815.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("yelp_polarity_microsoft_deberta_v3_base_seed_3","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("yelp_polarity_microsoft_deberta_v3_base_seed_3", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
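+
+Besides the predicted label in `result`, sequence-classification annotations typically carry the per-class scores in their `metadata` map; a small sketch, assuming the `pipelineDF` built above.
+
+```python
+# `class.result` holds the predicted label, `class.metadata` the label-to-score map.
+pipelineDF.select("class.result", "class.metadata").show(truncate=False)
+```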
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|yelp_polarity_microsoft_deberta_v3_base_seed_3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|666.0 MB| + +## References + +https://huggingface.co/utahnlp/yelp_polarity_microsoft_deberta-v3-base_seed-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-yelp_polarity_microsoft_deberta_v3_base_seed_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-yelp_polarity_microsoft_deberta_v3_base_seed_3_pipeline_en.md new file mode 100644 index 00000000000000..86893f6ec1cbbf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-yelp_polarity_microsoft_deberta_v3_base_seed_3_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English yelp_polarity_microsoft_deberta_v3_base_seed_3_pipeline pipeline DeBertaForSequenceClassification from utahnlp +author: John Snow Labs +name: yelp_polarity_microsoft_deberta_v3_base_seed_3_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`yelp_polarity_microsoft_deberta_v3_base_seed_3_pipeline` is a English model originally trained by utahnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/yelp_polarity_microsoft_deberta_v3_base_seed_3_pipeline_en_5.5.0_3.0_1725561511424.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/yelp_polarity_microsoft_deberta_v3_base_seed_3_pipeline_en_5.5.0_3.0_1725561511424.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("yelp_polarity_microsoft_deberta_v3_base_seed_3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("yelp_polarity_microsoft_deberta_v3_base_seed_3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|yelp_polarity_microsoft_deberta_v3_base_seed_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|666.1 MB| + +## References + +https://huggingface.co/utahnlp/yelp_polarity_microsoft_deberta-v3-base_seed-3 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-yiddish_roberta_oscar_en.md b/docs/_posts/ahmedlone127/2024-09-05-yiddish_roberta_oscar_en.md new file mode 100644 index 00000000000000..64f7b6e6e3a403 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-yiddish_roberta_oscar_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English yiddish_roberta_oscar RoBertaEmbeddings from ajanco +author: John Snow Labs +name: yiddish_roberta_oscar +date: 2024-09-05 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`yiddish_roberta_oscar` is a English model originally trained by ajanco. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/yiddish_roberta_oscar_en_5.5.0_3.0_1725577856426.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/yiddish_roberta_oscar_en_5.5.0_3.0_1725577856426.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("yiddish_roberta_oscar","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("yiddish_roberta_oscar","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
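+
+To inspect the token vectors produced above, the `embeddings` annotation column can be exploded into one row per token; a minimal sketch continuing from the Python example.
+
+```python
+from pyspark.sql import functions as F
+
+# One row per token: the token text and its embedding vector.
+pipelineDF.select(F.explode("embeddings").alias("emb")) \
+    .select(F.col("emb.result").alias("token"), F.col("emb.embeddings").alias("vector")) \
+    .show(5, truncate=80)
+```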
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|yiddish_roberta_oscar| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|465.5 MB| + +## References + +https://huggingface.co/ajanco/yi_roberta_oscar \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-yiddish_roberta_oscar_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-yiddish_roberta_oscar_pipeline_en.md new file mode 100644 index 00000000000000..f839b7c2827b40 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-yiddish_roberta_oscar_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English yiddish_roberta_oscar_pipeline pipeline RoBertaEmbeddings from ajanco +author: John Snow Labs +name: yiddish_roberta_oscar_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`yiddish_roberta_oscar_pipeline` is a English model originally trained by ajanco. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/yiddish_roberta_oscar_pipeline_en_5.5.0_3.0_1725577880802.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/yiddish_roberta_oscar_pipeline_en_5.5.0_3.0_1725577880802.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("yiddish_roberta_oscar_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("yiddish_roberta_oscar_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|yiddish_roberta_oscar_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|465.5 MB| + +## References + +https://huggingface.co/ajanco/yi_roberta_oscar + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-zh2en40_en.md b/docs/_posts/ahmedlone127/2024-09-05-zh2en40_en.md new file mode 100644 index 00000000000000..4d46d0d6f59a26 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-zh2en40_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English zh2en40 MarianTransformer from Carlosino +author: John Snow Labs +name: zh2en40 +date: 2024-09-05 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`zh2en40` is a English model originally trained by Carlosino. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/zh2en40_en_5.5.0_3.0_1725544771682.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/zh2en40_en_5.5.0_3.0_1725544771682.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("zh2en40","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+  .setInputCols(Array("document"))
+  .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("zh2en40","en")
+  .setInputCols(Array("sentence"))
+  .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
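+
+Once the pipeline above has run, the translated sentences are available in the `result` field of the `translation` column; a short sketch, assuming the `pipelineDF` from the Python example.
+
+```python
+# One row per translated sentence.
+pipelineDF.selectExpr("explode(translation.result) as translated").show(truncate=False)
+```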
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|zh2en40| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|540.0 MB| + +## References + +https://huggingface.co/Carlosino/zh2en40 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-05-zh2en40_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-05-zh2en40_pipeline_en.md new file mode 100644 index 00000000000000..1aad6dcfc07a3d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-05-zh2en40_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English zh2en40_pipeline pipeline MarianTransformer from Carlosino +author: John Snow Labs +name: zh2en40_pipeline +date: 2024-09-05 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`zh2en40_pipeline` is a English model originally trained by Carlosino. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/zh2en40_pipeline_en_5.5.0_3.0_1725544799341.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/zh2en40_pipeline_en_5.5.0_3.0_1725544799341.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("zh2en40_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("zh2en40_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|zh2en40_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|540.5 MB| + +## References + +https://huggingface.co/Carlosino/zh2en40 + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-008_microsoft_deberta_v3_base_finetuned_yahoo_800_200_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-008_microsoft_deberta_v3_base_finetuned_yahoo_800_200_pipeline_en.md new file mode 100644 index 00000000000000..43fcc77e52d7c9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-008_microsoft_deberta_v3_base_finetuned_yahoo_800_200_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English 008_microsoft_deberta_v3_base_finetuned_yahoo_800_200_pipeline pipeline DeBertaForSequenceClassification from diogopaes10 +author: John Snow Labs +name: 008_microsoft_deberta_v3_base_finetuned_yahoo_800_200_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`008_microsoft_deberta_v3_base_finetuned_yahoo_800_200_pipeline` is a English model originally trained by diogopaes10. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/008_microsoft_deberta_v3_base_finetuned_yahoo_800_200_pipeline_en_5.5.0_3.0_1725609745657.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/008_microsoft_deberta_v3_base_finetuned_yahoo_800_200_pipeline_en_5.5.0_3.0_1725609745657.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("008_microsoft_deberta_v3_base_finetuned_yahoo_800_200_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("008_microsoft_deberta_v3_base_finetuned_yahoo_800_200_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
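+
+A downloaded `PretrainedPipeline` can also be applied to raw strings through `annotate`, without building a DataFrame first; a minimal sketch (the keys of the returned dictionary depend on the pipeline's output columns and are not documented in this card).
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("008_microsoft_deberta_v3_base_finetuned_yahoo_800_200_pipeline", lang = "en")
+result = pipeline.annotate("What is the best way to learn a new programming language?")
+print(result)
+```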
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|008_microsoft_deberta_v3_base_finetuned_yahoo_800_200_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|584.5 MB| + +## References + +https://huggingface.co/diogopaes10/008-microsoft-deberta-v3-base-finetuned-yahoo-800_200 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-100_sdb_taxxl_average_768_en.md b/docs/_posts/ahmedlone127/2024-09-06-100_sdb_taxxl_average_768_en.md new file mode 100644 index 00000000000000..5b7c737b40832c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-100_sdb_taxxl_average_768_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English 100_sdb_taxxl_average_768 DistilBertEmbeddings from sripadhstudy +author: John Snow Labs +name: 100_sdb_taxxl_average_768 +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`100_sdb_taxxl_average_768` is a English model originally trained by sripadhstudy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/100_sdb_taxxl_average_768_en_5.5.0_3.0_1725639592337.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/100_sdb_taxxl_average_768_en_5.5.0_3.0_1725639592337.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("100_sdb_taxxl_average_768","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("100_sdb_taxxl_average_768","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|100_sdb_taxxl_average_768| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.5 MB| + +## References + +https://huggingface.co/sripadhstudy/100_SDB_TAxxL_average_768 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-100_sdb_taxxl_average_768_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-100_sdb_taxxl_average_768_pipeline_en.md new file mode 100644 index 00000000000000..a3ee0213f78f7c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-100_sdb_taxxl_average_768_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English 100_sdb_taxxl_average_768_pipeline pipeline DistilBertEmbeddings from sripadhstudy +author: John Snow Labs +name: 100_sdb_taxxl_average_768_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`100_sdb_taxxl_average_768_pipeline` is a English model originally trained by sripadhstudy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/100_sdb_taxxl_average_768_pipeline_en_5.5.0_3.0_1725639604409.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/100_sdb_taxxl_average_768_pipeline_en_5.5.0_3.0_1725639604409.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("100_sdb_taxxl_average_768_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("100_sdb_taxxl_average_768_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|100_sdb_taxxl_average_768_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.5 MB| + +## References + +https://huggingface.co/sripadhstudy/100_SDB_TAxxL_average_768 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-20230328_002_baseline_xlmr_clickbait_spoiling_en.md b/docs/_posts/ahmedlone127/2024-09-06-20230328_002_baseline_xlmr_clickbait_spoiling_en.md new file mode 100644 index 00000000000000..18e7c6bdbd284f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-20230328_002_baseline_xlmr_clickbait_spoiling_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English 20230328_002_baseline_xlmr_clickbait_spoiling XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: 20230328_002_baseline_xlmr_clickbait_spoiling +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`20230328_002_baseline_xlmr_clickbait_spoiling` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/20230328_002_baseline_xlmr_clickbait_spoiling_en_5.5.0_3.0_1725640617976.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/20230328_002_baseline_xlmr_clickbait_spoiling_en_5.5.0_3.0_1725640617976.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("20230328_002_baseline_xlmr_clickbait_spoiling","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols("question", "context")
+  .setOutputCols("document_question", "document_context")
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("20230328_002_baseline_xlmr_clickbait_spoiling", "en")
+  .setInputCols(Array("document_question","document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq("What framework do I use?","I use spark-nlp.").toDS.toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|20230328_002_baseline_xlmr_clickbait_spoiling| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|876.4 MB| + +## References + +https://huggingface.co/intanm/20230328-002-baseline-xlmr-clickbait-spoiling \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-20230328_002_baseline_xlmr_clickbait_spoiling_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-20230328_002_baseline_xlmr_clickbait_spoiling_pipeline_en.md new file mode 100644 index 00000000000000..19187d423ce1c9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-20230328_002_baseline_xlmr_clickbait_spoiling_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English 20230328_002_baseline_xlmr_clickbait_spoiling_pipeline pipeline XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: 20230328_002_baseline_xlmr_clickbait_spoiling_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`20230328_002_baseline_xlmr_clickbait_spoiling_pipeline` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/20230328_002_baseline_xlmr_clickbait_spoiling_pipeline_en_5.5.0_3.0_1725640685544.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/20230328_002_baseline_xlmr_clickbait_spoiling_pipeline_en_5.5.0_3.0_1725640685544.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("20230328_002_baseline_xlmr_clickbait_spoiling_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("20230328_002_baseline_xlmr_clickbait_spoiling_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|20230328_002_baseline_xlmr_clickbait_spoiling_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|876.4 MB| + +## References + +https://huggingface.co/intanm/20230328-002-baseline-xlmr-clickbait-spoiling + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-4_shot_twitter_en.md b/docs/_posts/ahmedlone127/2024-09-06-4_shot_twitter_en.md new file mode 100644 index 00000000000000..3893a7cbb83af5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-4_shot_twitter_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English 4_shot_twitter MPNetEmbeddings from Nhat1904 +author: John Snow Labs +name: 4_shot_twitter +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`4_shot_twitter` is a English model originally trained by Nhat1904. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/4_shot_twitter_en_5.5.0_3.0_1725595785071.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/4_shot_twitter_en_5.5.0_3.0_1725595785071.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("4_shot_twitter","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("4_shot_twitter","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|4_shot_twitter| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/Nhat1904/4-shot-twitter \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-4_shot_twitter_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-4_shot_twitter_pipeline_en.md new file mode 100644 index 00000000000000..dc591c077bea82 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-4_shot_twitter_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English 4_shot_twitter_pipeline pipeline MPNetEmbeddings from Nhat1904 +author: John Snow Labs +name: 4_shot_twitter_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`4_shot_twitter_pipeline` is a English model originally trained by Nhat1904. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/4_shot_twitter_pipeline_en_5.5.0_3.0_1725595806232.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/4_shot_twitter_pipeline_en_5.5.0_3.0_1725595806232.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("4_shot_twitter_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("4_shot_twitter_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|4_shot_twitter_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/Nhat1904/4-shot-twitter + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-500_sdb_taxxl_truncate_768_en.md b/docs/_posts/ahmedlone127/2024-09-06-500_sdb_taxxl_truncate_768_en.md new file mode 100644 index 00000000000000..0f0b2945efe852 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-500_sdb_taxxl_truncate_768_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English 500_sdb_taxxl_truncate_768 DistilBertEmbeddings from sripadhstudy +author: John Snow Labs +name: 500_sdb_taxxl_truncate_768 +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`500_sdb_taxxl_truncate_768` is a English model originally trained by sripadhstudy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/500_sdb_taxxl_truncate_768_en_5.5.0_3.0_1725639277923.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/500_sdb_taxxl_truncate_768_en_5.5.0_3.0_1725639277923.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("500_sdb_taxxl_truncate_768","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("500_sdb_taxxl_truncate_768","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|500_sdb_taxxl_truncate_768| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.6 MB| + +## References + +https://huggingface.co/sripadhstudy/500_SDB_TAxxL_truncate_768 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-5w1h_recognizer_pipeline_es.md b/docs/_posts/ahmedlone127/2024-09-06-5w1h_recognizer_pipeline_es.md new file mode 100644 index 00000000000000..777b0042248cc7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-5w1h_recognizer_pipeline_es.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Castilian, Spanish 5w1h_recognizer_pipeline pipeline BertForTokenClassification from rsepulvedat +author: John Snow Labs +name: 5w1h_recognizer_pipeline +date: 2024-09-06 +tags: [es, open_source, pipeline, onnx] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`5w1h_recognizer_pipeline` is a Castilian, Spanish model originally trained by rsepulvedat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/5w1h_recognizer_pipeline_es_5.5.0_3.0_1725633941615.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/5w1h_recognizer_pipeline_es_5.5.0_3.0_1725633941615.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("5w1h_recognizer_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("5w1h_recognizer_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
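+
+The example above assumes a running Spark NLP session and an input DataFrame `df` with a `text` column, neither of which is shown on the card. A minimal setup sketch (the Spanish sentence is only an illustration, not part of the original example):
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+# Pretrained pipelines read their input from a "text" column.
+df = spark.createDataFrame([["Juan viajó a Madrid el lunes para presentar el informe."]]).toDF("text")
+
+pipeline = PretrainedPipeline("5w1h_recognizer_pipeline", lang="es")
+annotations = pipeline.transform(df)
+annotations.printSchema()  # output column names depend on the pipeline's stages
+```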
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|5w1h_recognizer_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|409.6 MB| + +## References + +https://huggingface.co/rsepulvedat/5W1H_Recognizer + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-7_shot_sta_slovak_batch10_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-7_shot_sta_slovak_batch10_pipeline_en.md new file mode 100644 index 00000000000000..dd1c1815e197a4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-7_shot_sta_slovak_batch10_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English 7_shot_sta_slovak_batch10_pipeline pipeline MPNetEmbeddings from Nhat1904 +author: John Snow Labs +name: 7_shot_sta_slovak_batch10_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`7_shot_sta_slovak_batch10_pipeline` is a English model originally trained by Nhat1904. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/7_shot_sta_slovak_batch10_pipeline_en_5.5.0_3.0_1725595952324.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/7_shot_sta_slovak_batch10_pipeline_en_5.5.0_3.0_1725595952324.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("7_shot_sta_slovak_batch10_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("7_shot_sta_slovak_batch10_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|7_shot_sta_slovak_batch10_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/Nhat1904/7_shot_STA_sk_batch10 + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-acarisbert_distilbert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-acarisbert_distilbert_pipeline_en.md new file mode 100644 index 00000000000000..c300e27a6af8b2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-acarisbert_distilbert_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English acarisbert_distilbert_pipeline pipeline DistilBertForSequenceClassification from ongknsro +author: John Snow Labs +name: acarisbert_distilbert_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`acarisbert_distilbert_pipeline` is a English model originally trained by ongknsro. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/acarisbert_distilbert_pipeline_en_5.5.0_3.0_1725608455088.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/acarisbert_distilbert_pipeline_en_5.5.0_3.0_1725608455088.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("acarisbert_distilbert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("acarisbert_distilbert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|acarisbert_distilbert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/ongknsro/ACARISBERT-DistilBERT + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-accu_2_en.md b/docs/_posts/ahmedlone127/2024-09-06-accu_2_en.md new file mode 100644 index 00000000000000..fc01a10da118e9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-accu_2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English accu_2 RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: accu_2 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`accu_2` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/accu_2_en_5.5.0_3.0_1725613464074.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/accu_2_en_5.5.0_3.0_1725613464074.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("accu_2","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("accu_2", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
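+
+The predicted label ends up in the `class` column as an annotation; a short sketch of reading it back from the `pipelineDF` built above (the label strings themselves depend on how `accu_2` was trained and are not listed on this card):
+
+```python
+# "result" holds the predicted label for each input row.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```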
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|accu_2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/Accu_2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-acrossapps_ndd_mantisbt_test_content_en.md b/docs/_posts/ahmedlone127/2024-09-06-acrossapps_ndd_mantisbt_test_content_en.md new file mode 100644 index 00000000000000..7a2bee0b9461df --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-acrossapps_ndd_mantisbt_test_content_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English acrossapps_ndd_mantisbt_test_content DistilBertForSequenceClassification from lgk03 +author: John Snow Labs +name: acrossapps_ndd_mantisbt_test_content +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`acrossapps_ndd_mantisbt_test_content` is a English model originally trained by lgk03. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/acrossapps_ndd_mantisbt_test_content_en_5.5.0_3.0_1725607834972.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/acrossapps_ndd_mantisbt_test_content_en_5.5.0_3.0_1725607834972.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("acrossapps_ndd_mantisbt_test_content","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("acrossapps_ndd_mantisbt_test_content", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
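+
+For quick single-document inference it can be more convenient to wrap the fitted model in a LightPipeline instead of building a DataFrame. A minimal sketch, reusing `pipelineModel` from above (the input sentence is illustrative):
+
+```python
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+# Returns a dict mapping each output column (document, token, class) to its results.
+print(light.annotate("I love spark-nlp"))
+```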
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|acrossapps_ndd_mantisbt_test_content| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/lgk03/ACROSSAPPS_NDD-mantisbt_test-content \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-acrossapps_ndd_mantisbt_test_content_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-acrossapps_ndd_mantisbt_test_content_pipeline_en.md new file mode 100644 index 00000000000000..0ccf131525fff1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-acrossapps_ndd_mantisbt_test_content_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English acrossapps_ndd_mantisbt_test_content_pipeline pipeline DistilBertForSequenceClassification from lgk03 +author: John Snow Labs +name: acrossapps_ndd_mantisbt_test_content_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`acrossapps_ndd_mantisbt_test_content_pipeline` is a English model originally trained by lgk03. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/acrossapps_ndd_mantisbt_test_content_pipeline_en_5.5.0_3.0_1725607852871.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/acrossapps_ndd_mantisbt_test_content_pipeline_en_5.5.0_3.0_1725607852871.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("acrossapps_ndd_mantisbt_test_content_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("acrossapps_ndd_mantisbt_test_content_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|acrossapps_ndd_mantisbt_test_content_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/lgk03/ACROSSAPPS_NDD-mantisbt_test-content + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-address_emnet_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-address_emnet_pipeline_en.md new file mode 100644 index 00000000000000..50e7a2003bbf4c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-address_emnet_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English address_emnet_pipeline pipeline MPNetEmbeddings from pawan2411 +author: John Snow Labs +name: address_emnet_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`address_emnet_pipeline` is a English model originally trained by pawan2411. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/address_emnet_pipeline_en_5.5.0_3.0_1725594907866.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/address_emnet_pipeline_en_5.5.0_3.0_1725594907866.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("address_emnet_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("address_emnet_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|address_emnet_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|390.5 MB| + +## References + +https://huggingface.co/pawan2411/address-emnet + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-afro_xlmr_mini_finetuned_kintweetsd_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-afro_xlmr_mini_finetuned_kintweetsd_pipeline_en.md new file mode 100644 index 00000000000000..aff1cefc855c57 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-afro_xlmr_mini_finetuned_kintweetsd_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English afro_xlmr_mini_finetuned_kintweetsd_pipeline pipeline XlmRoBertaEmbeddings from RogerB +author: John Snow Labs +name: afro_xlmr_mini_finetuned_kintweetsd_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`afro_xlmr_mini_finetuned_kintweetsd_pipeline` is a English model originally trained by RogerB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/afro_xlmr_mini_finetuned_kintweetsd_pipeline_en_5.5.0_3.0_1725596562317.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/afro_xlmr_mini_finetuned_kintweetsd_pipeline_en_5.5.0_3.0_1725596562317.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("afro_xlmr_mini_finetuned_kintweetsd_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("afro_xlmr_mini_finetuned_kintweetsd_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|afro_xlmr_mini_finetuned_kintweetsd_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|443.1 MB| + +## References + +https://huggingface.co/RogerB/afro-xlmr-mini-finetuned-kintweetsD + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-ag_news_microsoft_deberta_v3_base_seed_3_en.md b/docs/_posts/ahmedlone127/2024-09-06-ag_news_microsoft_deberta_v3_base_seed_3_en.md new file mode 100644 index 00000000000000..77f6d8451a26fd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-ag_news_microsoft_deberta_v3_base_seed_3_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ag_news_microsoft_deberta_v3_base_seed_3 DeBertaForSequenceClassification from utahnlp +author: John Snow Labs +name: ag_news_microsoft_deberta_v3_base_seed_3 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ag_news_microsoft_deberta_v3_base_seed_3` is a English model originally trained by utahnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ag_news_microsoft_deberta_v3_base_seed_3_en_5.5.0_3.0_1725589884771.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ag_news_microsoft_deberta_v3_base_seed_3_en_5.5.0_3.0_1725589884771.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("ag_news_microsoft_deberta_v3_base_seed_3","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("ag_news_microsoft_deberta_v3_base_seed_3", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ag_news_microsoft_deberta_v3_base_seed_3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|643.8 MB| + +## References + +https://huggingface.co/utahnlp/ag_news_microsoft_deberta-v3-base_seed-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-ag_news_microsoft_deberta_v3_base_seed_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-ag_news_microsoft_deberta_v3_base_seed_3_pipeline_en.md new file mode 100644 index 00000000000000..47414e44393d83 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-ag_news_microsoft_deberta_v3_base_seed_3_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English ag_news_microsoft_deberta_v3_base_seed_3_pipeline pipeline DeBertaForSequenceClassification from utahnlp +author: John Snow Labs +name: ag_news_microsoft_deberta_v3_base_seed_3_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ag_news_microsoft_deberta_v3_base_seed_3_pipeline` is a English model originally trained by utahnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ag_news_microsoft_deberta_v3_base_seed_3_pipeline_en_5.5.0_3.0_1725589933112.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ag_news_microsoft_deberta_v3_base_seed_3_pipeline_en_5.5.0_3.0_1725589933112.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ag_news_microsoft_deberta_v3_base_seed_3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ag_news_microsoft_deberta_v3_base_seed_3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ag_news_microsoft_deberta_v3_base_seed_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|643.8 MB| + +## References + +https://huggingface.co/utahnlp/ag_news_microsoft_deberta-v3-base_seed-3 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-ag_news_microsoft_deberta_v3_large_seed_3_en.md b/docs/_posts/ahmedlone127/2024-09-06-ag_news_microsoft_deberta_v3_large_seed_3_en.md new file mode 100644 index 00000000000000..0c9ef0fb66a53c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-ag_news_microsoft_deberta_v3_large_seed_3_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ag_news_microsoft_deberta_v3_large_seed_3 DeBertaForSequenceClassification from utahnlp +author: John Snow Labs +name: ag_news_microsoft_deberta_v3_large_seed_3 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ag_news_microsoft_deberta_v3_large_seed_3` is a English model originally trained by utahnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ag_news_microsoft_deberta_v3_large_seed_3_en_5.5.0_3.0_1725590365212.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ag_news_microsoft_deberta_v3_large_seed_3_en_5.5.0_3.0_1725590365212.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("ag_news_microsoft_deberta_v3_large_seed_3","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("ag_news_microsoft_deberta_v3_large_seed_3", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
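+
+At roughly 1.6 GB this large variant needs noticeably more driver memory than the base model when it is loaded. A hedged sketch of starting the session with extra memory (the 16G figure is an assumption; size it to your cluster):
+
+```python
+import sparknlp
+
+# Allocate generous driver memory before downloading and loading the ~1.6 GB model.
+spark = sparknlp.start(memory="16G")
+```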
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ag_news_microsoft_deberta_v3_large_seed_3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/utahnlp/ag_news_microsoft_deberta-v3-large_seed-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-ag_news_microsoft_deberta_v3_large_seed_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-ag_news_microsoft_deberta_v3_large_seed_3_pipeline_en.md new file mode 100644 index 00000000000000..b1d259e25dc9c8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-ag_news_microsoft_deberta_v3_large_seed_3_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English ag_news_microsoft_deberta_v3_large_seed_3_pipeline pipeline DeBertaForSequenceClassification from utahnlp +author: John Snow Labs +name: ag_news_microsoft_deberta_v3_large_seed_3_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ag_news_microsoft_deberta_v3_large_seed_3_pipeline` is a English model originally trained by utahnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ag_news_microsoft_deberta_v3_large_seed_3_pipeline_en_5.5.0_3.0_1725590466204.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ag_news_microsoft_deberta_v3_large_seed_3_pipeline_en_5.5.0_3.0_1725590466204.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ag_news_microsoft_deberta_v3_large_seed_3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ag_news_microsoft_deberta_v3_large_seed_3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ag_news_microsoft_deberta_v3_large_seed_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/utahnlp/ag_news_microsoft_deberta-v3-large_seed-3 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-ai_human_detai_kk.md b/docs/_posts/ahmedlone127/2024-09-06-ai_human_detai_kk.md new file mode 100644 index 00000000000000..71d51e08a3936d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-ai_human_detai_kk.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Kazakh ai_human_detai DistilBertForSequenceClassification from Ayanm +author: John Snow Labs +name: ai_human_detai +date: 2024-09-06 +tags: [kk, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: kk +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ai_human_detai` is a Kazakh model originally trained by Ayanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ai_human_detai_kk_5.5.0_3.0_1725608317165.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ai_human_detai_kk_5.5.0_3.0_1725608317165.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("ai_human_detai","kk") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("ai_human_detai", "kk")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ai_human_detai| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|kk| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Ayanm/ai-human-detai \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-albert_base_chinese_sayula_popoluca_zh.md b/docs/_posts/ahmedlone127/2024-09-06-albert_base_chinese_sayula_popoluca_zh.md new file mode 100644 index 00000000000000..e410645a3abbd2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-albert_base_chinese_sayula_popoluca_zh.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Chinese albert_base_chinese_sayula_popoluca BertForTokenClassification from ckiplab +author: John Snow Labs +name: albert_base_chinese_sayula_popoluca +date: 2024-09-06 +tags: [zh, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: zh +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_base_chinese_sayula_popoluca` is a Chinese model originally trained by ckiplab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_chinese_sayula_popoluca_zh_5.5.0_3.0_1725634264164.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_chinese_sayula_popoluca_zh_5.5.0_3.0_1725634264164.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("albert_base_chinese_sayula_popoluca","zh") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("albert_base_chinese_sayula_popoluca", "zh")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
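+
+Because this checkpoint was trained on Chinese, the generic English sentence in the snippets above is only a placeholder. A short sketch with Chinese input (the sample sentence is illustrative) that shows tokens next to their predicted tags:
+
+```python
+data = spark.createDataFrame([["我喜欢自然语言处理"]]).toDF("text")
+result = pipeline.fit(data).transform(data)
+# token.result and ner.result are aligned arrays: one tag per token.
+result.select("token.result", "ner.result").show(truncate=False)
+```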
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_chinese_sayula_popoluca| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|zh| +|Size:|37.7 MB| + +## References + +https://huggingface.co/ckiplab/albert-base-chinese-pos \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-albert_base_v2_luciayn_en.md b/docs/_posts/ahmedlone127/2024-09-06-albert_base_v2_luciayn_en.md new file mode 100644 index 00000000000000..65e7184b5ee1f6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-albert_base_v2_luciayn_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English albert_base_v2_luciayn AlbertForSequenceClassification from luciayn +author: John Snow Labs +name: albert_base_v2_luciayn +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_base_v2_luciayn` is a English model originally trained by luciayn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_luciayn_en_5.5.0_3.0_1725662347358.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_luciayn_en_5.5.0_3.0_1725662347358.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v2_luciayn","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v2_luciayn", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_luciayn| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/luciayn/albert_base_v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-albert_bbc_news_en.md b/docs/_posts/ahmedlone127/2024-09-06-albert_bbc_news_en.md new file mode 100644 index 00000000000000..f71131073c6a31 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-albert_bbc_news_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English albert_bbc_news AlbertForSequenceClassification from AyoubChLin +author: John Snow Labs +name: albert_bbc_news +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_bbc_news` is a English model originally trained by AyoubChLin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_bbc_news_en_5.5.0_3.0_1725662541847.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_bbc_news_en_5.5.0_3.0_1725662541847.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_bbc_news","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_bbc_news", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_bbc_news| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|833.9 MB| + +## References + +https://huggingface.co/AyoubChLin/Albert-bbc-news \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-albert_large_v2_fold_3_en.md b/docs/_posts/ahmedlone127/2024-09-06-albert_large_v2_fold_3_en.md new file mode 100644 index 00000000000000..4bf8fdd8f6ec0d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-albert_large_v2_fold_3_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English albert_large_v2_fold_3 AlbertForSequenceClassification from research-dump +author: John Snow Labs +name: albert_large_v2_fold_3 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_large_v2_fold_3` is a English model originally trained by research-dump. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_large_v2_fold_3_en_5.5.0_3.0_1725662533844.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_large_v2_fold_3_en_5.5.0_3.0_1725662533844.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_large_v2_fold_3","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_large_v2_fold_3", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_large_v2_fold_3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|66.7 MB| + +## References + +https://huggingface.co/research-dump/albert-large-v2_fold_3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-albert_large_v2_fold_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-albert_large_v2_fold_3_pipeline_en.md new file mode 100644 index 00000000000000..012e32614f1784 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-albert_large_v2_fold_3_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English albert_large_v2_fold_3_pipeline pipeline AlbertForSequenceClassification from research-dump +author: John Snow Labs +name: albert_large_v2_fold_3_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_large_v2_fold_3_pipeline` is a English model originally trained by research-dump. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_large_v2_fold_3_pipeline_en_5.5.0_3.0_1725662537184.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_large_v2_fold_3_pipeline_en_5.5.0_3.0_1725662537184.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("albert_large_v2_fold_3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("albert_large_v2_fold_3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_large_v2_fold_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|66.7 MB| + +## References + +https://huggingface.co/research-dump/albert-large-v2_fold_3 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-albert_tiny_chinese_sayula_popoluca_pipeline_zh.md b/docs/_posts/ahmedlone127/2024-09-06-albert_tiny_chinese_sayula_popoluca_pipeline_zh.md new file mode 100644 index 00000000000000..72a8a56c0413f1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-albert_tiny_chinese_sayula_popoluca_pipeline_zh.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Chinese albert_tiny_chinese_sayula_popoluca_pipeline pipeline BertForTokenClassification from ckiplab +author: John Snow Labs +name: albert_tiny_chinese_sayula_popoluca_pipeline +date: 2024-09-06 +tags: [zh, open_source, pipeline, onnx] +task: Named Entity Recognition +language: zh +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_tiny_chinese_sayula_popoluca_pipeline` is a Chinese model originally trained by ckiplab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_tiny_chinese_sayula_popoluca_pipeline_zh_5.5.0_3.0_1725663304494.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_tiny_chinese_sayula_popoluca_pipeline_zh_5.5.0_3.0_1725663304494.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("albert_tiny_chinese_sayula_popoluca_pipeline", lang = "zh") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("albert_tiny_chinese_sayula_popoluca_pipeline", lang = "zh") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_tiny_chinese_sayula_popoluca_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|zh| +|Size:|15.2 MB| + +## References + +https://huggingface.co/ckiplab/albert-tiny-chinese-pos + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-albert_tiny_chinese_sayula_popoluca_zh.md b/docs/_posts/ahmedlone127/2024-09-06-albert_tiny_chinese_sayula_popoluca_zh.md new file mode 100644 index 00000000000000..96978ac5d8ee32 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-albert_tiny_chinese_sayula_popoluca_zh.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Chinese albert_tiny_chinese_sayula_popoluca BertForTokenClassification from ckiplab +author: John Snow Labs +name: albert_tiny_chinese_sayula_popoluca +date: 2024-09-06 +tags: [zh, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: zh +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_tiny_chinese_sayula_popoluca` is a Chinese model originally trained by ckiplab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_tiny_chinese_sayula_popoluca_zh_5.5.0_3.0_1725663303386.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_tiny_chinese_sayula_popoluca_zh_5.5.0_3.0_1725663303386.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("albert_tiny_chinese_sayula_popoluca","zh") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("albert_tiny_chinese_sayula_popoluca", "zh")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_tiny_chinese_sayula_popoluca| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|zh| +|Size:|15.2 MB| + +## References + +https://huggingface.co/ckiplab/albert-tiny-chinese-pos \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-all_mpnet_all_permutations_64_05_mnr_en.md b/docs/_posts/ahmedlone127/2024-09-06-all_mpnet_all_permutations_64_05_mnr_en.md new file mode 100644 index 00000000000000..57e7a2c0c389f2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-all_mpnet_all_permutations_64_05_mnr_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English all_mpnet_all_permutations_64_05_mnr MPNetEmbeddings from ronanki +author: John Snow Labs +name: all_mpnet_all_permutations_64_05_mnr +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`all_mpnet_all_permutations_64_05_mnr` is a English model originally trained by ronanki. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_all_permutations_64_05_mnr_en_5.5.0_3.0_1725594872639.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_all_permutations_64_05_mnr_en_5.5.0_3.0_1725594872639.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("all_mpnet_all_permutations_64_05_mnr","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("all_mpnet_all_permutations_64_05_mnr","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
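+
+A common use of these sentence embeddings is semantic similarity. A sketch that embeds two texts with the pipeline defined above and compares them with cosine similarity (the sentences are illustrative and NumPy is assumed to be available):
+
+```python
+import numpy as np
+
+data = spark.createDataFrame([["I love spark-nlp"], ["Spark NLP is great"]]).toDF("text")
+rows = pipeline.fit(data).transform(data) \
+    .selectExpr("explode(embeddings) as e") \
+    .selectExpr("e.embeddings as v") \
+    .collect()
+a, b = (np.array(r.v) for r in rows)
+# Cosine similarity between the two sentence vectors.
+print(float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b))))
+```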
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_all_permutations_64_05_mnr| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/ronanki/all_mpnet_all_permutations_64_05_MNR \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-all_mpnet_all_permutations_64_05_mnr_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-all_mpnet_all_permutations_64_05_mnr_pipeline_en.md new file mode 100644 index 00000000000000..7bd91199f6a3c9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-all_mpnet_all_permutations_64_05_mnr_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English all_mpnet_all_permutations_64_05_mnr_pipeline pipeline MPNetEmbeddings from ronanki +author: John Snow Labs +name: all_mpnet_all_permutations_64_05_mnr_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`all_mpnet_all_permutations_64_05_mnr_pipeline` is a English model originally trained by ronanki. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_all_permutations_64_05_mnr_pipeline_en_5.5.0_3.0_1725594896975.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_all_permutations_64_05_mnr_pipeline_en_5.5.0_3.0_1725594896975.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("all_mpnet_all_permutations_64_05_mnr_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("all_mpnet_all_permutations_64_05_mnr_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
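+
+The `df` passed to `transform` above is assumed to be a DataFrame with a single `text` column. A minimal sketch of both input styles, assuming the usual `annotate` helper for quick single-string checks:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("all_mpnet_all_permutations_64_05_mnr_pipeline", lang = "en")
+
+# DataFrame input: one column named "text".
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+# Quick check on one string without building a DataFrame.
+print(pipeline.annotate("I love spark-nlp"))
+```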
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_all_permutations_64_05_mnr_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/ronanki/all_mpnet_all_permutations_64_05_MNR + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-all_mpnet_base_v2_2epoch_30000_mar2_closs_prsn_en.md b/docs/_posts/ahmedlone127/2024-09-06-all_mpnet_base_v2_2epoch_30000_mar2_closs_prsn_en.md new file mode 100644 index 00000000000000..18f79bb34012c7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-all_mpnet_base_v2_2epoch_30000_mar2_closs_prsn_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English all_mpnet_base_v2_2epoch_30000_mar2_closs_prsn MPNetEmbeddings from ahessamb +author: John Snow Labs +name: all_mpnet_base_v2_2epoch_30000_mar2_closs_prsn +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`all_mpnet_base_v2_2epoch_30000_mar2_closs_prsn` is a English model originally trained by ahessamb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_2epoch_30000_mar2_closs_prsn_en_5.5.0_3.0_1725595153806.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_2epoch_30000_mar2_closs_prsn_en_5.5.0_3.0_1725595153806.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_v2_2epoch_30000_mar2_closs_prsn","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_v2_2epoch_30000_mar2_closs_prsn","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_base_v2_2epoch_30000_mar2_closs_prsn| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/ahessamb/all-mpnet-base-v2-2epoch-30000-mar2-closs-prsn \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-all_mpnet_base_v2_2epoch_30000_mar2_closs_prsn_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-all_mpnet_base_v2_2epoch_30000_mar2_closs_prsn_pipeline_en.md new file mode 100644 index 00000000000000..6dbeb7881a1b8c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-all_mpnet_base_v2_2epoch_30000_mar2_closs_prsn_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English all_mpnet_base_v2_2epoch_30000_mar2_closs_prsn_pipeline pipeline MPNetEmbeddings from ahessamb +author: John Snow Labs +name: all_mpnet_base_v2_2epoch_30000_mar2_closs_prsn_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`all_mpnet_base_v2_2epoch_30000_mar2_closs_prsn_pipeline` is a English model originally trained by ahessamb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_2epoch_30000_mar2_closs_prsn_pipeline_en_5.5.0_3.0_1725595174677.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_2epoch_30000_mar2_closs_prsn_pipeline_en_5.5.0_3.0_1725595174677.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("all_mpnet_base_v2_2epoch_30000_mar2_closs_prsn_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("all_mpnet_base_v2_2epoch_30000_mar2_closs_prsn_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_base_v2_2epoch_30000_mar2_closs_prsn_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/ahessamb/all-mpnet-base-v2-2epoch-30000-mar2-closs-prsn + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-all_mpnet_base_v2__tweet_eval_emotion__classifier_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-all_mpnet_base_v2__tweet_eval_emotion__classifier_pipeline_en.md new file mode 100644 index 00000000000000..cb31de7de99fba --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-all_mpnet_base_v2__tweet_eval_emotion__classifier_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English all_mpnet_base_v2__tweet_eval_emotion__classifier_pipeline pipeline MPNetForSequenceClassification from florentgbelidji +author: John Snow Labs +name: all_mpnet_base_v2__tweet_eval_emotion__classifier_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`all_mpnet_base_v2__tweet_eval_emotion__classifier_pipeline` is a English model originally trained by florentgbelidji. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2__tweet_eval_emotion__classifier_pipeline_en_5.5.0_3.0_1725655765622.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2__tweet_eval_emotion__classifier_pipeline_en_5.5.0_3.0_1725655765622.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("all_mpnet_base_v2__tweet_eval_emotion__classifier_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("all_mpnet_base_v2__tweet_eval_emotion__classifier_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_base_v2__tweet_eval_emotion__classifier_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/florentgbelidji/all-mpnet-base-v2__tweet_eval_emotion__classifier + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-all_mpnet_base_v2_bioasq_matryoshka_en.md b/docs/_posts/ahmedlone127/2024-09-06-all_mpnet_base_v2_bioasq_matryoshka_en.md new file mode 100644 index 00000000000000..da9e37002ac263 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-all_mpnet_base_v2_bioasq_matryoshka_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English all_mpnet_base_v2_bioasq_matryoshka MPNetEmbeddings from juanpablomesa +author: John Snow Labs +name: all_mpnet_base_v2_bioasq_matryoshka +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`all_mpnet_base_v2_bioasq_matryoshka` is a English model originally trained by juanpablomesa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_bioasq_matryoshka_en_5.5.0_3.0_1725594872804.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_bioasq_matryoshka_en_5.5.0_3.0_1725594872804.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_v2_bioasq_matryoshka","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_v2_bioasq_matryoshka","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_base_v2_bioasq_matryoshka| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/juanpablomesa/all-mpnet-base-v2-bioasq-matryoshka \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-all_mpnet_base_v2_bioasq_matryoshka_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-all_mpnet_base_v2_bioasq_matryoshka_pipeline_en.md new file mode 100644 index 00000000000000..b6350db1298ad7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-all_mpnet_base_v2_bioasq_matryoshka_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English all_mpnet_base_v2_bioasq_matryoshka_pipeline pipeline MPNetEmbeddings from juanpablomesa +author: John Snow Labs +name: all_mpnet_base_v2_bioasq_matryoshka_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`all_mpnet_base_v2_bioasq_matryoshka_pipeline` is a English model originally trained by juanpablomesa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_bioasq_matryoshka_pipeline_en_5.5.0_3.0_1725594896208.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_bioasq_matryoshka_pipeline_en_5.5.0_3.0_1725594896208.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("all_mpnet_base_v2_bioasq_matryoshka_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("all_mpnet_base_v2_bioasq_matryoshka_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_base_v2_bioasq_matryoshka_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/juanpablomesa/all-mpnet-base-v2-bioasq-matryoshka + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-all_mpnet_base_v2_fine_tuned_epochs_8_binhcode25_en.md b/docs/_posts/ahmedlone127/2024-09-06-all_mpnet_base_v2_fine_tuned_epochs_8_binhcode25_en.md new file mode 100644 index 00000000000000..3b1434f5362e24 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-all_mpnet_base_v2_fine_tuned_epochs_8_binhcode25_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English all_mpnet_base_v2_fine_tuned_epochs_8_binhcode25 MPNetEmbeddings from binhcode25 +author: John Snow Labs +name: all_mpnet_base_v2_fine_tuned_epochs_8_binhcode25 +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`all_mpnet_base_v2_fine_tuned_epochs_8_binhcode25` is a English model originally trained by binhcode25. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_fine_tuned_epochs_8_binhcode25_en_5.5.0_3.0_1725595446867.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_fine_tuned_epochs_8_binhcode25_en_5.5.0_3.0_1725595446867.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_v2_fine_tuned_epochs_8_binhcode25","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_v2_fine_tuned_epochs_8_binhcode25","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_base_v2_fine_tuned_epochs_8_binhcode25| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/binhcode25/all-mpnet-base-v2-fine-tuned-epochs-8 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-all_mpnet_base_v2_ict_ep30_en.md b/docs/_posts/ahmedlone127/2024-09-06-all_mpnet_base_v2_ict_ep30_en.md new file mode 100644 index 00000000000000..a0f9feec0c8eea --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-all_mpnet_base_v2_ict_ep30_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English all_mpnet_base_v2_ict_ep30 MPNetEmbeddings from hmehta92 +author: John Snow Labs +name: all_mpnet_base_v2_ict_ep30 +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`all_mpnet_base_v2_ict_ep30` is a English model originally trained by hmehta92. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_ict_ep30_en_5.5.0_3.0_1725595894806.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_ict_ep30_en_5.5.0_3.0_1725595894806.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_v2_ict_ep30","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_v2_ict_ep30","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_base_v2_ict_ep30| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/hmehta92/all-mpnet-base-v2-ict-ep30 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-all_mpnet_base_v2_ict_ep30_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-all_mpnet_base_v2_ict_ep30_pipeline_en.md new file mode 100644 index 00000000000000..e5e3c4602ea7ee --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-all_mpnet_base_v2_ict_ep30_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English all_mpnet_base_v2_ict_ep30_pipeline pipeline MPNetEmbeddings from hmehta92 +author: John Snow Labs +name: all_mpnet_base_v2_ict_ep30_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`all_mpnet_base_v2_ict_ep30_pipeline` is a English model originally trained by hmehta92. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_ict_ep30_pipeline_en_5.5.0_3.0_1725595915061.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_ict_ep30_pipeline_en_5.5.0_3.0_1725595915061.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("all_mpnet_base_v2_ict_ep30_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("all_mpnet_base_v2_ict_ep30_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_base_v2_ict_ep30_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/hmehta92/all-mpnet-base-v2-ict-ep30 + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-ancient_greek_to_1453_ner_bert_en.md b/docs/_posts/ahmedlone127/2024-09-06-ancient_greek_to_1453_ner_bert_en.md new file mode 100644 index 00000000000000..d20a4ca0c904a3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-ancient_greek_to_1453_ner_bert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ancient_greek_to_1453_ner_bert BertForTokenClassification from UGARIT +author: John Snow Labs +name: ancient_greek_to_1453_ner_bert +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ancient_greek_to_1453_ner_bert` is a English model originally trained by UGARIT. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ancient_greek_to_1453_ner_bert_en_5.5.0_3.0_1725600494387.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ancient_greek_to_1453_ner_bert_en_5.5.0_3.0_1725600494387.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("ancient_greek_to_1453_ner_bert","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("ancient_greek_to_1453_ner_bert", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
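+
+A common follow-up is to group the token-level tags in the `ner` column into entity chunks. A minimal sketch, assuming the standard `NerConverter` annotator and IOB-style labels:
+
+```python
+from sparknlp.annotator import NerConverter
+
+# Sketch: turn token-level NER tags into entity chunks.
+converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier, converter])
+result = pipeline.fit(data).transform(data)
+result.select("ner_chunk.result").show(truncate=False)
+```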
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ancient_greek_to_1453_ner_bert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|421.1 MB| + +## References + +https://huggingface.co/UGARIT/grc-ner-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-ancient_greek_to_1453_ner_bert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-ancient_greek_to_1453_ner_bert_pipeline_en.md new file mode 100644 index 00000000000000..63dc9515c390ff --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-ancient_greek_to_1453_ner_bert_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English ancient_greek_to_1453_ner_bert_pipeline pipeline BertForTokenClassification from UGARIT +author: John Snow Labs +name: ancient_greek_to_1453_ner_bert_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ancient_greek_to_1453_ner_bert_pipeline` is a English model originally trained by UGARIT. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ancient_greek_to_1453_ner_bert_pipeline_en_5.5.0_3.0_1725600515881.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ancient_greek_to_1453_ner_bert_pipeline_en_5.5.0_3.0_1725600515881.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ancient_greek_to_1453_ner_bert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ancient_greek_to_1453_ner_bert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ancient_greek_to_1453_ner_bert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|421.1 MB| + +## References + +https://huggingface.co/UGARIT/grc-ner-bert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-assistantapp_whisper_quran_pipeline_ar.md b/docs/_posts/ahmedlone127/2024-09-06-assistantapp_whisper_quran_pipeline_ar.md new file mode 100644 index 00000000000000..956f3cf76bed00 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-assistantapp_whisper_quran_pipeline_ar.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Arabic assistantapp_whisper_quran_pipeline pipeline WhisperForCTC from AssistantApp +author: John Snow Labs +name: assistantapp_whisper_quran_pipeline +date: 2024-09-06 +tags: [ar, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: ar +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`assistantapp_whisper_quran_pipeline` is a Arabic model originally trained by AssistantApp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/assistantapp_whisper_quran_pipeline_ar_5.5.0_3.0_1725603926900.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/assistantapp_whisper_quran_pipeline_ar_5.5.0_3.0_1725603926900.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("assistantapp_whisper_quran_pipeline", lang = "ar") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("assistantapp_whisper_quran_pipeline", lang = "ar") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|assistantapp_whisper_quran_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ar| +|Size:|643.1 MB| + +## References + +https://huggingface.co/AssistantApp/assistantapp-whisper-quran + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-atte_0_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-atte_0_pipeline_en.md new file mode 100644 index 00000000000000..665581640d41b9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-atte_0_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English atte_0_pipeline pipeline RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: atte_0_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`atte_0_pipeline` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/atte_0_pipeline_en_5.5.0_3.0_1725613057685.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/atte_0_pipeline_en_5.5.0_3.0_1725613057685.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("atte_0_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("atte_0_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|atte_0_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/Atte_0 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-audiosangraha_audio_tonga_tonga_islands_text_en.md b/docs/_posts/ahmedlone127/2024-09-06-audiosangraha_audio_tonga_tonga_islands_text_en.md new file mode 100644 index 00000000000000..6c608245ba0015 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-audiosangraha_audio_tonga_tonga_islands_text_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English audiosangraha_audio_tonga_tonga_islands_text WhisperForCTC from AqeelShafy7 +author: John Snow Labs +name: audiosangraha_audio_tonga_tonga_islands_text +date: 2024-09-06 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`audiosangraha_audio_tonga_tonga_islands_text` is a English model originally trained by AqeelShafy7. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/audiosangraha_audio_tonga_tonga_islands_text_en_5.5.0_3.0_1725603649651.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/audiosangraha_audio_tonga_tonga_islands_text_en_5.5.0_3.0_1725603649651.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("audiosangraha_audio_tonga_tonga_islands_text","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("audiosangraha_audio_tonga_tonga_islands_text", "en")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
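+
+The `data` referenced above is expected to be a DataFrame with an `audio_content` column holding the raw waveform as an array of floats. A minimal sketch of preparing such input (librosa is an assumption here; any loader that yields 16 kHz float samples works):
+
+```python
+import librosa
+
+# Load one clip as 16 kHz mono float samples and wrap it in a single-row DataFrame.
+raw, _ = librosa.load("sample.wav", sr=16000)
+data = spark.createDataFrame([[raw.tolist()]]).toDF("audio_content")
+```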
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|audiosangraha_audio_tonga_tonga_islands_text| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/AqeelShafy7/AudioSangraha-Audio_to_Text \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-audiosangraha_audio_tonga_tonga_islands_text_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-audiosangraha_audio_tonga_tonga_islands_text_pipeline_en.md new file mode 100644 index 00000000000000..d59cbb29b7560e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-audiosangraha_audio_tonga_tonga_islands_text_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English audiosangraha_audio_tonga_tonga_islands_text_pipeline pipeline WhisperForCTC from AqeelShafy7 +author: John Snow Labs +name: audiosangraha_audio_tonga_tonga_islands_text_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`audiosangraha_audio_tonga_tonga_islands_text_pipeline` is a English model originally trained by AqeelShafy7. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/audiosangraha_audio_tonga_tonga_islands_text_pipeline_en_5.5.0_3.0_1725603736535.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/audiosangraha_audio_tonga_tonga_islands_text_pipeline_en_5.5.0_3.0_1725603736535.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("audiosangraha_audio_tonga_tonga_islands_text_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("audiosangraha_audio_tonga_tonga_islands_text_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|audiosangraha_audio_tonga_tonga_islands_text_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/AqeelShafy7/AudioSangraha-Audio_to_Text + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-autotrain_okr_iptal_v5_48523117787_en.md b/docs/_posts/ahmedlone127/2024-09-06-autotrain_okr_iptal_v5_48523117787_en.md new file mode 100644 index 00000000000000..10e4067a0d2501 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-autotrain_okr_iptal_v5_48523117787_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English autotrain_okr_iptal_v5_48523117787 XlmRoBertaForSequenceClassification from ekincanozcelik +author: John Snow Labs +name: autotrain_okr_iptal_v5_48523117787 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`autotrain_okr_iptal_v5_48523117787` is a English model originally trained by ekincanozcelik. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/autotrain_okr_iptal_v5_48523117787_en_5.5.0_3.0_1725619691072.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/autotrain_okr_iptal_v5_48523117787_en_5.5.0_3.0_1725619691072.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("autotrain_okr_iptal_v5_48523117787","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("autotrain_okr_iptal_v5_48523117787", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
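+
+After `transform`, the predicted label for each row lands in the `class` annotation column, with the label string in its `result` field. For example:
+
+```python
+# Show the input text next to the predicted label.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```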
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|autotrain_okr_iptal_v5_48523117787| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|778.4 MB| + +## References + +https://huggingface.co/ekincanozcelik/autotrain-okr_iptal_v5-48523117787 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-autotrain_okr_iptal_v5_48523117787_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-autotrain_okr_iptal_v5_48523117787_pipeline_en.md new file mode 100644 index 00000000000000..ddc7d2a656bb75 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-autotrain_okr_iptal_v5_48523117787_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English autotrain_okr_iptal_v5_48523117787_pipeline pipeline XlmRoBertaForSequenceClassification from ekincanozcelik +author: John Snow Labs +name: autotrain_okr_iptal_v5_48523117787_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`autotrain_okr_iptal_v5_48523117787_pipeline` is a English model originally trained by ekincanozcelik. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/autotrain_okr_iptal_v5_48523117787_pipeline_en_5.5.0_3.0_1725619838965.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/autotrain_okr_iptal_v5_48523117787_pipeline_en_5.5.0_3.0_1725619838965.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("autotrain_okr_iptal_v5_48523117787_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("autotrain_okr_iptal_v5_48523117787_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|autotrain_okr_iptal_v5_48523117787_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|778.4 MB| + +## References + +https://huggingface.co/ekincanozcelik/autotrain-okr_iptal_v5-48523117787 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-banglapunctuationmodel_en.md b/docs/_posts/ahmedlone127/2024-09-06-banglapunctuationmodel_en.md new file mode 100644 index 00000000000000..46a97aaad9a7ad --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-banglapunctuationmodel_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English banglapunctuationmodel BertForTokenClassification from kabir5297 +author: John Snow Labs +name: banglapunctuationmodel +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`banglapunctuationmodel` is a English model originally trained by kabir5297. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/banglapunctuationmodel_en_5.5.0_3.0_1725600816869.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/banglapunctuationmodel_en_5.5.0_3.0_1725600816869.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("banglapunctuationmodel","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("banglapunctuationmodel", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|banglapunctuationmodel| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|890.2 MB| + +## References + +https://huggingface.co/kabir5297/BanglaPunctuationModel \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-banglapunctuationmodel_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-banglapunctuationmodel_pipeline_en.md new file mode 100644 index 00000000000000..018cbccae499f9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-banglapunctuationmodel_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English banglapunctuationmodel_pipeline pipeline BertForTokenClassification from kabir5297 +author: John Snow Labs +name: banglapunctuationmodel_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`banglapunctuationmodel_pipeline` is a English model originally trained by kabir5297. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/banglapunctuationmodel_pipeline_en_5.5.0_3.0_1725600859894.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/banglapunctuationmodel_pipeline_en_5.5.0_3.0_1725600859894.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("banglapunctuationmodel_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("banglapunctuationmodel_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|banglapunctuationmodel_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|890.3 MB| + +## References + +https://huggingface.co/kabir5297/BanglaPunctuationModel + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bat32_lr10_5_epo10_en.md b/docs/_posts/ahmedlone127/2024-09-06-bat32_lr10_5_epo10_en.md new file mode 100644 index 00000000000000..109a1531c4018d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bat32_lr10_5_epo10_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English bat32_lr10_5_epo10 WhisperForCTC from adiren7 +author: John Snow Labs +name: bat32_lr10_5_epo10 +date: 2024-09-06 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bat32_lr10_5_epo10` is a English model originally trained by adiren7. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bat32_lr10_5_epo10_en_5.5.0_3.0_1725586503139.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bat32_lr10_5_epo10_en_5.5.0_3.0_1725586503139.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("bat32_lr10_5_epo10","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("bat32_lr10_5_epo10", "en")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bat32_lr10_5_epo10| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/adiren7/bat32_lr10-5_epo10 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bat32_lr10_5_epo10_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-bat32_lr10_5_epo10_pipeline_en.md new file mode 100644 index 00000000000000..9e66c38934a442 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bat32_lr10_5_epo10_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bat32_lr10_5_epo10_pipeline pipeline WhisperForCTC from adiren7 +author: John Snow Labs +name: bat32_lr10_5_epo10_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bat32_lr10_5_epo10_pipeline` is a English model originally trained by adiren7. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bat32_lr10_5_epo10_pipeline_en_5.5.0_3.0_1725586610679.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bat32_lr10_5_epo10_pipeline_en_5.5.0_3.0_1725586610679.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bat32_lr10_5_epo10_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bat32_lr10_5_epo10_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bat32_lr10_5_epo10_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/adiren7/bat32_lr10-5_epo10 + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_base_cased_finetuned_conll2003_english_ner_en.md b/docs/_posts/ahmedlone127/2024-09-06-bert_base_cased_finetuned_conll2003_english_ner_en.md new file mode 100644 index 00000000000000..e7ac20dda163ad --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_base_cased_finetuned_conll2003_english_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_base_cased_finetuned_conll2003_english_ner BertForTokenClassification from MrRobson9 +author: John Snow Labs +name: bert_base_cased_finetuned_conll2003_english_ner +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_cased_finetuned_conll2003_english_ner` is a English model originally trained by MrRobson9. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_conll2003_english_ner_en_5.5.0_3.0_1725634101611.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_conll2003_english_ner_en_5.5.0_3.0_1725634101611.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("bert_base_cased_finetuned_conll2003_english_ner","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bert_base_cased_finetuned_conll2003_english_ner", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_conll2003_english_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/MrRobson9/bert-base-cased-finetuned-conll2003-english-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_base_cased_finetuned_conll2003_english_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-bert_base_cased_finetuned_conll2003_english_ner_pipeline_en.md new file mode 100644 index 00000000000000..9a021ba3c13937 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_base_cased_finetuned_conll2003_english_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_base_cased_finetuned_conll2003_english_ner_pipeline pipeline BertForTokenClassification from MrRobson9 +author: John Snow Labs +name: bert_base_cased_finetuned_conll2003_english_ner_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_cased_finetuned_conll2003_english_ner_pipeline` is a English model originally trained by MrRobson9. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_conll2003_english_ner_pipeline_en_5.5.0_3.0_1725634121324.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_conll2003_english_ner_pipeline_en_5.5.0_3.0_1725634121324.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_base_cased_finetuned_conll2003_english_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_base_cased_finetuned_conll2003_english_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
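+
+The snippet above assumes an existing DataFrame `df` with a string column named `text`. As a hedged, minimal sketch (the sample sentence is an assumption, and the `ner` output column follows from the included models listed below), the input could be built and the predictions inspected like this:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("bert_base_cased_finetuned_conll2003_english_ner_pipeline", lang = "en")
+
+# Any DataFrame with a string column named "text" works as input.
+df = spark.createDataFrame([["John Snow Labs is based in Delaware."]]).toDF("text")
+annotations = pipeline.transform(df)
+annotations.select("ner.result").show(truncate=False)
+```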
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_conll2003_english_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/MrRobson9/bert-base-cased-finetuned-conll2003-english-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_base_cased_finetuned_mrpc_en.md b/docs/_posts/ahmedlone127/2024-09-06-bert_base_cased_finetuned_mrpc_en.md new file mode 100644 index 00000000000000..5ee58e7b567c30 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_base_cased_finetuned_mrpc_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English bert_base_cased_finetuned_mrpc BertEmbeddings from huggingface +author: John Snow Labs +name: bert_base_cased_finetuned_mrpc +date: 2024-09-06 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_cased_finetuned_mrpc` is a English model originally trained by huggingface. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_mrpc_en_5.5.0_3.0_1725614441729.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_mrpc_en_5.5.0_3.0_1725614441729.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("bert_base_cased_finetuned_mrpc","en") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("documents")
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("bert_base_cased_finetuned_mrpc", "en")
+    .setInputCols(Array("documents", "token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val data = Seq("I love spark-nlp").toDF("text")
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+```
+</div>
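+
+If the token embeddings need to feed downstream Spark ML stages, a possible follow-up (a sketch, not part of the original card; the `finished_embeddings` column name is an assumption) is an `EmbeddingsFinisher` applied to the `pipelineDF` produced above:
+
+```python
+from sparknlp.base import EmbeddingsFinisher
+
+# Converts Spark NLP embedding annotations into plain Spark ML vectors.
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["finished_embeddings"]) \
+    .setOutputAsVector(True)
+
+finished = finisher.transform(pipelineDF)
+finished.select("finished_embeddings").show(truncate=False)
+```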
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_mrpc| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|en| +|Size:|403.6 MB| + +## References + +References + +https://huggingface.co/bert-base-cased-finetuned-mrpc \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_base_cased_finetuned_mrpc_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-bert_base_cased_finetuned_mrpc_pipeline_en.md new file mode 100644 index 00000000000000..9de95be1972e8d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_base_cased_finetuned_mrpc_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_base_cased_finetuned_mrpc_pipeline pipeline BertEmbeddings from google-bert +author: John Snow Labs +name: bert_base_cased_finetuned_mrpc_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_cased_finetuned_mrpc_pipeline` is a English model originally trained by google-bert. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_mrpc_pipeline_en_5.5.0_3.0_1725614464320.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_mrpc_pipeline_en_5.5.0_3.0_1725614464320.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_base_cased_finetuned_mrpc_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_base_cased_finetuned_mrpc_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_mrpc_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/google-bert/bert-base-cased-finetuned-mrpc + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_base_german_dbmdz_cased_pipeline_de.md b/docs/_posts/ahmedlone127/2024-09-06-bert_base_german_dbmdz_cased_pipeline_de.md new file mode 100644 index 00000000000000..02c73705c3e1b2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_base_german_dbmdz_cased_pipeline_de.md @@ -0,0 +1,70 @@ +--- +layout: model +title: German bert_base_german_dbmdz_cased_pipeline pipeline BertEmbeddings from google-bert +author: John Snow Labs +name: bert_base_german_dbmdz_cased_pipeline +date: 2024-09-06 +tags: [de, open_source, pipeline, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_german_dbmdz_cased_pipeline` is a German model originally trained by google-bert. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_german_dbmdz_cased_pipeline_de_5.5.0_3.0_1725614685159.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_german_dbmdz_cased_pipeline_de_5.5.0_3.0_1725614685159.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_base_german_dbmdz_cased_pipeline", lang = "de") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_base_german_dbmdz_cased_pipeline", lang = "de") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_german_dbmdz_cased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|de| +|Size:|409.9 MB| + +## References + +https://huggingface.co/google-bert/bert-base-german-dbmdz-cased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_base_german_dbmdz_uncased_de.md b/docs/_posts/ahmedlone127/2024-09-06-bert_base_german_dbmdz_uncased_de.md new file mode 100644 index 00000000000000..7d40c08fe42c92 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_base_german_dbmdz_uncased_de.md @@ -0,0 +1,92 @@ +--- +layout: model +title: German bert_base_german_dbmdz_uncased BertEmbeddings from huggingface +author: John Snow Labs +name: bert_base_german_dbmdz_uncased +date: 2024-09-06 +tags: [bert, de, open_source, fill_mask, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_german_dbmdz_uncased` is a German model originally trained by huggingface. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_german_dbmdz_uncased_de_5.5.0_3.0_1725659208309.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_german_dbmdz_uncased_de_5.5.0_3.0_1725659208309.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("bert_base_german_dbmdz_uncased","de") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("documents")
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("bert_base_german_dbmdz_uncased", "de")
+    .setInputCols(Array("documents", "token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val data = Seq("I love spark-nlp").toDF("text")
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_german_dbmdz_uncased| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|de| +|Size:|409.9 MB| + +## References + +References + +https://huggingface.co/bert-base-german-dbmdz-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_base_magicslabnu_en.md b/docs/_posts/ahmedlone127/2024-09-06-bert_base_magicslabnu_en.md new file mode 100644 index 00000000000000..195c311d10d152 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_base_magicslabnu_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_base_magicslabnu BertEmbeddings from magicslabnu +author: John Snow Labs +name: bert_base_magicslabnu +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_magicslabnu` is a English model originally trained by magicslabnu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_magicslabnu_en_5.5.0_3.0_1725659518401.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_magicslabnu_en_5.5.0_3.0_1725659518401.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_base_magicslabnu","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("bert_base_magicslabnu","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
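+
+For a quick sanity check of the result, a small hedged sketch (it reuses `pipelineDF` and the column names from the example above) can explode the per-token annotations and confirm the vector size:
+
+```python
+from pyspark.sql import functions as F
+
+# One row per token annotation; "embeddings" holds the float vector for that token.
+exploded = pipelineDF.select(F.explode("embeddings").alias("ann"))
+exploded.select(F.col("ann.result").alias("token"),
+                F.size("ann.embeddings").alias("dimensions")).show(truncate=False)
+```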
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_magicslabnu| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|en| +|Size:|408.4 MB| + +## References + +https://huggingface.co/magicslabnu/BERT_base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_base_multilingual_cased_finetuned_amharic_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-06-bert_base_multilingual_cased_finetuned_amharic_pipeline_xx.md new file mode 100644 index 00000000000000..a6551d960ea579 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_base_multilingual_cased_finetuned_amharic_pipeline_xx.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Multilingual bert_base_multilingual_cased_finetuned_amharic_pipeline pipeline BertEmbeddings from Davlan +author: John Snow Labs +name: bert_base_multilingual_cased_finetuned_amharic_pipeline +date: 2024-09-06 +tags: [xx, open_source, pipeline, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_multilingual_cased_finetuned_amharic_pipeline` is a Multilingual model originally trained by Davlan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_multilingual_cased_finetuned_amharic_pipeline_xx_5.5.0_3.0_1725659564013.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_multilingual_cased_finetuned_amharic_pipeline_xx_5.5.0_3.0_1725659564013.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_base_multilingual_cased_finetuned_amharic_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_base_multilingual_cased_finetuned_amharic_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
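+
+For quick experiments on a single string, the pretrained pipeline can also be driven without building a DataFrame. This is a hedged sketch; the sample sentence is arbitrary and the available keys depend on the included models listed below.
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("bert_base_multilingual_cased_finetuned_amharic_pipeline", lang = "xx")
+
+# annotate() runs the same stages on one string and returns a dict of annotation results.
+result = pipeline.annotate("Spark NLP makes distributed NLP straightforward.")
+print(result.keys())
+```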
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_multilingual_cased_finetuned_amharic_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|665.0 MB| + +## References + +https://huggingface.co/Davlan/bert-base-multilingual-cased-finetuned-amharic + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_base_multilingual_cased_finetuned_amharic_xx.md b/docs/_posts/ahmedlone127/2024-09-06-bert_base_multilingual_cased_finetuned_amharic_xx.md new file mode 100644 index 00000000000000..fc4ea9d1b4e1e9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_base_multilingual_cased_finetuned_amharic_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual bert_base_multilingual_cased_finetuned_amharic BertEmbeddings from Davlan +author: John Snow Labs +name: bert_base_multilingual_cased_finetuned_amharic +date: 2024-09-06 +tags: [xx, open_source, onnx, embeddings, bert] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_multilingual_cased_finetuned_amharic` is a Multilingual model originally trained by Davlan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_multilingual_cased_finetuned_amharic_xx_5.5.0_3.0_1725659533233.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_multilingual_cased_finetuned_amharic_xx_5.5.0_3.0_1725659533233.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_base_multilingual_cased_finetuned_amharic","xx") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("bert_base_multilingual_cased_finetuned_amharic","xx") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_multilingual_cased_finetuned_amharic| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|xx| +|Size:|665.0 MB| + +## References + +https://huggingface.co/Davlan/bert-base-multilingual-cased-finetuned-amharic \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_base_multilingual_cased_google_bert_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-06-bert_base_multilingual_cased_google_bert_pipeline_xx.md new file mode 100644 index 00000000000000..82665dfe858da1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_base_multilingual_cased_google_bert_pipeline_xx.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Multilingual bert_base_multilingual_cased_google_bert_pipeline pipeline BertEmbeddings from google-bert +author: John Snow Labs +name: bert_base_multilingual_cased_google_bert_pipeline +date: 2024-09-06 +tags: [xx, open_source, pipeline, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_multilingual_cased_google_bert_pipeline` is a Multilingual model originally trained by google-bert. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_multilingual_cased_google_bert_pipeline_xx_5.5.0_3.0_1725614411171.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_multilingual_cased_google_bert_pipeline_xx_5.5.0_3.0_1725614411171.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_base_multilingual_cased_google_bert_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_base_multilingual_cased_google_bert_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_multilingual_cased_google_bert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|665.1 MB| + +## References + +https://huggingface.co/google-bert/bert-base-multilingual-cased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_base_multilingual_cased_google_bert_xx.md b/docs/_posts/ahmedlone127/2024-09-06-bert_base_multilingual_cased_google_bert_xx.md new file mode 100644 index 00000000000000..d6ab1db0291675 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_base_multilingual_cased_google_bert_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual bert_base_multilingual_cased_google_bert BertEmbeddings from google-bert +author: John Snow Labs +name: bert_base_multilingual_cased_google_bert +date: 2024-09-06 +tags: [xx, open_source, onnx, embeddings, bert] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_multilingual_cased_google_bert` is a Multilingual model originally trained by google-bert. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_multilingual_cased_google_bert_xx_5.5.0_3.0_1725614379036.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_multilingual_cased_google_bert_xx_5.5.0_3.0_1725614379036.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_base_multilingual_cased_google_bert","xx") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("bert_base_multilingual_cased_google_bert","xx") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_multilingual_cased_google_bert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|xx| +|Size:|665.0 MB| + +## References + +https://huggingface.co/google-bert/bert-base-multilingual-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_base_ner_nc1_en.md b/docs/_posts/ahmedlone127/2024-09-06-bert_base_ner_nc1_en.md new file mode 100644 index 00000000000000..c56ad88c7b4ac8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_base_ner_nc1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_base_ner_nc1 BertForTokenClassification from OllieStanley +author: John Snow Labs +name: bert_base_ner_nc1 +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_ner_nc1` is a English model originally trained by OllieStanley. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_ner_nc1_en_5.5.0_3.0_1725600709466.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_ner_nc1_en_5.5.0_3.0_1725600709466.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("bert_base_ner_nc1","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bert_base_ner_nc1", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_ner_nc1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|403.8 MB| + +## References + +https://huggingface.co/OllieStanley/bert-base-ner-nc1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_base_ner_nc1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-bert_base_ner_nc1_pipeline_en.md new file mode 100644 index 00000000000000..e9f437a45362e7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_base_ner_nc1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_base_ner_nc1_pipeline pipeline BertForTokenClassification from OllieStanley +author: John Snow Labs +name: bert_base_ner_nc1_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_ner_nc1_pipeline` is a English model originally trained by OllieStanley. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_ner_nc1_pipeline_en_5.5.0_3.0_1725600730551.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_ner_nc1_pipeline_en_5.5.0_3.0_1725600730551.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_base_ner_nc1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_base_ner_nc1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_ner_nc1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.8 MB| + +## References + +https://huggingface.co/OllieStanley/bert-base-ner-nc1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_base_turkish_cased_ner_finetuned_ner_en.md b/docs/_posts/ahmedlone127/2024-09-06-bert_base_turkish_cased_ner_finetuned_ner_en.md new file mode 100644 index 00000000000000..1479e5529c7133 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_base_turkish_cased_ner_finetuned_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_base_turkish_cased_ner_finetuned_ner BertForTokenClassification from Huseyin +author: John Snow Labs +name: bert_base_turkish_cased_ner_finetuned_ner +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_turkish_cased_ner_finetuned_ner` is a English model originally trained by Huseyin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_turkish_cased_ner_finetuned_ner_en_5.5.0_3.0_1725634364175.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_turkish_cased_ner_finetuned_ner_en_5.5.0_3.0_1725634364175.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("bert_base_turkish_cased_ner_finetuned_ner","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bert_base_turkish_cased_ner_finetuned_ner", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_turkish_cased_ner_finetuned_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|412.4 MB| + +## References + +https://huggingface.co/Huseyin/bert-base-turkish-cased-ner-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_base_turkish_cased_ner_finetuned_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-bert_base_turkish_cased_ner_finetuned_ner_pipeline_en.md new file mode 100644 index 00000000000000..06d548353ccddc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_base_turkish_cased_ner_finetuned_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_base_turkish_cased_ner_finetuned_ner_pipeline pipeline BertForTokenClassification from Huseyin +author: John Snow Labs +name: bert_base_turkish_cased_ner_finetuned_ner_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_turkish_cased_ner_finetuned_ner_pipeline` is a English model originally trained by Huseyin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_turkish_cased_ner_finetuned_ner_pipeline_en_5.5.0_3.0_1725634383746.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_turkish_cased_ner_finetuned_ner_pipeline_en_5.5.0_3.0_1725634383746.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_base_turkish_cased_ner_finetuned_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_base_turkish_cased_ner_finetuned_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_turkish_cased_ner_finetuned_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|412.4 MB| + +## References + +https://huggingface.co/Huseyin/bert-base-turkish-cased-ner-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_base_turkish_ner_cased_pipeline_tr.md b/docs/_posts/ahmedlone127/2024-09-06-bert_base_turkish_ner_cased_pipeline_tr.md new file mode 100644 index 00000000000000..1d5e42190a7719 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_base_turkish_ner_cased_pipeline_tr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Turkish bert_base_turkish_ner_cased_pipeline pipeline BertForTokenClassification from girayyagmur +author: John Snow Labs +name: bert_base_turkish_ner_cased_pipeline +date: 2024-09-06 +tags: [tr, open_source, pipeline, onnx] +task: Named Entity Recognition +language: tr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_turkish_ner_cased_pipeline` is a Turkish model originally trained by girayyagmur. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_turkish_ner_cased_pipeline_tr_5.5.0_3.0_1725663771093.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_turkish_ner_cased_pipeline_tr_5.5.0_3.0_1725663771093.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_base_turkish_ner_cased_pipeline", lang = "tr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_base_turkish_ner_cased_pipeline", lang = "tr") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_turkish_ner_cased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|tr| +|Size:|412.4 MB| + +## References + +https://huggingface.co/girayyagmur/bert-base-turkish-ner-cased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_base_uncased_contracts_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-bert_base_uncased_contracts_pipeline_en.md new file mode 100644 index 00000000000000..e354b07c4f5646 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_base_uncased_contracts_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_base_uncased_contracts_pipeline pipeline BertEmbeddings from nlpaueb +author: John Snow Labs +name: bert_base_uncased_contracts_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_uncased_contracts_pipeline` is a English model originally trained by nlpaueb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_contracts_pipeline_en_5.5.0_3.0_1725659799105.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_contracts_pipeline_en_5.5.0_3.0_1725659799105.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_base_uncased_contracts_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_base_uncased_contracts_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
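+
+When the same pipeline is reused across jobs, it may be worth caching it on disk instead of re-downloading it. This sketch assumes the Python wrapper exposes the downloaded PipelineModel as `.model`; treat the attribute name and the target path as assumptions.
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+from pyspark.ml import PipelineModel
+
+pipeline = PretrainedPipeline("bert_base_uncased_contracts_pipeline", lang = "en")
+
+# Persist the underlying Spark ML PipelineModel (attribute name assumed).
+pipeline.model.write().overwrite().save("/tmp/bert_base_uncased_contracts_pipeline")
+
+# Later sessions can reload it like any Spark ML model, without a new download.
+reloaded = PipelineModel.load("/tmp/bert_base_uncased_contracts_pipeline")
+```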
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_contracts_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/nlpaueb/bert-base-uncased-contracts + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_checkpoint_980000_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-bert_checkpoint_980000_pipeline_en.md new file mode 100644 index 00000000000000..478e23077fc0b6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_checkpoint_980000_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_checkpoint_980000_pipeline pipeline BertEmbeddings from Atipico1 +author: John Snow Labs +name: bert_checkpoint_980000_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_checkpoint_980000_pipeline` is a English model originally trained by Atipico1. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_checkpoint_980000_pipeline_en_5.5.0_3.0_1725659744020.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_checkpoint_980000_pipeline_en_5.5.0_3.0_1725659744020.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_checkpoint_980000_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_checkpoint_980000_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_checkpoint_980000_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|408.2 MB| + +## References + +https://huggingface.co/Atipico1/bert-checkpoint-980000 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_fda_nutrition_ner_en.md b/docs/_posts/ahmedlone127/2024-09-06-bert_fda_nutrition_ner_en.md new file mode 100644 index 00000000000000..d72a86888dff9c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_fda_nutrition_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_fda_nutrition_ner BertForTokenClassification from sgarbi +author: John Snow Labs +name: bert_fda_nutrition_ner +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_fda_nutrition_ner` is a English model originally trained by sgarbi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_fda_nutrition_ner_en_5.5.0_3.0_1725600606640.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_fda_nutrition_ner_en_5.5.0_3.0_1725600606640.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("bert_fda_nutrition_ner","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bert_fda_nutrition_ner", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_fda_nutrition_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|407.3 MB| + +## References + +https://huggingface.co/sgarbi/bert-fda-nutrition-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_fine_tuned_ner_resume_en.md b/docs/_posts/ahmedlone127/2024-09-06-bert_fine_tuned_ner_resume_en.md new file mode 100644 index 00000000000000..7168042363d017 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_fine_tuned_ner_resume_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_fine_tuned_ner_resume BertForTokenClassification from bennyW1983 +author: John Snow Labs +name: bert_fine_tuned_ner_resume +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_fine_tuned_ner_resume` is a English model originally trained by bennyW1983. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_fine_tuned_ner_resume_en_5.5.0_3.0_1725633762093.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_fine_tuned_ner_resume_en_5.5.0_3.0_1725633762093.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("bert_fine_tuned_ner_resume","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bert_fine_tuned_ner_resume", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_fine_tuned_ner_resume| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/bennyW1983/bert-fine-tuned-NER-resume \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_fine_tuned_ner_resume_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-bert_fine_tuned_ner_resume_pipeline_en.md new file mode 100644 index 00000000000000..adbf4b85f4da14 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_fine_tuned_ner_resume_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_fine_tuned_ner_resume_pipeline pipeline BertForTokenClassification from bennyW1983 +author: John Snow Labs +name: bert_fine_tuned_ner_resume_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_fine_tuned_ner_resume_pipeline` is a English model originally trained by bennyW1983. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_fine_tuned_ner_resume_pipeline_en_5.5.0_3.0_1725633782736.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_fine_tuned_ner_resume_pipeline_en_5.5.0_3.0_1725633782736.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_fine_tuned_ner_resume_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_fine_tuned_ner_resume_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_fine_tuned_ner_resume_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/bennyW1983/bert-fine-tuned-NER-resume + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_finetuned1_squad_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-bert_finetuned1_squad_pipeline_en.md new file mode 100644 index 00000000000000..b9256f5fa34ddc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_finetuned1_squad_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bert_finetuned1_squad_pipeline pipeline XlmRoBertaForQuestionAnswering from Echiguerkh +author: John Snow Labs +name: bert_finetuned1_squad_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_finetuned1_squad_pipeline` is a English model originally trained by Echiguerkh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned1_squad_pipeline_en_5.5.0_3.0_1725640938802.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned1_squad_pipeline_en_5.5.0_3.0_1725640938802.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_finetuned1_squad_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_finetuned1_squad_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned1_squad_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|794.9 MB| + +## References + +https://huggingface.co/Echiguerkh/bert-finetuned1-squad + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_finetuned_ner_ashaduzzaman_en.md b/docs/_posts/ahmedlone127/2024-09-06-bert_finetuned_ner_ashaduzzaman_en.md new file mode 100644 index 00000000000000..4cb70c311e7902 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_finetuned_ner_ashaduzzaman_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_finetuned_ner_ashaduzzaman BertForTokenClassification from ashaduzzaman +author: John Snow Labs +name: bert_finetuned_ner_ashaduzzaman +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_finetuned_ner_ashaduzzaman` is a English model originally trained by ashaduzzaman. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_ashaduzzaman_en_5.5.0_3.0_1725663531875.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_ashaduzzaman_en_5.5.0_3.0_1725663531875.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("bert_finetuned_ner_ashaduzzaman","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bert_finetuned_ner_ashaduzzaman", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_ner_ashaduzzaman| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/ashaduzzaman/bert-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_finetuned_ner_ashaduzzaman_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-bert_finetuned_ner_ashaduzzaman_pipeline_en.md new file mode 100644 index 00000000000000..23a7ce9028321f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_finetuned_ner_ashaduzzaman_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_finetuned_ner_ashaduzzaman_pipeline pipeline BertForTokenClassification from ashaduzzaman +author: John Snow Labs +name: bert_finetuned_ner_ashaduzzaman_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_finetuned_ner_ashaduzzaman_pipeline` is a English model originally trained by ashaduzzaman. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_ashaduzzaman_pipeline_en_5.5.0_3.0_1725663550788.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_ashaduzzaman_pipeline_en_5.5.0_3.0_1725663550788.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_finetuned_ner_ashaduzzaman_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_finetuned_ner_ashaduzzaman_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
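+
+The snippet above assumes an existing Spark DataFrame `df` with a `text` column. A minimal end-to-end sketch (the session start and the sample sentence are illustrative, not part of this card):
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+# start (or attach to) a Spark session with Spark NLP on the classpath
+spark = sparknlp.start()
+
+# any DataFrame with a "text" column can stand in for `df`
+df = spark.createDataFrame([["John Snow Labs is based in Delaware"]]).toDF("text")
+
+pipeline = PretrainedPipeline("bert_finetuned_ner_ashaduzzaman_pipeline", lang="en")
+annotations = pipeline.transform(df)
+annotations.show(truncate=False)
+```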
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_ner_ashaduzzaman_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/ashaduzzaman/bert-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_finetuned_ner_bangla_arbitropy_en.md b/docs/_posts/ahmedlone127/2024-09-06-bert_finetuned_ner_bangla_arbitropy_en.md new file mode 100644 index 00000000000000..387ca72fb933f6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_finetuned_ner_bangla_arbitropy_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_finetuned_ner_bangla_arbitropy BertForTokenClassification from arbitropy +author: John Snow Labs +name: bert_finetuned_ner_bangla_arbitropy +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_finetuned_ner_bangla_arbitropy` is a English model originally trained by arbitropy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_bangla_arbitropy_en_5.5.0_3.0_1725634163648.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_bangla_arbitropy_en_5.5.0_3.0_1725634163648.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("bert_finetuned_ner_bangla_arbitropy","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("bert_finetuned_ner_bangla_arbitropy", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_ner_bangla_arbitropy| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|412.3 MB| + +## References + +https://huggingface.co/arbitropy/bert-finetuned-ner-bangla \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_finetuned_ner_bangla_arbitropy_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-bert_finetuned_ner_bangla_arbitropy_pipeline_en.md new file mode 100644 index 00000000000000..599c84019ad76e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_finetuned_ner_bangla_arbitropy_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_finetuned_ner_bangla_arbitropy_pipeline pipeline BertForTokenClassification from arbitropy +author: John Snow Labs +name: bert_finetuned_ner_bangla_arbitropy_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_finetuned_ner_bangla_arbitropy_pipeline` is a English model originally trained by arbitropy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_bangla_arbitropy_pipeline_en_5.5.0_3.0_1725634184709.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_bangla_arbitropy_pipeline_en_5.5.0_3.0_1725634184709.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_finetuned_ner_bangla_arbitropy_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_finetuned_ner_bangla_arbitropy_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
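+
+For quick checks on a single string, `PretrainedPipeline` also exposes `annotate`, which returns a plain Python dict (a sketch; the sample text is illustrative):
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("bert_finetuned_ner_bangla_arbitropy_pipeline", lang="en")
+
+# returns a dict mapping each output column to its list of annotation results
+result = pipeline.annotate("I love spark-nlp")
+print(result)
+```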
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_ner_bangla_arbitropy_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|412.3 MB| + +## References + +https://huggingface.co/arbitropy/bert-finetuned-ner-bangla + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_gps_research_001_en.md b/docs/_posts/ahmedlone127/2024-09-06-bert_gps_research_001_en.md new file mode 100644 index 00000000000000..e8888525b87868 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_gps_research_001_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English bert_gps_research_001 DistilBertForQuestionAnswering from HGottschall +author: John Snow Labs +name: bert_gps_research_001 +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_gps_research_001` is a English model originally trained by HGottschall. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_gps_research_001_en_5.5.0_3.0_1725654515830.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_gps_research_001_en_5.5.0_3.0_1725654515830.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+     .setInputCols(["question", "context"]) \
+     .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("bert_gps_research_001","en") \
+     .setInputCols(["document_question","document_context"]) \
+     .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+     .setInputCols("question", "context")
+     .setOutputCols("document_question", "document_context")
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("bert_gps_research_001", "en")
+     .setInputCols(Array("document_question","document_context"))
+     .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_gps_research_001| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/HGottschall/bert-gps-research-001 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_large_cased_en.md b/docs/_posts/ahmedlone127/2024-09-06-bert_large_cased_en.md new file mode 100644 index 00000000000000..eb3065cf8ec163 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_large_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: BERT Embeddings (Large Cased) +author: John Snow Labs +name: bert_large_cased +date: 2024-09-06 +tags: [open_source, embeddings, en, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +This model contains a deep bidirectional transformer trained on Wikipedia and the BookCorpus. The details are described in the paper "[BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding](https://arxiv.org/abs/1810.04805)". + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_en_5.5.0_3.0_1725615147862.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_en_5.5.0_3.0_1725615147862.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +... +embeddings = BertEmbeddings.pretrained("bert_large_cased", "en") \ +.setInputCols("sentence", "token") \ +.setOutputCol("embeddings") +nlp_pipeline = Pipeline(stages=[document_assembler, sentence_detector, tokenizer, embeddings]) +pipeline_model = nlp_pipeline.fit(spark.createDataFrame([[""]]).toDF("text")) +result = pipeline_model.transform(spark.createDataFrame([['I love NLP']], ["text"])) +``` +```scala +... +val embeddings = BertEmbeddings.pretrained("bert_large_cased", "en") +.setInputCols("sentence", "token") +.setOutputCol("embeddings") +val pipeline = new Pipeline().setStages(Array(document_assembler, sentence_detector, tokenizer, embeddings)) +val data = Seq("I love NLP").toDF("text") +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu + +text = ["I love NLP"] +embeddings_df = nlu.load('en.embed.bert.large_cased').predict(text, output_level='token') +embeddings_df +``` +
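+
+The `...` in the snippets above stands for the upstream stages. One possible definition, using the standard Spark NLP annotators (a sketch, not taken from this card), is:
+
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetector, Tokenizer
+
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentence_detector = SentenceDetector() \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("token")
+```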
+ +## Results + +```bash + + + token en_embed_bert_large_cased_embeddings + + I [-0.5893247723579407, -1.1389378309249878, -0.... + love [-0.8002289533615112, -0.15043185651302338, 0.... + NLP [-0.8995863199234009, 0.08327484875917435, 0.9... +``` + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|en| +|Size:|1.2 GB| \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_large_cased_whole_word_masking_en.md b/docs/_posts/ahmedlone127/2024-09-06-bert_large_cased_whole_word_masking_en.md new file mode 100644 index 00000000000000..f55c3aac7d4b76 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_large_cased_whole_word_masking_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English bert_large_cased_whole_word_masking BertEmbeddings from huggingface +author: John Snow Labs +name: bert_large_cased_whole_word_masking +date: 2024-09-06 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_large_cased_whole_word_masking` is a English model originally trained by huggingface. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_whole_word_masking_en_5.5.0_3.0_1725659152596.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_whole_word_masking_en_5.5.0_3.0_1725659152596.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("bert_large_cased_whole_word_masking","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("bert_large_cased_whole_word_masking", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_whole_word_masking| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|en| +|Size:|1.2 GB| + +## References + +References + +https://huggingface.co/bert-large-cased-whole-word-masking \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_large_uncased_squad_v2_en.md b/docs/_posts/ahmedlone127/2024-09-06-bert_large_uncased_squad_v2_en.md new file mode 100644 index 00000000000000..66327594b07e16 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_large_uncased_squad_v2_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English bert_large_uncased_squad_v2 DistilBertForQuestionAnswering from quynguyen1704 +author: John Snow Labs +name: bert_large_uncased_squad_v2 +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_large_uncased_squad_v2` is a English model originally trained by quynguyen1704. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_squad_v2_en_5.5.0_3.0_1725652574177.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_squad_v2_en_5.5.0_3.0_1725652574177.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCol(["question", "context"]) \ + .setOutputCol(["document_question", "document_context"]) + +spanClassifier = DistilBertForQuestionAnswering.pretrained("bert_large_uncased_squad_v2","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCol(Array("question", "context")) + .setOutputCol(Array("document_question", "document_context")) + +val spanClassifier = DistilBertForQuestionAnswering.pretrained("bert_large_uncased_squad_v2", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq("What framework do I use?","I use spark-nlp.").toDS.toDF("document_question", "document_context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
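+
+Once the pipeline has run, the predicted span can be read from the `answer` annotation column, for example (a sketch; actual output depends on the model):
+
+```python
+# each row of "answer" is an array of annotations; ".result" exposes the predicted text
+pipelineDF.select("answer.result").show(truncate=False)
+```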
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_squad_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/quynguyen1704/BERT-large-uncased-squad_v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_large_uncased_squad_v2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-bert_large_uncased_squad_v2_pipeline_en.md new file mode 100644 index 00000000000000..f15b90a4f44823 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_large_uncased_squad_v2_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bert_large_uncased_squad_v2_pipeline pipeline DistilBertForQuestionAnswering from quynguyen1704 +author: John Snow Labs +name: bert_large_uncased_squad_v2_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_large_uncased_squad_v2_pipeline` is a English model originally trained by quynguyen1704. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_squad_v2_pipeline_en_5.5.0_3.0_1725652586183.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_squad_v2_pipeline_en_5.5.0_3.0_1725652586183.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_large_uncased_squad_v2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_large_uncased_squad_v2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_squad_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/quynguyen1704/BERT-large-uncased-squad_v2 + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_ner_kriyans_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-bert_ner_kriyans_pipeline_en.md new file mode 100644 index 00000000000000..c411831551673e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_ner_kriyans_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_ner_kriyans_pipeline pipeline BertForTokenClassification from Kriyans +author: John Snow Labs +name: bert_ner_kriyans_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_ner_kriyans_pipeline` is a English model originally trained by Kriyans. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ner_kriyans_pipeline_en_5.5.0_3.0_1725600309689.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ner_kriyans_pipeline_en_5.5.0_3.0_1725600309689.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_ner_kriyans_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_ner_kriyans_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ner_kriyans_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/Kriyans/Bert-NER + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_phishing_classifier_student_jeahyung_en.md b/docs/_posts/ahmedlone127/2024-09-06-bert_phishing_classifier_student_jeahyung_en.md new file mode 100644 index 00000000000000..7402e1be73bf36 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_phishing_classifier_student_jeahyung_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_phishing_classifier_student_jeahyung DistilBertForSequenceClassification from JeaHyung +author: John Snow Labs +name: bert_phishing_classifier_student_jeahyung +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_phishing_classifier_student_jeahyung` is a English model originally trained by JeaHyung. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_phishing_classifier_student_jeahyung_en_5.5.0_3.0_1725608439501.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_phishing_classifier_student_jeahyung_en_5.5.0_3.0_1725608439501.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("bert_phishing_classifier_student_jeahyung","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("bert_phishing_classifier_student_jeahyung", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
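+
+For low-latency, single-document inference the fitted model can also be wrapped in a `LightPipeline` (a sketch; the sample sentence is illustrative):
+
+```python
+from sparknlp.base import LightPipeline
+
+# reuses the pipelineModel fitted above and skips the DataFrame round-trip
+light = LightPipeline(pipelineModel)
+print(light.annotate("Your account has been suspended, click here to verify your password"))
+```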
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_phishing_classifier_student_jeahyung| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|196.4 MB| + +## References + +https://huggingface.co/JeaHyung/bert-phishing-classifier_student \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_token_classifier_base_vietnamese_upos_pipeline_vi.md b/docs/_posts/ahmedlone127/2024-09-06-bert_token_classifier_base_vietnamese_upos_pipeline_vi.md new file mode 100644 index 00000000000000..3a4c80dce6f37f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_token_classifier_base_vietnamese_upos_pipeline_vi.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Vietnamese bert_token_classifier_base_vietnamese_upos_pipeline pipeline BertForTokenClassification from KoichiYasuoka +author: John Snow Labs +name: bert_token_classifier_base_vietnamese_upos_pipeline +date: 2024-09-06 +tags: [vi, open_source, pipeline, onnx] +task: Named Entity Recognition +language: vi +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_token_classifier_base_vietnamese_upos_pipeline` is a Vietnamese model originally trained by KoichiYasuoka. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_token_classifier_base_vietnamese_upos_pipeline_vi_5.5.0_3.0_1725663523889.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_token_classifier_base_vietnamese_upos_pipeline_vi_5.5.0_3.0_1725663523889.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_token_classifier_base_vietnamese_upos_pipeline", lang = "vi") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_token_classifier_base_vietnamese_upos_pipeline", lang = "vi") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_token_classifier_base_vietnamese_upos_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|vi| +|Size:|429.0 MB| + +## References + +https://huggingface.co/KoichiYasuoka/bert-base-vietnamese-upos + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_token_classifier_base_vietnamese_upos_vi.md b/docs/_posts/ahmedlone127/2024-09-06-bert_token_classifier_base_vietnamese_upos_vi.md new file mode 100644 index 00000000000000..7877e6c819ea58 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_token_classifier_base_vietnamese_upos_vi.md @@ -0,0 +1,100 @@ +--- +layout: model +title: Vietnamese BertForTokenClassification Base Cased model (from KoichiYasuoka) +author: John Snow Labs +name: bert_token_classifier_base_vietnamese_upos +date: 2024-09-06 +tags: [vi, open_source, bert, token_classification, ner, onnx] +task: Named Entity Recognition +language: vi +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert-base-vietnamese-upos` is a Vietnamese model originally trained by `KoichiYasuoka`. + +## Predicted Entities + +`NOUN`, `INTJ`, `AUX`, `ADP`, `DET`, `X`, `SYM`, `NUM`, `PUNCT`, `PRON`, `PROPN`, `VERB`, `ADJ`, `PART`, `CCONJ`, `ADV`, `SCONJ` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_token_classifier_base_vietnamese_upos_vi_5.5.0_3.0_1725663503169.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_token_classifier_base_vietnamese_upos_vi_5.5.0_3.0_1725663503169.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +tokenClassifier = BertForTokenClassification.pretrained("bert_token_classifier_base_vietnamese_upos","vi") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, tokenClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("bert_token_classifier_base_vietnamese_upos","vi") + .setInputCols(Array("document", "token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_token_classifier_base_vietnamese_upos| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|vi| +|Size:|429.0 MB| + +## References + +References + +- https://huggingface.co/KoichiYasuoka/bert-base-vietnamese-upos +- https://universaldependencies.org/u/pos/ +- https://github.com/KoichiYasuoka/esupar \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_token_classifier_berturk_keyword_extractor_pipeline_tr.md b/docs/_posts/ahmedlone127/2024-09-06-bert_token_classifier_berturk_keyword_extractor_pipeline_tr.md new file mode 100644 index 00000000000000..a5183bfc477945 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_token_classifier_berturk_keyword_extractor_pipeline_tr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Turkish bert_token_classifier_berturk_keyword_extractor_pipeline pipeline BertForTokenClassification from yanekyuk +author: John Snow Labs +name: bert_token_classifier_berturk_keyword_extractor_pipeline +date: 2024-09-06 +tags: [tr, open_source, pipeline, onnx] +task: Named Entity Recognition +language: tr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_token_classifier_berturk_keyword_extractor_pipeline` is a Turkish model originally trained by yanekyuk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_token_classifier_berturk_keyword_extractor_pipeline_tr_5.5.0_3.0_1725600451268.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_token_classifier_berturk_keyword_extractor_pipeline_tr_5.5.0_3.0_1725600451268.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_token_classifier_berturk_keyword_extractor_pipeline", lang = "tr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_token_classifier_berturk_keyword_extractor_pipeline", lang = "tr") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_token_classifier_berturk_keyword_extractor_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|tr| +|Size:|412.3 MB| + +## References + +https://huggingface.co/yanekyuk/berturk-keyword-extractor + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_token_classifier_berturk_keyword_extractor_tr.md b/docs/_posts/ahmedlone127/2024-09-06-bert_token_classifier_berturk_keyword_extractor_tr.md new file mode 100644 index 00000000000000..55a0c13afce796 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_token_classifier_berturk_keyword_extractor_tr.md @@ -0,0 +1,98 @@ +--- +layout: model +title: Turkish BertForTokenClassification Cased model (from yanekyuk) +author: John Snow Labs +name: bert_token_classifier_berturk_keyword_extractor +date: 2024-09-06 +tags: [tr, open_source, bert, token_classification, ner, onnx] +task: Named Entity Recognition +language: tr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `berturk-keyword-extractor` is a Turkish model originally trained by `yanekyuk`. + +## Predicted Entities + +`KEY` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_token_classifier_berturk_keyword_extractor_tr_5.5.0_3.0_1725600428409.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_token_classifier_berturk_keyword_extractor_tr_5.5.0_3.0_1725600428409.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +tokenClassifier = BertForTokenClassification.pretrained("bert_token_classifier_berturk_keyword_extractor","tr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, tokenClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("bert_token_classifier_berturk_keyword_extractor","tr") + .setInputCols(Array("document", "token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_token_classifier_berturk_keyword_extractor| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|tr| +|Size:|412.3 MB| + +## References + +References + +- https://huggingface.co/yanekyuk/berturk-keyword-extractor \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bert_token_classifier_ner_german_de.md b/docs/_posts/ahmedlone127/2024-09-06-bert_token_classifier_ner_german_de.md new file mode 100644 index 00000000000000..509bb849bc064c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bert_token_classifier_ner_german_de.md @@ -0,0 +1,99 @@ +--- +layout: model +title: German BertForTokenClassification Cased model (from mschiesser) +author: John Snow Labs +name: bert_token_classifier_ner_german +date: 2024-09-06 +tags: [de, open_source, bert, token_classification, ner, onnx] +task: Named Entity Recognition +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ner-bert-german` is a German model originally trained by `mschiesser`. + +## Predicted Entities + +`LOC`, `ORG`, `PER` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_token_classifier_ner_german_de_5.5.0_3.0_1725633722976.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_token_classifier_ner_german_de_5.5.0_3.0_1725633722976.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +tokenClassifier = BertForTokenClassification.pretrained("bert_token_classifier_ner_german","de") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, tokenClassifier]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("bert_token_classifier_ner_german","de") + .setInputCols(Array("document", "token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
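+
+To group the token-level tags into entity chunks, a `NerConverter` stage can be appended (a sketch, assuming the model emits IOB-style tags such as B-PER/I-PER):
+
+```python
+from sparknlp.annotator import NerConverter
+
+# merges consecutive B-/I- tags from the "ner" column into whole entities
+ner_converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+pipeline = Pipeline(stages=[documentAssembler, tokenizer, tokenClassifier, ner_converter])
+```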
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_token_classifier_ner_german| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|de| +|Size:|665.1 MB| + +## References + +References + +- https://huggingface.co/mschiesser/ner-bert-german +- https://en.wikipedia.org/wiki/Named-entity_recognition \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bertimbau_large_ner_selective_pipeline_pt.md b/docs/_posts/ahmedlone127/2024-09-06-bertimbau_large_ner_selective_pipeline_pt.md new file mode 100644 index 00000000000000..cc05db3c517bd9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bertimbau_large_ner_selective_pipeline_pt.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Portuguese bertimbau_large_ner_selective_pipeline pipeline BertForTokenClassification from marquesafonso +author: John Snow Labs +name: bertimbau_large_ner_selective_pipeline +date: 2024-09-06 +tags: [pt, open_source, pipeline, onnx] +task: Named Entity Recognition +language: pt +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bertimbau_large_ner_selective_pipeline` is a Portuguese model originally trained by marquesafonso. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertimbau_large_ner_selective_pipeline_pt_5.5.0_3.0_1725600445602.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertimbau_large_ner_selective_pipeline_pt_5.5.0_3.0_1725600445602.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bertimbau_large_ner_selective_pipeline", lang = "pt") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bertimbau_large_ner_selective_pipeline", lang = "pt") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertimbau_large_ner_selective_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|pt| +|Size:|406.0 MB| + +## References + +https://huggingface.co/marquesafonso/bertimbau-large-ner-selective + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bertimbau_large_ner_selective_pt.md b/docs/_posts/ahmedlone127/2024-09-06-bertimbau_large_ner_selective_pt.md new file mode 100644 index 00000000000000..237717d78d26c3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bertimbau_large_ner_selective_pt.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Portuguese bertimbau_large_ner_selective BertForTokenClassification from marquesafonso +author: John Snow Labs +name: bertimbau_large_ner_selective +date: 2024-09-06 +tags: [pt, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: pt +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bertimbau_large_ner_selective` is a Portuguese model originally trained by marquesafonso. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertimbau_large_ner_selective_pt_5.5.0_3.0_1725600425862.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertimbau_large_ner_selective_pt_5.5.0_3.0_1725600425862.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier  = BertForTokenClassification.pretrained("bertimbau_large_ner_selective","pt") \
+     .setInputCols(["document","token"]) \
+     .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bertimbau_large_ner_selective", "pt")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertimbau_large_ner_selective| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|pt| +|Size:|406.0 MB| + +## References + +https://huggingface.co/marquesafonso/bertimbau-large-ner-selective \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bertin_roberta_large_spanish_es.md b/docs/_posts/ahmedlone127/2024-09-06-bertin_roberta_large_spanish_es.md new file mode 100644 index 00000000000000..163c489e642a23 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bertin_roberta_large_spanish_es.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Castilian, Spanish bertin_roberta_large_spanish RoBertaEmbeddings from flax-community +author: John Snow Labs +name: bertin_roberta_large_spanish +date: 2024-09-06 +tags: [es, open_source, onnx, embeddings, roberta] +task: Embeddings +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bertin_roberta_large_spanish` is a Castilian, Spanish model originally trained by flax-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertin_roberta_large_spanish_es_5.5.0_3.0_1725661208460.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertin_roberta_large_spanish_es_5.5.0_3.0_1725661208460.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("bertin_roberta_large_spanish","es") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("bertin_roberta_large_spanish","es") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
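+
+The resulting `embeddings` column holds one annotation per token; the vectors can be pulled out with ordinary DataFrame operations (a sketch):
+
+```python
+from pyspark.sql.functions import explode
+
+# one row per token, with the token text and its embedding vector
+pipelineDF.select(explode("embeddings").alias("emb")) \
+    .selectExpr("emb.result as token", "emb.embeddings as vector") \
+    .show(truncate=80)
+```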
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertin_roberta_large_spanish| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|es| +|Size:|230.7 MB| + +## References + +https://huggingface.co/flax-community/bertin-roberta-large-spanish \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bertin_roberta_large_spanish_pipeline_es.md b/docs/_posts/ahmedlone127/2024-09-06-bertin_roberta_large_spanish_pipeline_es.md new file mode 100644 index 00000000000000..539fc4b90367d7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bertin_roberta_large_spanish_pipeline_es.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Castilian, Spanish bertin_roberta_large_spanish_pipeline pipeline RoBertaEmbeddings from flax-community +author: John Snow Labs +name: bertin_roberta_large_spanish_pipeline +date: 2024-09-06 +tags: [es, open_source, pipeline, onnx] +task: Embeddings +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bertin_roberta_large_spanish_pipeline` is a Castilian, Spanish model originally trained by flax-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertin_roberta_large_spanish_pipeline_es_5.5.0_3.0_1725661284212.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertin_roberta_large_spanish_pipeline_es_5.5.0_3.0_1725661284212.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bertin_roberta_large_spanish_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bertin_roberta_large_spanish_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertin_roberta_large_spanish_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|230.7 MB| + +## References + +https://huggingface.co/flax-community/bertin-roberta-large-spanish + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-best_model_yelp_polarity_64_13_en.md b/docs/_posts/ahmedlone127/2024-09-06-best_model_yelp_polarity_64_13_en.md new file mode 100644 index 00000000000000..fee2e032fe49d0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-best_model_yelp_polarity_64_13_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English best_model_yelp_polarity_64_13 AlbertForSequenceClassification from simonycl +author: John Snow Labs +name: best_model_yelp_polarity_64_13 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`best_model_yelp_polarity_64_13` is a English model originally trained by simonycl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/best_model_yelp_polarity_64_13_en_5.5.0_3.0_1725628180147.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/best_model_yelp_polarity_64_13_en_5.5.0_3.0_1725628180147.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = AlbertForSequenceClassification.pretrained("best_model_yelp_polarity_64_13","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = AlbertForSequenceClassification.pretrained("best_model_yelp_polarity_64_13", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|best_model_yelp_polarity_64_13| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/simonycl/best_model-yelp_polarity-64-13 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-best_model_yelp_polarity_64_13_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-best_model_yelp_polarity_64_13_pipeline_en.md new file mode 100644 index 00000000000000..f466b35845a16e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-best_model_yelp_polarity_64_13_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English best_model_yelp_polarity_64_13_pipeline pipeline AlbertForSequenceClassification from simonycl +author: John Snow Labs +name: best_model_yelp_polarity_64_13_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`best_model_yelp_polarity_64_13_pipeline` is a English model originally trained by simonycl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/best_model_yelp_polarity_64_13_pipeline_en_5.5.0_3.0_1725628182672.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/best_model_yelp_polarity_64_13_pipeline_en_5.5.0_3.0_1725628182672.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("best_model_yelp_polarity_64_13_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("best_model_yelp_polarity_64_13_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
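+For quick single-document checks the same pretrained pipeline can also be called without building a DataFrame; `annotate` returns a plain Python dictionary whose keys mirror the pipeline's output columns (assumed here to include `class`):
+
+```python
+# Light, driver-side usage of the pretrained pipeline (input text is illustrative)
+result = pipeline.annotate("The food was great and the service was excellent.")
+print(result["class"])  # assumed output column name for the classifier stage
+```
+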
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|best_model_yelp_polarity_64_13_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/simonycl/best_model-yelp_polarity-64-13 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bge_reranker_base_course_skill_tuned_en.md b/docs/_posts/ahmedlone127/2024-09-06-bge_reranker_base_course_skill_tuned_en.md new file mode 100644 index 00000000000000..121b645bb176f4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bge_reranker_base_course_skill_tuned_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bge_reranker_base_course_skill_tuned XlmRoBertaForSequenceClassification from isy-thl +author: John Snow Labs +name: bge_reranker_base_course_skill_tuned +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bge_reranker_base_course_skill_tuned` is a English model originally trained by isy-thl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_reranker_base_course_skill_tuned_en_5.5.0_3.0_1725616834392.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_reranker_base_course_skill_tuned_en_5.5.0_3.0_1725616834392.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("bge_reranker_base_course_skill_tuned","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("bge_reranker_base_course_skill_tuned", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_reranker_base_course_skill_tuned| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|994.3 MB| + +## References + +https://huggingface.co/isy-thl/bge-reranker-base-course-skill-tuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bge_reranker_base_course_skill_tuned_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-bge_reranker_base_course_skill_tuned_pipeline_en.md new file mode 100644 index 00000000000000..5f99767858c3ef --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bge_reranker_base_course_skill_tuned_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bge_reranker_base_course_skill_tuned_pipeline pipeline XlmRoBertaForSequenceClassification from isy-thl +author: John Snow Labs +name: bge_reranker_base_course_skill_tuned_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bge_reranker_base_course_skill_tuned_pipeline` is a English model originally trained by isy-thl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bge_reranker_base_course_skill_tuned_pipeline_en_5.5.0_3.0_1725616888403.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bge_reranker_base_course_skill_tuned_pipeline_en_5.5.0_3.0_1725616888403.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bge_reranker_base_course_skill_tuned_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bge_reranker_base_course_skill_tuned_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bge_reranker_base_course_skill_tuned_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|994.3 MB| + +## References + +https://huggingface.co/isy-thl/bge-reranker-base-course-skill-tuned + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bias_classifier_roberta_en.md b/docs/_posts/ahmedlone127/2024-09-06-bias_classifier_roberta_en.md new file mode 100644 index 00000000000000..602749faed2149 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bias_classifier_roberta_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bias_classifier_roberta RoBertaForSequenceClassification from wu981526092 +author: John Snow Labs +name: bias_classifier_roberta +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bias_classifier_roberta` is a English model originally trained by wu981526092. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bias_classifier_roberta_en_5.5.0_3.0_1725613189866.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bias_classifier_roberta_en_5.5.0_3.0_1725613189866.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("bias_classifier_roberta","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("bias_classifier_roberta", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bias_classifier_roberta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|457.6 MB| + +## References + +https://huggingface.co/wu981526092/bias_classifier_roberta \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bioclinicalbert_full_finetuned_ner_pablo_en.md b/docs/_posts/ahmedlone127/2024-09-06-bioclinicalbert_full_finetuned_ner_pablo_en.md new file mode 100644 index 00000000000000..4e6036c68ed4f0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bioclinicalbert_full_finetuned_ner_pablo_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bioclinicalbert_full_finetuned_ner_pablo BertForTokenClassification from pabRomero +author: John Snow Labs +name: bioclinicalbert_full_finetuned_ner_pablo +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bioclinicalbert_full_finetuned_ner_pablo` is a English model originally trained by pabRomero. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bioclinicalbert_full_finetuned_ner_pablo_en_5.5.0_3.0_1725600828926.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bioclinicalbert_full_finetuned_ner_pablo_en_5.5.0_3.0_1725600828926.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("bioclinicalbert_full_finetuned_ner_pablo","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bioclinicalbert_full_finetuned_ner_pablo", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
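+To read the predictions, the token texts and their predicted tags can be pulled out of the `token` and `ner` annotation columns:
+
+```python
+# Aligned arrays: tokens[i] corresponds to ner_labels[i] within each row
+pipelineDF.selectExpr("token.result as tokens", "ner.result as ner_labels").show(truncate=False)
+```
+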
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bioclinicalbert_full_finetuned_ner_pablo| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|403.4 MB| + +## References + +https://huggingface.co/pabRomero/BioClinicalBERT-full-finetuned-ner-pablo \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bioclinicalbert_full_finetuned_ner_pablo_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-bioclinicalbert_full_finetuned_ner_pablo_pipeline_en.md new file mode 100644 index 00000000000000..1433c9129fff5e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bioclinicalbert_full_finetuned_ner_pablo_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bioclinicalbert_full_finetuned_ner_pablo_pipeline pipeline BertForTokenClassification from pabRomero +author: John Snow Labs +name: bioclinicalbert_full_finetuned_ner_pablo_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bioclinicalbert_full_finetuned_ner_pablo_pipeline` is a English model originally trained by pabRomero. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bioclinicalbert_full_finetuned_ner_pablo_pipeline_en_5.5.0_3.0_1725600848810.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bioclinicalbert_full_finetuned_ner_pablo_pipeline_en_5.5.0_3.0_1725600848810.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bioclinicalbert_full_finetuned_ner_pablo_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bioclinicalbert_full_finetuned_ner_pablo_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bioclinicalbert_full_finetuned_ner_pablo_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.4 MB| + +## References + +https://huggingface.co/pabRomero/BioClinicalBERT-full-finetuned-ner-pablo + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-boolq_microsoft_deberta_v3_base_seed_1_en.md b/docs/_posts/ahmedlone127/2024-09-06-boolq_microsoft_deberta_v3_base_seed_1_en.md new file mode 100644 index 00000000000000..a650b226270460 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-boolq_microsoft_deberta_v3_base_seed_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English boolq_microsoft_deberta_v3_base_seed_1 DeBertaForSequenceClassification from utahnlp +author: John Snow Labs +name: boolq_microsoft_deberta_v3_base_seed_1 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`boolq_microsoft_deberta_v3_base_seed_1` is a English model originally trained by utahnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/boolq_microsoft_deberta_v3_base_seed_1_en_5.5.0_3.0_1725611377684.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/boolq_microsoft_deberta_v3_base_seed_1_en_5.5.0_3.0_1725611377684.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("boolq_microsoft_deberta_v3_base_seed_1","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("boolq_microsoft_deberta_v3_base_seed_1", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
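+The upstream model was fine-tuned on BoolQ-style question/passage pairs, while this snippet reads a single `text` column; one simple option is to pack the question and passage into that column together (an illustrative assumption, not a documented input format for this model):
+
+```python
+# Hypothetical BoolQ-style input packed into the single "text" column
+data = spark.createDataFrame(
+    [["Is the sky blue? The sky appears blue because of Rayleigh scattering of sunlight."]]
+).toDF("text")
+```
+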
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|boolq_microsoft_deberta_v3_base_seed_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|620.1 MB| + +## References + +https://huggingface.co/utahnlp/boolq_microsoft_deberta-v3-base_seed-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bowdpr_marco_en.md b/docs/_posts/ahmedlone127/2024-09-06-bowdpr_marco_en.md new file mode 100644 index 00000000000000..525ad4c6c85b5a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bowdpr_marco_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bowdpr_marco BertEmbeddings from bowdpr +author: John Snow Labs +name: bowdpr_marco +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bowdpr_marco` is a English model originally trained by bowdpr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bowdpr_marco_en_5.5.0_3.0_1725617894974.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bowdpr_marco_en_5.5.0_3.0_1725617894974.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bowdpr_marco","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("bowdpr_marco","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
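+Each token's vector is stored on the `embeddings` annotation; one way to pull the raw vectors out for downstream use (output shape depends on the model's hidden size):
+
+```python
+# One row per token; each value is a float array produced by the BERT encoder
+pipelineDF.selectExpr("explode(embeddings.embeddings) as token_embedding").show(3, truncate=80)
+```
+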
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bowdpr_marco| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/bowdpr/bowdpr_marco \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bowdpr_marco_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-bowdpr_marco_pipeline_en.md new file mode 100644 index 00000000000000..5fdf2edd30ad5a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bowdpr_marco_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bowdpr_marco_pipeline pipeline BertEmbeddings from bowdpr +author: John Snow Labs +name: bowdpr_marco_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bowdpr_marco_pipeline` is a English model originally trained by bowdpr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bowdpr_marco_pipeline_en_5.5.0_3.0_1725617914882.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bowdpr_marco_pipeline_en_5.5.0_3.0_1725617914882.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bowdpr_marco_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bowdpr_marco_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bowdpr_marco_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/bowdpr/bowdpr_marco + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bsc_bio_ehr_spanish_ehealth_kd_es.md b/docs/_posts/ahmedlone127/2024-09-06-bsc_bio_ehr_spanish_ehealth_kd_es.md new file mode 100644 index 00000000000000..699f36069dd7da --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bsc_bio_ehr_spanish_ehealth_kd_es.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Castilian, Spanish bsc_bio_ehr_spanish_ehealth_kd RoBertaForTokenClassification from IIC +author: John Snow Labs +name: bsc_bio_ehr_spanish_ehealth_kd +date: 2024-09-06 +tags: [es, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bsc_bio_ehr_spanish_ehealth_kd` is a Castilian, Spanish model originally trained by IIC. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_ehealth_kd_es_5.5.0_3.0_1725638141925.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_ehealth_kd_es_5.5.0_3.0_1725638141925.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("bsc_bio_ehr_spanish_ehealth_kd","es") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("bsc_bio_ehr_spanish_ehealth_kd", "es")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
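+The sample sentence above is English, but this model was trained on Spanish clinical text; a more representative input (the sentence is invented purely for illustration):
+
+```python
+# Spanish clinical-style sentence as input for the es NER model
+data = spark.createDataFrame([["El paciente presenta fiebre alta y dolor abdominal desde hace dos días."]]).toDF("text")
+```
+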
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bsc_bio_ehr_spanish_ehealth_kd| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|es| +|Size:|415.5 MB| + +## References + +https://huggingface.co/IIC/bsc-bio-ehr-es-ehealth_kd \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bsc_bio_ehr_spanish_ehealth_kd_pipeline_es.md b/docs/_posts/ahmedlone127/2024-09-06-bsc_bio_ehr_spanish_ehealth_kd_pipeline_es.md new file mode 100644 index 00000000000000..c8214cc7cc7887 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bsc_bio_ehr_spanish_ehealth_kd_pipeline_es.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Castilian, Spanish bsc_bio_ehr_spanish_ehealth_kd_pipeline pipeline RoBertaForTokenClassification from IIC +author: John Snow Labs +name: bsc_bio_ehr_spanish_ehealth_kd_pipeline +date: 2024-09-06 +tags: [es, open_source, pipeline, onnx] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bsc_bio_ehr_spanish_ehealth_kd_pipeline` is a Castilian, Spanish model originally trained by IIC. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_ehealth_kd_pipeline_es_5.5.0_3.0_1725638183263.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_ehealth_kd_pipeline_es_5.5.0_3.0_1725638183263.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bsc_bio_ehr_spanish_ehealth_kd_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bsc_bio_ehr_spanish_ehealth_kd_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bsc_bio_ehr_spanish_ehealth_kd_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|415.5 MB| + +## References + +https://huggingface.co/IIC/bsc-bio-ehr-es-ehealth_kd + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bsc_bio_ehr_spanish_symptemist_75_ner_en.md b/docs/_posts/ahmedlone127/2024-09-06-bsc_bio_ehr_spanish_symptemist_75_ner_en.md new file mode 100644 index 00000000000000..8f6a43ff73d969 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bsc_bio_ehr_spanish_symptemist_75_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bsc_bio_ehr_spanish_symptemist_75_ner RoBertaForTokenClassification from Rodrigo1771 +author: John Snow Labs +name: bsc_bio_ehr_spanish_symptemist_75_ner +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bsc_bio_ehr_spanish_symptemist_75_ner` is a English model originally trained by Rodrigo1771. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_symptemist_75_ner_en_5.5.0_3.0_1725638375697.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_symptemist_75_ner_en_5.5.0_3.0_1725638375697.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("bsc_bio_ehr_spanish_symptemist_75_ner","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("bsc_bio_ehr_spanish_symptemist_75_ner", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bsc_bio_ehr_spanish_symptemist_75_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|435.3 MB| + +## References + +https://huggingface.co/Rodrigo1771/bsc-bio-ehr-es-symptemist-75-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bsc_bio_ehr_spanish_symptemist_75_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-bsc_bio_ehr_spanish_symptemist_75_ner_pipeline_en.md new file mode 100644 index 00000000000000..4630eb990fe086 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bsc_bio_ehr_spanish_symptemist_75_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bsc_bio_ehr_spanish_symptemist_75_ner_pipeline pipeline RoBertaForTokenClassification from Rodrigo1771 +author: John Snow Labs +name: bsc_bio_ehr_spanish_symptemist_75_ner_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bsc_bio_ehr_spanish_symptemist_75_ner_pipeline` is a English model originally trained by Rodrigo1771. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_symptemist_75_ner_pipeline_en_5.5.0_3.0_1725638409411.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_symptemist_75_ner_pipeline_en_5.5.0_3.0_1725638409411.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bsc_bio_ehr_spanish_symptemist_75_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bsc_bio_ehr_spanish_symptemist_75_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bsc_bio_ehr_spanish_symptemist_75_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|435.4 MB| + +## References + +https://huggingface.co/Rodrigo1771/bsc-bio-ehr-es-symptemist-75-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-bsc_bio_ehr_spanish_symptemist_85_ner_en.md b/docs/_posts/ahmedlone127/2024-09-06-bsc_bio_ehr_spanish_symptemist_85_ner_en.md new file mode 100644 index 00000000000000..811a44b249cb62 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-bsc_bio_ehr_spanish_symptemist_85_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bsc_bio_ehr_spanish_symptemist_85_ner RoBertaForTokenClassification from Rodrigo1771 +author: John Snow Labs +name: bsc_bio_ehr_spanish_symptemist_85_ner +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bsc_bio_ehr_spanish_symptemist_85_ner` is a English model originally trained by Rodrigo1771. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_symptemist_85_ner_en_5.5.0_3.0_1725638417530.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_symptemist_85_ner_en_5.5.0_3.0_1725638417530.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("bsc_bio_ehr_spanish_symptemist_85_ner","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("bsc_bio_ehr_spanish_symptemist_85_ner", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bsc_bio_ehr_spanish_symptemist_85_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|435.0 MB| + +## References + +https://huggingface.co/Rodrigo1771/bsc-bio-ehr-es-symptemist-85-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_anon_model_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_anon_model_en.md new file mode 100644 index 00000000000000..9d7ba0cceb8e08 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_anon_model_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_anon_model DistilBertForTokenClassification from jacobfortinmtl +author: John Snow Labs +name: burmese_anon_model +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_anon_model` is a English model originally trained by jacobfortinmtl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_anon_model_en_5.5.0_3.0_1725599510834.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_anon_model_en_5.5.0_3.0_1725599510834.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_anon_model","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_anon_model", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_anon_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.6 MB| + +## References + +https://huggingface.co/jacobfortinmtl/my_anon_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_anon_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_anon_model_pipeline_en.md new file mode 100644 index 00000000000000..3eae7b3f5adacb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_anon_model_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_anon_model_pipeline pipeline DistilBertForTokenClassification from jacobfortinmtl +author: John Snow Labs +name: burmese_anon_model_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_anon_model_pipeline` is a English model originally trained by jacobfortinmtl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_anon_model_pipeline_en_5.5.0_3.0_1725599524034.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_anon_model_pipeline_en_5.5.0_3.0_1725599524034.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_anon_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_anon_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_anon_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.6 MB| + +## References + +https://huggingface.co/jacobfortinmtl/my_anon_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_model_ahmedssoliman_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_model_ahmedssoliman_en.md new file mode 100644 index 00000000000000..925693c0fd73e1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_model_ahmedssoliman_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_model_ahmedssoliman DistilBertForSequenceClassification from AhmedSSoliman +author: John Snow Labs +name: burmese_awesome_model_ahmedssoliman +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_model_ahmedssoliman` is a English model originally trained by AhmedSSoliman. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_model_ahmedssoliman_en_5.5.0_3.0_1725608044393.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_model_ahmedssoliman_en_5.5.0_3.0_1725608044393.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("burmese_awesome_model_ahmedssoliman","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("burmese_awesome_model_ahmedssoliman", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_model_ahmedssoliman| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/AhmedSSoliman/my_awesome_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_model_ahmedssoliman_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_model_ahmedssoliman_pipeline_en.md new file mode 100644 index 00000000000000..e9016291b8b585 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_model_ahmedssoliman_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_model_ahmedssoliman_pipeline pipeline DistilBertForSequenceClassification from AhmedSSoliman +author: John Snow Labs +name: burmese_awesome_model_ahmedssoliman_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_model_ahmedssoliman_pipeline` is a English model originally trained by AhmedSSoliman. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_model_ahmedssoliman_pipeline_en_5.5.0_3.0_1725608056091.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_model_ahmedssoliman_pipeline_en_5.5.0_3.0_1725608056091.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_model_ahmedssoliman_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_model_ahmedssoliman_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_model_ahmedssoliman_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/AhmedSSoliman/my_awesome_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_model_gamino_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_model_gamino_pipeline_en.md new file mode 100644 index 00000000000000..e67e64ab18195b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_model_gamino_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_model_gamino_pipeline pipeline DistilBertForSequenceClassification from gamino +author: John Snow Labs +name: burmese_awesome_model_gamino_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_model_gamino_pipeline` is a English model originally trained by gamino. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_model_gamino_pipeline_en_5.5.0_3.0_1725608335396.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_model_gamino_pipeline_en_5.5.0_3.0_1725608335396.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_model_gamino_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_model_gamino_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_model_gamino_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|250.0 MB| + +## References + +https://huggingface.co/gamino/my_awesome_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_punccap_model_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_punccap_model_en.md new file mode 100644 index 00000000000000..c7f00b01501a5e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_punccap_model_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_punccap_model DistilBertForTokenClassification from wstcpyt1988 +author: John Snow Labs +name: burmese_awesome_punccap_model +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_punccap_model` is a English model originally trained by wstcpyt1988. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_punccap_model_en_5.5.0_3.0_1725653628504.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_punccap_model_en_5.5.0_3.0_1725653628504.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_punccap_model","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_punccap_model", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_punccap_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/wstcpyt1988/my_awesome_punccap_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_ahmad01010101_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_ahmad01010101_pipeline_en.md new file mode 100644 index 00000000000000..59051d53acd1ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_ahmad01010101_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_ahmad01010101_pipeline pipeline DistilBertForQuestionAnswering from ahmad01010101 +author: John Snow Labs +name: burmese_awesome_qa_model_ahmad01010101_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_ahmad01010101_pipeline` is a English model originally trained by ahmad01010101. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_ahmad01010101_pipeline_en_5.5.0_3.0_1725654940858.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_ahmad01010101_pipeline_en_5.5.0_3.0_1725654940858.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# `df` is an input DataFrame prepared beforehand; the pipeline is downloaded on first use.
+pipeline = PretrainedPipeline("burmese_awesome_qa_model_ahmad01010101_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// `df` is an input DataFrame prepared beforehand; the pipeline is downloaded on first use.
+val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_ahmad01010101_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
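+To see which annotators the downloaded pipeline bundles (and therefore what input it expects), one option is to list its stages. This is an illustrative sketch, not part of the original card; it assumes the Python snippet above has created `pipeline` and that the wrapped Spark `PipelineModel` is exposed as `pipeline.model`:
+
+```python
+# Print the class name of each stage in the pretrained pipeline.
+for stage in pipeline.model.stages:
+    print(type(stage).__name__)
+```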
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_ahmad01010101_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/ahmad01010101/my_awesome_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_balalalalala_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_balalalalala_en.md new file mode 100644 index 00000000000000..aebe549a7426b2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_balalalalala_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_qa_model_balalalalala DistilBertForQuestionAnswering from balalalalala +author: John Snow Labs +name: burmese_awesome_qa_model_balalalalala +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_balalalalala` is a English model originally trained by balalalalala. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_balalalalala_en_5.5.0_3.0_1725621723251.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_balalalalala_en_5.5.0_3.0_1725621723251.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active SparkSession `spark` with Spark NLP loaded.
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_balalalalala","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// Assumes a SparkSession `spark` is in scope (e.g. spark-shell with Spark NLP on the classpath).
+import com.johnsnowlabs.nlp.MultiDocumentAssembler
+import com.johnsnowlabs.nlp.annotators.classifier.dl.DistilBertForQuestionAnswering
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_balalalalala", "en")
+    .setInputCols(Array("document_question", "document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
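+To read the predicted answer spans back out of the transformed DataFrame, a minimal sketch (not part of the original card, assuming the Python example above with its `answer` output column):
+
+```python
+# Each row holds the answer text extracted for its question/context pair.
+pipelineDF.select("answer.result").show(truncate=False)
+```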
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_balalalalala| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/balalalalala/my_awesome_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_balalalalala_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_balalalalala_pipeline_en.md new file mode 100644 index 00000000000000..448dac315ef3b8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_balalalalala_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_balalalalala_pipeline pipeline DistilBertForQuestionAnswering from balalalalala +author: John Snow Labs +name: burmese_awesome_qa_model_balalalalala_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_balalalalala_pipeline` is a English model originally trained by balalalalala. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_balalalalala_pipeline_en_5.5.0_3.0_1725621735698.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_balalalalala_pipeline_en_5.5.0_3.0_1725621735698.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# `df` is an input DataFrame prepared beforehand; the pipeline is downloaded on first use.
+pipeline = PretrainedPipeline("burmese_awesome_qa_model_balalalalala_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// `df` is an input DataFrame prepared beforehand; the pipeline is downloaded on first use.
+val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_balalalalala_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_balalalalala_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/balalalalala/my_awesome_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_chernoffface_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_chernoffface_pipeline_en.md new file mode 100644 index 00000000000000..9dcd23ad1a9840 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_chernoffface_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_chernoffface_pipeline pipeline DistilBertForQuestionAnswering from Chernoffface +author: John Snow Labs +name: burmese_awesome_qa_model_chernoffface_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_chernoffface_pipeline` is a English model originally trained by Chernoffface. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_chernoffface_pipeline_en_5.5.0_3.0_1725652153191.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_chernoffface_pipeline_en_5.5.0_3.0_1725652153191.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# `df` is an input DataFrame prepared beforehand; the pipeline is downloaded on first use.
+pipeline = PretrainedPipeline("burmese_awesome_qa_model_chernoffface_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// `df` is an input DataFrame prepared beforehand; the pipeline is downloaded on first use.
+val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_chernoffface_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_chernoffface_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Chernoffface/my_awesome_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_diodiodada_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_diodiodada_en.md new file mode 100644 index 00000000000000..0d932ac2ede902 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_diodiodada_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_qa_model_diodiodada DistilBertForQuestionAnswering from diodiodada +author: John Snow Labs +name: burmese_awesome_qa_model_diodiodada +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_diodiodada` is a English model originally trained by diodiodada. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_diodiodada_en_5.5.0_3.0_1725654900838.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_diodiodada_en_5.5.0_3.0_1725654900838.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active SparkSession `spark` with Spark NLP loaded.
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_diodiodada","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// Assumes a SparkSession `spark` is in scope (e.g. spark-shell with Spark NLP on the classpath).
+import com.johnsnowlabs.nlp.MultiDocumentAssembler
+import com.johnsnowlabs.nlp.annotators.classifier.dl.DistilBertForQuestionAnswering
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_diodiodada", "en")
+    .setInputCols(Array("document_question", "document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_diodiodada| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/diodiodada/my_awesome_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_diodiodada_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_diodiodada_pipeline_en.md new file mode 100644 index 00000000000000..5309a19aefad16 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_diodiodada_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_diodiodada_pipeline pipeline DistilBertForQuestionAnswering from diodiodada +author: John Snow Labs +name: burmese_awesome_qa_model_diodiodada_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_diodiodada_pipeline` is a English model originally trained by diodiodada. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_diodiodada_pipeline_en_5.5.0_3.0_1725654912611.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_diodiodada_pipeline_en_5.5.0_3.0_1725654912611.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# `df` is an input DataFrame prepared beforehand; the pipeline is downloaded on first use.
+pipeline = PretrainedPipeline("burmese_awesome_qa_model_diodiodada_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// `df` is an input DataFrame prepared beforehand; the pipeline is downloaded on first use.
+val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_diodiodada_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_diodiodada_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/diodiodada/my_awesome_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_fede_ezeq_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_fede_ezeq_pipeline_en.md new file mode 100644 index 00000000000000..ded8f139ac4af1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_fede_ezeq_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_fede_ezeq_pipeline pipeline DistilBertForQuestionAnswering from Fede-ezeq +author: John Snow Labs +name: burmese_awesome_qa_model_fede_ezeq_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_fede_ezeq_pipeline` is a English model originally trained by Fede-ezeq. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_fede_ezeq_pipeline_en_5.5.0_3.0_1725654371878.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_fede_ezeq_pipeline_en_5.5.0_3.0_1725654371878.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# `df` is an input DataFrame prepared beforehand; the pipeline is downloaded on first use.
+pipeline = PretrainedPipeline("burmese_awesome_qa_model_fede_ezeq_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// `df` is an input DataFrame prepared beforehand; the pipeline is downloaded on first use.
+val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_fede_ezeq_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_fede_ezeq_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|250.2 MB| + +## References + +https://huggingface.co/Fede-ezeq/my_awesome_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_funailife_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_funailife_en.md new file mode 100644 index 00000000000000..681c7154b80438 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_funailife_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_qa_model_funailife DistilBertForQuestionAnswering from FunAILife +author: John Snow Labs +name: burmese_awesome_qa_model_funailife +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_funailife` is a English model originally trained by FunAILife. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_funailife_en_5.5.0_3.0_1725622170639.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_funailife_en_5.5.0_3.0_1725622170639.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active SparkSession `spark` with Spark NLP loaded.
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_funailife","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// Assumes a SparkSession `spark` is in scope (e.g. spark-shell with Spark NLP on the classpath).
+import com.johnsnowlabs.nlp.MultiDocumentAssembler
+import com.johnsnowlabs.nlp.annotators.classifier.dl.DistilBertForQuestionAnswering
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_funailife", "en")
+    .setInputCols(Array("document_question", "document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_funailife| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/FunAILife/my_awesome_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_jennydqmm_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_jennydqmm_en.md new file mode 100644 index 00000000000000..bb77173e966605 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_jennydqmm_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_qa_model_jennydqmm DistilBertForQuestionAnswering from JennyDQMM +author: John Snow Labs +name: burmese_awesome_qa_model_jennydqmm +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_jennydqmm` is a English model originally trained by JennyDQMM. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_jennydqmm_en_5.5.0_3.0_1725652308085.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_jennydqmm_en_5.5.0_3.0_1725652308085.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active SparkSession `spark` with Spark NLP loaded.
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_jennydqmm","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// Assumes a SparkSession `spark` is in scope (e.g. spark-shell with Spark NLP on the classpath).
+import com.johnsnowlabs.nlp.MultiDocumentAssembler
+import com.johnsnowlabs.nlp.annotators.classifier.dl.DistilBertForQuestionAnswering
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_jennydqmm", "en")
+    .setInputCols(Array("document_question", "document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_jennydqmm| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/JennyDQMM/my_awesome_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_kasmirak_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_kasmirak_en.md new file mode 100644 index 00000000000000..f9a4a33f55af4a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_kasmirak_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_qa_model_kasmirak DistilBertForQuestionAnswering from KasmiraK +author: John Snow Labs +name: burmese_awesome_qa_model_kasmirak +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_kasmirak` is a English model originally trained by KasmiraK. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_kasmirak_en_5.5.0_3.0_1725654355215.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_kasmirak_en_5.5.0_3.0_1725654355215.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active SparkSession `spark` with Spark NLP loaded.
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_kasmirak","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// Assumes a SparkSession `spark` is in scope (e.g. spark-shell with Spark NLP on the classpath).
+import com.johnsnowlabs.nlp.MultiDocumentAssembler
+import com.johnsnowlabs.nlp.annotators.classifier.dl.DistilBertForQuestionAnswering
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_kasmirak", "en")
+    .setInputCols(Array("document_question", "document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_kasmirak| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/KasmiraK/my_awesome_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_kasmirak_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_kasmirak_pipeline_en.md new file mode 100644 index 00000000000000..85450c68cc121d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_kasmirak_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_kasmirak_pipeline pipeline DistilBertForQuestionAnswering from KasmiraK +author: John Snow Labs +name: burmese_awesome_qa_model_kasmirak_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_kasmirak_pipeline` is a English model originally trained by KasmiraK. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_kasmirak_pipeline_en_5.5.0_3.0_1725654367052.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_kasmirak_pipeline_en_5.5.0_3.0_1725654367052.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# `df` is an input DataFrame prepared beforehand; the pipeline is downloaded on first use.
+pipeline = PretrainedPipeline("burmese_awesome_qa_model_kasmirak_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// `df` is an input DataFrame prepared beforehand; the pipeline is downloaded on first use.
+val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_kasmirak_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_kasmirak_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/KasmiraK/my_awesome_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_lizhealey_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_lizhealey_en.md new file mode 100644 index 00000000000000..44c1713b62bc2e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_lizhealey_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_qa_model_lizhealey DistilBertForQuestionAnswering from lizhealey +author: John Snow Labs +name: burmese_awesome_qa_model_lizhealey +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_lizhealey` is a English model originally trained by lizhealey. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_lizhealey_en_5.5.0_3.0_1725652675398.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_lizhealey_en_5.5.0_3.0_1725652675398.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active SparkSession `spark` with Spark NLP loaded.
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_lizhealey","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// Assumes a SparkSession `spark` is in scope (e.g. spark-shell with Spark NLP on the classpath).
+import com.johnsnowlabs.nlp.MultiDocumentAssembler
+import com.johnsnowlabs.nlp.annotators.classifier.dl.DistilBertForQuestionAnswering
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_lizhealey", "en")
+    .setInputCols(Array("document_question", "document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_lizhealey| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/lizhealey/my_awesome_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_madsci_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_madsci_en.md new file mode 100644 index 00000000000000..12f2a5f326febe --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_madsci_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_qa_model_madsci DistilBertForQuestionAnswering from madsci +author: John Snow Labs +name: burmese_awesome_qa_model_madsci +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_madsci` is a English model originally trained by madsci. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_madsci_en_5.5.0_3.0_1725652093920.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_madsci_en_5.5.0_3.0_1725652093920.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active SparkSession `spark` with Spark NLP loaded.
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_madsci","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// Assumes a SparkSession `spark` is in scope (e.g. spark-shell with Spark NLP on the classpath).
+import com.johnsnowlabs.nlp.MultiDocumentAssembler
+import com.johnsnowlabs.nlp.annotators.classifier.dl.DistilBertForQuestionAnswering
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_madsci", "en")
+    .setInputCols(Array("document_question", "document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_madsci| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/madsci/my_awesome_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_madsci_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_madsci_pipeline_en.md new file mode 100644 index 00000000000000..baf561c0eebbe9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_madsci_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_madsci_pipeline pipeline DistilBertForQuestionAnswering from madsci +author: John Snow Labs +name: burmese_awesome_qa_model_madsci_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_madsci_pipeline` is a English model originally trained by madsci. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_madsci_pipeline_en_5.5.0_3.0_1725652106220.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_madsci_pipeline_en_5.5.0_3.0_1725652106220.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# `df` is an input DataFrame prepared beforehand; the pipeline is downloaded on first use.
+pipeline = PretrainedPipeline("burmese_awesome_qa_model_madsci_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// `df` is an input DataFrame prepared beforehand; the pipeline is downloaded on first use.
+val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_madsci_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_madsci_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/madsci/my_awesome_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_munnafaisal_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_munnafaisal_pipeline_en.md new file mode 100644 index 00000000000000..ab7c5fec9ede1d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_munnafaisal_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_munnafaisal_pipeline pipeline DistilBertForQuestionAnswering from Munnafaisal +author: John Snow Labs +name: burmese_awesome_qa_model_munnafaisal_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_munnafaisal_pipeline` is a English model originally trained by Munnafaisal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_munnafaisal_pipeline_en_5.5.0_3.0_1725652219679.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_munnafaisal_pipeline_en_5.5.0_3.0_1725652219679.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# `df` is an input DataFrame prepared beforehand; the pipeline is downloaded on first use.
+pipeline = PretrainedPipeline("burmese_awesome_qa_model_munnafaisal_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// `df` is an input DataFrame prepared beforehand; the pipeline is downloaded on first use.
+val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_munnafaisal_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_munnafaisal_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Munnafaisal/my_awesome_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_nandyala12_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_nandyala12_en.md new file mode 100644 index 00000000000000..059a72605e5f5a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_nandyala12_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_qa_model_nandyala12 DistilBertForQuestionAnswering from Nandyala12 +author: John Snow Labs +name: burmese_awesome_qa_model_nandyala12 +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_nandyala12` is a English model originally trained by Nandyala12. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_nandyala12_en_5.5.0_3.0_1725652775818.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_nandyala12_en_5.5.0_3.0_1725652775818.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active SparkSession `spark` with Spark NLP loaded.
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_nandyala12","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// Assumes a SparkSession `spark` is in scope (e.g. spark-shell with Spark NLP on the classpath).
+import com.johnsnowlabs.nlp.MultiDocumentAssembler
+import com.johnsnowlabs.nlp.annotators.classifier.dl.DistilBertForQuestionAnswering
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_nandyala12", "en")
+    .setInputCols(Array("document_question", "document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_nandyala12| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Nandyala12/my_awesome_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_nandyala12_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_nandyala12_pipeline_en.md new file mode 100644 index 00000000000000..fce54587857001 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_nandyala12_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_nandyala12_pipeline pipeline DistilBertForQuestionAnswering from Nandyala12 +author: John Snow Labs +name: burmese_awesome_qa_model_nandyala12_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_nandyala12_pipeline` is a English model originally trained by Nandyala12. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_nandyala12_pipeline_en_5.5.0_3.0_1725652787486.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_nandyala12_pipeline_en_5.5.0_3.0_1725652787486.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# `df` is an input DataFrame prepared beforehand; the pipeline is downloaded on first use.
+pipeline = PretrainedPipeline("burmese_awesome_qa_model_nandyala12_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// `df` is an input DataFrame prepared beforehand; the pipeline is downloaded on first use.
+val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_nandyala12_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_nandyala12_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Nandyala12/my_awesome_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_navanjana_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_navanjana_en.md new file mode 100644 index 00000000000000..ed36f36e676ced --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_navanjana_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_qa_model_navanjana DistilBertForQuestionAnswering from Navanjana +author: John Snow Labs +name: burmese_awesome_qa_model_navanjana +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_navanjana` is a English model originally trained by Navanjana. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_navanjana_en_5.5.0_3.0_1725621484696.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_navanjana_en_5.5.0_3.0_1725621484696.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active SparkSession `spark` with Spark NLP loaded.
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_navanjana","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// Assumes a SparkSession `spark` is in scope (e.g. spark-shell with Spark NLP on the classpath).
+import com.johnsnowlabs.nlp.MultiDocumentAssembler
+import com.johnsnowlabs.nlp.annotators.classifier.dl.DistilBertForQuestionAnswering
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_navanjana", "en")
+    .setInputCols(Array("document_question", "document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_navanjana| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Navanjana/my_awesome_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_navanjana_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_navanjana_pipeline_en.md new file mode 100644 index 00000000000000..f5671b8b53eec2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_navanjana_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_navanjana_pipeline pipeline DistilBertForQuestionAnswering from Navanjana +author: John Snow Labs +name: burmese_awesome_qa_model_navanjana_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_navanjana_pipeline` is a English model originally trained by Navanjana. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_navanjana_pipeline_en_5.5.0_3.0_1725621496962.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_navanjana_pipeline_en_5.5.0_3.0_1725621496962.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# `df` is an input DataFrame prepared beforehand; the pipeline is downloaded on first use.
+pipeline = PretrainedPipeline("burmese_awesome_qa_model_navanjana_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// `df` is an input DataFrame prepared beforehand; the pipeline is downloaded on first use.
+val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_navanjana_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_navanjana_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Navanjana/my_awesome_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_pechka_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_pechka_en.md new file mode 100644 index 00000000000000..5d3be55a1665ea --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_pechka_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_qa_model_pechka DistilBertForQuestionAnswering from Pechka +author: John Snow Labs +name: burmese_awesome_qa_model_pechka +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_pechka` is a English model originally trained by Pechka. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_pechka_en_5.5.0_3.0_1725622017211.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_pechka_en_5.5.0_3.0_1725622017211.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active SparkSession `spark` with Spark NLP loaded.
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_pechka","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// Assumes a SparkSession `spark` is in scope (e.g. spark-shell with Spark NLP on the classpath).
+import com.johnsnowlabs.nlp.MultiDocumentAssembler
+import com.johnsnowlabs.nlp.annotators.classifier.dl.DistilBertForQuestionAnswering
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_pechka", "en")
+    .setInputCols(Array("document_question", "document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_pechka| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Pechka/my_awesome_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_rentao_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_rentao_en.md new file mode 100644 index 00000000000000..71f210f8ee8a73 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_rentao_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_qa_model_rentao DistilBertForQuestionAnswering from Rentao +author: John Snow Labs +name: burmese_awesome_qa_model_rentao +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_rentao` is a English model originally trained by Rentao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_rentao_en_5.5.0_3.0_1725621906979.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_rentao_en_5.5.0_3.0_1725621906979.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCol(["question", "context"]) \ + .setOutputCol(["document_question", "document_context"]) + +spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_rentao","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCol(Array("question", "context")) + .setOutputCol(Array("document_question", "document_context")) + +val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_rentao", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq("What framework do I use?","I use spark-nlp.").toDS.toDF("document_question", "document_context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_rentao| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Rentao/my_awesome_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_robinsh2023_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_robinsh2023_pipeline_en.md new file mode 100644 index 00000000000000..41c6165c4951f4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_robinsh2023_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_robinsh2023_pipeline pipeline DistilBertForQuestionAnswering from Robinsh2023 +author: John Snow Labs +name: burmese_awesome_qa_model_robinsh2023_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_robinsh2023_pipeline` is a English model originally trained by Robinsh2023. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_robinsh2023_pipeline_en_5.5.0_3.0_1725621607614.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_robinsh2023_pipeline_en_5.5.0_3.0_1725621607614.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_qa_model_robinsh2023_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_robinsh2023_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
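+
+The pipeline snippet above references a `df` that the example never defines. A hedged end-to-end sketch follows, assuming Spark NLP 5.5.x and that the saved pipeline's first stage is the `MultiDocumentAssembler` listed under "Included Models"; the expected input column names are an assumption and should be confirmed against the downloaded pipeline.
+
+```python
+# End-to-end usage sketch for the pretrained QA pipeline above.
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+pipeline = PretrainedPipeline("burmese_awesome_qa_model_robinsh2023_pipeline", lang="en")
+
+# Column names here are a guess; inspect the saved assembler with
+# pipeline.model.stages[0].getInputCols() and adjust the DataFrame accordingly.
+df = spark.createDataFrame(
+    [("What framework do I use?", "I use spark-nlp.")],
+    ["question", "context"],
+)
+annotations = pipeline.transform(df)
+annotations.select("answer.result").show(truncate=False)
+```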
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_robinsh2023_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Robinsh2023/my_awesome_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_simranjeet97_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_simranjeet97_en.md new file mode 100644 index 00000000000000..f587ded9bd6b93 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_simranjeet97_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_qa_model_simranjeet97 DistilBertForQuestionAnswering from simranjeet97 +author: John Snow Labs +name: burmese_awesome_qa_model_simranjeet97 +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_simranjeet97` is a English model originally trained by simranjeet97. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_simranjeet97_en_5.5.0_3.0_1725652472421.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_simranjeet97_en_5.5.0_3.0_1725652472421.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCol(["question", "context"]) \ + .setOutputCol(["document_question", "document_context"]) + +spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_simranjeet97","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCol(Array("question", "context")) + .setOutputCol(Array("document_question", "document_context")) + +val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_simranjeet97", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq("What framework do I use?","I use spark-nlp.").toDS.toDF("document_question", "document_context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_simranjeet97| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/simranjeet97/my_awesome_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_simranjeet97_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_simranjeet97_pipeline_en.md new file mode 100644 index 00000000000000..4a86deab146531 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_simranjeet97_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_simranjeet97_pipeline pipeline DistilBertForQuestionAnswering from simranjeet97 +author: John Snow Labs +name: burmese_awesome_qa_model_simranjeet97_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_simranjeet97_pipeline` is a English model originally trained by simranjeet97. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_simranjeet97_pipeline_en_5.5.0_3.0_1725652488392.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_simranjeet97_pipeline_en_5.5.0_3.0_1725652488392.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_qa_model_simranjeet97_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_simranjeet97_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_simranjeet97_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/simranjeet97/my_awesome_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_speng123_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_speng123_en.md new file mode 100644 index 00000000000000..b26b198b94351b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_speng123_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_qa_model_speng123 DistilBertForQuestionAnswering from SPENG123 +author: John Snow Labs +name: burmese_awesome_qa_model_speng123 +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_speng123` is a English model originally trained by SPENG123. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_speng123_en_5.5.0_3.0_1725654639549.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_speng123_en_5.5.0_3.0_1725654639549.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCol(["question", "context"]) \ + .setOutputCol(["document_question", "document_context"]) + +spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_speng123","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCol(Array("question", "context")) + .setOutputCol(Array("document_question", "document_context")) + +val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_speng123", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq("What framework do I use?","I use spark-nlp.").toDS.toDF("document_question", "document_context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_speng123| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/SPENG123/my_awesome_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_stevhliu_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_stevhliu_en.md new file mode 100644 index 00000000000000..2d855b95cf760b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_stevhliu_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_qa_model_stevhliu DistilBertForQuestionAnswering from stevhliu +author: John Snow Labs +name: burmese_awesome_qa_model_stevhliu +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_stevhliu` is a English model originally trained by stevhliu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_stevhliu_en_5.5.0_3.0_1725652093737.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_stevhliu_en_5.5.0_3.0_1725652093737.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCol(["question", "context"]) \ + .setOutputCol(["document_question", "document_context"]) + +spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_stevhliu","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCol(Array("question", "context")) + .setOutputCol(Array("document_question", "document_context")) + +val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_stevhliu", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq("What framework do I use?","I use spark-nlp.").toDS.toDF("document_question", "document_context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_stevhliu| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/stevhliu/my_awesome_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_stevhliu_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_stevhliu_pipeline_en.md new file mode 100644 index 00000000000000..58c0ec571321ca --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_stevhliu_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_stevhliu_pipeline pipeline DistilBertForQuestionAnswering from stevhliu +author: John Snow Labs +name: burmese_awesome_qa_model_stevhliu_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_stevhliu_pipeline` is a English model originally trained by stevhliu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_stevhliu_pipeline_en_5.5.0_3.0_1725652108830.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_stevhliu_pipeline_en_5.5.0_3.0_1725652108830.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_qa_model_stevhliu_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_stevhliu_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_stevhliu_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/stevhliu/my_awesome_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_walter133_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_walter133_pipeline_en.md new file mode 100644 index 00000000000000..f5e265e97516e0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_walter133_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_walter133_pipeline pipeline DistilBertForQuestionAnswering from walter133 +author: John Snow Labs +name: burmese_awesome_qa_model_walter133_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_walter133_pipeline` is a English model originally trained by walter133. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_walter133_pipeline_en_5.5.0_3.0_1725621832278.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_walter133_pipeline_en_5.5.0_3.0_1725621832278.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_qa_model_walter133_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_walter133_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_walter133_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/walter133/my_awesome_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_wwhheerree_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_wwhheerree_pipeline_en.md new file mode 100644 index 00000000000000..74e12e5ec0c715 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_wwhheerree_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_wwhheerree_pipeline pipeline DistilBertForQuestionAnswering from wwhheerree +author: John Snow Labs +name: burmese_awesome_qa_model_wwhheerree_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_wwhheerree_pipeline` is a English model originally trained by wwhheerree. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_wwhheerree_pipeline_en_5.5.0_3.0_1725652661876.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_wwhheerree_pipeline_en_5.5.0_3.0_1725652661876.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_qa_model_wwhheerree_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_wwhheerree_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_wwhheerree_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/wwhheerree/my_awesome_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_yangyangsong_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_yangyangsong_en.md new file mode 100644 index 00000000000000..42296913e374a1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_yangyangsong_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_qa_model_yangyangsong DistilBertForQuestionAnswering from YangyangSong +author: John Snow Labs +name: burmese_awesome_qa_model_yangyangsong +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_yangyangsong` is a English model originally trained by YangyangSong. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_yangyangsong_en_5.5.0_3.0_1725654861633.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_yangyangsong_en_5.5.0_3.0_1725654861633.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCol(["question", "context"]) \ + .setOutputCol(["document_question", "document_context"]) + +spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_yangyangsong","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCol(Array("question", "context")) + .setOutputCol(Array("document_question", "document_context")) + +val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_yangyangsong", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq("What framework do I use?","I use spark-nlp.").toDS.toDF("document_question", "document_context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_yangyangsong| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/YangyangSong/my_awesome_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_yangyangsong_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_yangyangsong_pipeline_en.md new file mode 100644 index 00000000000000..aa9d0cb492954a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_qa_model_yangyangsong_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_yangyangsong_pipeline pipeline DistilBertForQuestionAnswering from YangyangSong +author: John Snow Labs +name: burmese_awesome_qa_model_yangyangsong_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_yangyangsong_pipeline` is a English model originally trained by YangyangSong. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_yangyangsong_pipeline_en_5.5.0_3.0_1725654873443.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_yangyangsong_pipeline_en_5.5.0_3.0_1725654873443.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_qa_model_yangyangsong_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_yangyangsong_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_yangyangsong_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/YangyangSong/my_awesome_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_setfit_model1_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_setfit_model1_en.md new file mode 100644 index 00000000000000..699c2ce65bd9f4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_setfit_model1_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_setfit_model1 MPNetEmbeddings from ilhkn +author: John Snow Labs +name: burmese_awesome_setfit_model1 +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_setfit_model1` is a English model originally trained by ilhkn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_setfit_model1_en_5.5.0_3.0_1725594896905.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_setfit_model1_en_5.5.0_3.0_1725594896905.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("burmese_awesome_setfit_model1","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("burmese_awesome_setfit_model1","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
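+
+The embeddings example above likewise omits its imports. A short sketch (assuming Spark NLP 5.5.x) is shown below; it also adds an `EmbeddingsFinisher` stage to turn the MPNet annotations into plain vector columns for downstream use.
+
+```python
+# Imports and vector extraction for the MPNet embeddings example above.
+import sparknlp
+from sparknlp.base import DocumentAssembler, EmbeddingsFinisher
+from sparknlp.annotator import MPNetEmbeddings
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+embeddings = MPNetEmbeddings.pretrained("burmese_awesome_setfit_model1", "en") \
+    .setInputCols(["document"]) \
+    .setOutputCol("embeddings")
+
+# Converts the embedding annotations into a plain array column named sentence_vector.
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["sentence_vector"]) \
+    .setOutputAsVector(False)
+
+pipeline = Pipeline().setStages([documentAssembler, embeddings, finisher])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline.fit(data).transform(data).select("sentence_vector").show(truncate=False)
+```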
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_setfit_model1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/ilhkn/my-awesome-setfit-model1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_setfit_model1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_setfit_model1_pipeline_en.md new file mode 100644 index 00000000000000..7ccdd6ad005830 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_setfit_model1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_setfit_model1_pipeline pipeline MPNetEmbeddings from ilhkn +author: John Snow Labs +name: burmese_awesome_setfit_model1_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_setfit_model1_pipeline` is a English model originally trained by ilhkn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_setfit_model1_pipeline_en_5.5.0_3.0_1725594925060.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_setfit_model1_pipeline_en_5.5.0_3.0_1725594925060.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_setfit_model1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_setfit_model1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_setfit_model1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/ilhkn/my-awesome-setfit-model1 + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_token_classification_v2_1_2_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_token_classification_v2_1_2_en.md new file mode 100644 index 00000000000000..22cfa145645ca4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_token_classification_v2_1_2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_token_classification_v2_1_2 XlmRoBertaForTokenClassification from lilyyellow +author: John Snow Labs +name: burmese_awesome_token_classification_v2_1_2 +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_token_classification_v2_1_2` is a English model originally trained by lilyyellow. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_token_classification_v2_1_2_en_5.5.0_3.0_1725656380309.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_token_classification_v2_1_2_en_5.5.0_3.0_1725656380309.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = XlmRoBertaForTokenClassification.pretrained("burmese_awesome_token_classification_v2_1_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("burmese_awesome_token_classification_v2_1_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
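+
+In the example above the classifier reads from `documents` while the assembler writes to `document`; the column names must agree for the pipeline to run. A consistent sketch (assuming Spark NLP 5.5.x), extended with `NerConverter` so the token-level tags are grouped into entity chunks, is:
+
+```python
+# Token-classification sketch with consistent column names and entity-chunk extraction.
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification, NerConverter
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("burmese_awesome_token_classification_v2_1_2", "en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+# Groups the B-/I- tags emitted by the classifier into whole entity chunks.
+nerConverter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("entities")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier, nerConverter])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline.fit(data).transform(data).select("entities.result").show(truncate=False)
+```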
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_token_classification_v2_1_2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|830.8 MB| + +## References + +https://huggingface.co/lilyyellow/my_awesome_token_classification_v2.1.2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_token_classification_v2_1_2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_token_classification_v2_1_2_pipeline_en.md new file mode 100644 index 00000000000000..7bcb119f0321a1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_token_classification_v2_1_2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_token_classification_v2_1_2_pipeline pipeline XlmRoBertaForTokenClassification from lilyyellow +author: John Snow Labs +name: burmese_awesome_token_classification_v2_1_2_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_token_classification_v2_1_2_pipeline` is a English model originally trained by lilyyellow. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_token_classification_v2_1_2_pipeline_en_5.5.0_3.0_1725656488802.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_token_classification_v2_1_2_pipeline_en_5.5.0_3.0_1725656488802.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_token_classification_v2_1_2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_token_classification_v2_1_2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_token_classification_v2_1_2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|830.8 MB| + +## References + +https://huggingface.co/lilyyellow/my_awesome_token_classification_v2.1.2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_all_saprotection_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_all_saprotection_en.md new file mode 100644 index 00000000000000..0ccce8e512ece1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_all_saprotection_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_all_saprotection DistilBertForTokenClassification from gonzalezrostani +author: John Snow Labs +name: burmese_awesome_wnut_all_saprotection +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_all_saprotection` is a English model originally trained by gonzalezrostani. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_all_saprotection_en_5.5.0_3.0_1725653978789.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_all_saprotection_en_5.5.0_3.0_1725653978789.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_all_saprotection","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_all_saprotection", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_all_saprotection| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/gonzalezrostani/my_awesome_wnut_all_SAprotection \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_all_saprotection_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_all_saprotection_pipeline_en.md new file mode 100644 index 00000000000000..bf662e3bcb57f8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_all_saprotection_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_all_saprotection_pipeline pipeline DistilBertForTokenClassification from gonzalezrostani +author: John Snow Labs +name: burmese_awesome_wnut_all_saprotection_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_all_saprotection_pipeline` is a English model originally trained by gonzalezrostani. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_all_saprotection_pipeline_en_5.5.0_3.0_1725653990761.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_all_saprotection_pipeline_en_5.5.0_3.0_1725653990761.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_all_saprotection_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_all_saprotection_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_all_saprotection_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/gonzalezrostani/my_awesome_wnut_all_SAprotection + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_jpr_gonzalezrostani_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_jpr_gonzalezrostani_en.md new file mode 100644 index 00000000000000..888b964e499f88 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_jpr_gonzalezrostani_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_jpr_gonzalezrostani DistilBertForTokenClassification from gonzalezrostani +author: John Snow Labs +name: burmese_awesome_wnut_jpr_gonzalezrostani +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_jpr_gonzalezrostani` is a English model originally trained by gonzalezrostani. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_jpr_gonzalezrostani_en_5.5.0_3.0_1725653314745.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_jpr_gonzalezrostani_en_5.5.0_3.0_1725653314745.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_jpr_gonzalezrostani","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_jpr_gonzalezrostani", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_jpr_gonzalezrostani| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/gonzalezrostani/my_awesome_wnut_JPr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_jpr_gonzalezrostani_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_jpr_gonzalezrostani_pipeline_en.md new file mode 100644 index 00000000000000..469f586e4b5b5b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_jpr_gonzalezrostani_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_jpr_gonzalezrostani_pipeline pipeline DistilBertForTokenClassification from gonzalezrostani +author: John Snow Labs +name: burmese_awesome_wnut_jpr_gonzalezrostani_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_jpr_gonzalezrostani_pipeline` is a English model originally trained by gonzalezrostani. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_jpr_gonzalezrostani_pipeline_en_5.5.0_3.0_1725653327177.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_jpr_gonzalezrostani_pipeline_en_5.5.0_3.0_1725653327177.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_jpr_gonzalezrostani_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_jpr_gonzalezrostani_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_jpr_gonzalezrostani_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/gonzalezrostani/my_awesome_wnut_JPr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_jquali_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_jquali_pipeline_en.md new file mode 100644 index 00000000000000..3aacafd58a506d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_jquali_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_jquali_pipeline pipeline DistilBertForTokenClassification from gonzalezrostani +author: John Snow Labs +name: burmese_awesome_wnut_jquali_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_jquali_pipeline` is a English model originally trained by gonzalezrostani. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_jquali_pipeline_en_5.5.0_3.0_1725653956237.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_jquali_pipeline_en_5.5.0_3.0_1725653956237.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_jquali_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_jquali_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_jquali_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/gonzalezrostani/my_awesome_wnut_JQuali + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_2_majyoarisu_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_2_majyoarisu_en.md new file mode 100644 index 00000000000000..945aeb4b775047 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_2_majyoarisu_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_2_majyoarisu DistilBertForTokenClassification from majyoarisu +author: John Snow Labs +name: burmese_awesome_wnut_model_2_majyoarisu +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_2_majyoarisu` is a English model originally trained by majyoarisu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_2_majyoarisu_en_5.5.0_3.0_1725653287008.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_2_majyoarisu_en_5.5.0_3.0_1725653287008.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_2_majyoarisu","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_2_majyoarisu", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_2_majyoarisu| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/majyoarisu/my_awesome_wnut_model_2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_2_majyoarisu_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_2_majyoarisu_pipeline_en.md new file mode 100644 index 00000000000000..8b49fd63c0a584 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_2_majyoarisu_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_2_majyoarisu_pipeline pipeline DistilBertForTokenClassification from majyoarisu +author: John Snow Labs +name: burmese_awesome_wnut_model_2_majyoarisu_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_2_majyoarisu_pipeline` is a English model originally trained by majyoarisu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_2_majyoarisu_pipeline_en_5.5.0_3.0_1725653303749.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_2_majyoarisu_pipeline_en_5.5.0_3.0_1725653303749.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_2_majyoarisu_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_2_majyoarisu_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
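+
+The `df` referenced above is any Spark DataFrame with a `text` column, and the call assumes Spark NLP has already been started. A minimal sketch; the `ner` output column name follows the underlying token classifier card:
+
+```python
+# Minimal sketch, assuming Spark NLP is installed; the pipeline name comes
+# from this card and the "ner" column from the underlying model card.
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")  # input column must be "text"
+
+pipeline = PretrainedPipeline("burmese_awesome_wnut_model_2_majyoarisu_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+annotations.select("ner.result").show(truncate = False)
+```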
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_2_majyoarisu_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/majyoarisu/my_awesome_wnut_model_2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_adalee1001_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_adalee1001_pipeline_en.md new file mode 100644 index 00000000000000..eb6f717a480c3c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_adalee1001_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_adalee1001_pipeline pipeline DistilBertForTokenClassification from Adalee1001 +author: John Snow Labs +name: burmese_awesome_wnut_model_adalee1001_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_adalee1001_pipeline` is a English model originally trained by Adalee1001. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_adalee1001_pipeline_en_5.5.0_3.0_1725599598163.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_adalee1001_pipeline_en_5.5.0_3.0_1725599598163.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_adalee1001_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_adalee1001_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_adalee1001_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Adalee1001/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_casestudent_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_casestudent_en.md new file mode 100644 index 00000000000000..e8855cc4221949 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_casestudent_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_casestudent DistilBertForTokenClassification from CaseStudent +author: John Snow Labs +name: burmese_awesome_wnut_model_casestudent +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_casestudent` is a English model originally trained by CaseStudent. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_casestudent_en_5.5.0_3.0_1725653737161.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_casestudent_en_5.5.0_3.0_1725653737161.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_casestudent","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_casestudent", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
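+
+Once fitted, the pipeline is an ordinary Spark ML `PipelineModel`, so it can be persisted and reloaded without re-downloading the pretrained weights. A rough sketch, assuming `pipelineModel` and `data` from the example above; the path is only a placeholder:
+
+```python
+# Rough sketch: persist and reload the fitted pipeline.
+# Assumes `pipelineModel` and `data` from the example above.
+from pyspark.ml import PipelineModel
+
+pipelineModel.write().overwrite().save("./wnut_casestudent_pipeline_model")
+restored = PipelineModel.load("./wnut_casestudent_pipeline_model")
+restored.transform(data).select("ner.result").show(truncate = False)
+```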
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_casestudent| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/CaseStudent/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_casestudent_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_casestudent_pipeline_en.md new file mode 100644 index 00000000000000..99dd0a72cc618c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_casestudent_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_casestudent_pipeline pipeline DistilBertForTokenClassification from CaseStudent +author: John Snow Labs +name: burmese_awesome_wnut_model_casestudent_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_casestudent_pipeline` is a English model originally trained by CaseStudent. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_casestudent_pipeline_en_5.5.0_3.0_1725653749039.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_casestudent_pipeline_en_5.5.0_3.0_1725653749039.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_casestudent_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_casestudent_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_casestudent_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/CaseStudent/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_dlim12_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_dlim12_pipeline_en.md new file mode 100644 index 00000000000000..e7c17f5670c6ef --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_dlim12_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_dlim12_pipeline pipeline DistilBertForTokenClassification from dlim12 +author: John Snow Labs +name: burmese_awesome_wnut_model_dlim12_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_dlim12_pipeline` is a English model originally trained by dlim12. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_dlim12_pipeline_en_5.5.0_3.0_1725599383762.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_dlim12_pipeline_en_5.5.0_3.0_1725599383762.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_dlim12_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_dlim12_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_dlim12_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/dlim12/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_hoaan2003_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_hoaan2003_en.md new file mode 100644 index 00000000000000..174f0ff0eebbfe --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_hoaan2003_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_hoaan2003 DistilBertForTokenClassification from HoaAn2003 +author: John Snow Labs +name: burmese_awesome_wnut_model_hoaan2003 +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_hoaan2003` is a English model originally trained by HoaAn2003. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_hoaan2003_en_5.5.0_3.0_1725599487073.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_hoaan2003_en_5.5.0_3.0_1725599487073.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_hoaan2003","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_hoaan2003", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
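+
+For quick, single-sentence checks it can be more convenient to wrap the fitted pipeline in a `LightPipeline`, which runs in memory without a DataFrame round trip. A rough sketch, assuming `pipelineModel` from the example above:
+
+```python
+# Rough sketch: low-latency inference on small inputs. Assumes `pipelineModel`
+# from the example above; dict keys match the output column names.
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+result = light.annotate("I love spark-nlp")
+print(result["token"])
+print(result["ner"])
+```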
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_hoaan2003| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/HoaAn2003/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_hoaan2003_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_hoaan2003_pipeline_en.md new file mode 100644 index 00000000000000..6cdcb039f7959b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_hoaan2003_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_hoaan2003_pipeline pipeline DistilBertForTokenClassification from HoaAn2003 +author: John Snow Labs +name: burmese_awesome_wnut_model_hoaan2003_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_hoaan2003_pipeline` is a English model originally trained by HoaAn2003. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_hoaan2003_pipeline_en_5.5.0_3.0_1725599500301.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_hoaan2003_pipeline_en_5.5.0_3.0_1725599500301.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_hoaan2003_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_hoaan2003_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_hoaan2003_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/HoaAn2003/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_jarvisx17_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_jarvisx17_en.md new file mode 100644 index 00000000000000..b8fdb783213b0c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_jarvisx17_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_jarvisx17 DistilBertForTokenClassification from jarvisx17 +author: John Snow Labs +name: burmese_awesome_wnut_model_jarvisx17 +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_jarvisx17` is a English model originally trained by jarvisx17. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_jarvisx17_en_5.5.0_3.0_1725598974585.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_jarvisx17_en_5.5.0_3.0_1725598974585.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_jarvisx17","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_jarvisx17", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_jarvisx17| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/jarvisx17/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_jarvisx17_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_jarvisx17_pipeline_en.md new file mode 100644 index 00000000000000..486176db1f2d4f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_jarvisx17_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_jarvisx17_pipeline pipeline DistilBertForTokenClassification from jarvisx17 +author: John Snow Labs +name: burmese_awesome_wnut_model_jarvisx17_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_jarvisx17_pipeline` is a English model originally trained by jarvisx17. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_jarvisx17_pipeline_en_5.5.0_3.0_1725598992697.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_jarvisx17_pipeline_en_5.5.0_3.0_1725598992697.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_jarvisx17_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_jarvisx17_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_jarvisx17_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/jarvisx17/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_jsalafranca_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_jsalafranca_en.md new file mode 100644 index 00000000000000..3648dc1bad4476 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_jsalafranca_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_jsalafranca DistilBertForTokenClassification from jsalafranca +author: John Snow Labs +name: burmese_awesome_wnut_model_jsalafranca +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_jsalafranca` is a English model originally trained by jsalafranca. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_jsalafranca_en_5.5.0_3.0_1725599778925.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_jsalafranca_en_5.5.0_3.0_1725599778925.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_jsalafranca","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_jsalafranca", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_jsalafranca| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/jsalafranca/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_jsalafranca_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_jsalafranca_pipeline_en.md new file mode 100644 index 00000000000000..9f5fabc039894b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_jsalafranca_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_jsalafranca_pipeline pipeline DistilBertForTokenClassification from jsalafranca +author: John Snow Labs +name: burmese_awesome_wnut_model_jsalafranca_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_jsalafranca_pipeline` is a English model originally trained by jsalafranca. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_jsalafranca_pipeline_en_5.5.0_3.0_1725599791022.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_jsalafranca_pipeline_en_5.5.0_3.0_1725599791022.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_jsalafranca_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_jsalafranca_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_jsalafranca_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/jsalafranca/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_mhdkj_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_mhdkj_en.md new file mode 100644 index 00000000000000..d9a813c92f964f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_mhdkj_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_mhdkj DistilBertForTokenClassification from mhdkj +author: John Snow Labs +name: burmese_awesome_wnut_model_mhdkj +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_mhdkj` is a English model originally trained by mhdkj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_mhdkj_en_5.5.0_3.0_1725653776998.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_mhdkj_en_5.5.0_3.0_1725653776998.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_mhdkj","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_mhdkj", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_mhdkj| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/mhdkj/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_povhal_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_povhal_en.md new file mode 100644 index 00000000000000..fa0be4325d9d74 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_povhal_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_povhal DistilBertForTokenClassification from povhal +author: John Snow Labs +name: burmese_awesome_wnut_model_povhal +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_povhal` is a English model originally trained by povhal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_povhal_en_5.5.0_3.0_1725599723845.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_povhal_en_5.5.0_3.0_1725599723845.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_povhal","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_povhal", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_povhal| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|240.1 MB| + +## References + +https://huggingface.co/povhal/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_povhal_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_povhal_pipeline_en.md new file mode 100644 index 00000000000000..768e0fef6157fd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_povhal_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_povhal_pipeline pipeline DistilBertForTokenClassification from povhal +author: John Snow Labs +name: burmese_awesome_wnut_model_povhal_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_povhal_pipeline` is a English model originally trained by povhal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_povhal_pipeline_en_5.5.0_3.0_1725599737772.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_povhal_pipeline_en_5.5.0_3.0_1725599737772.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_povhal_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_povhal_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_povhal_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|240.1 MB| + +## References + +https://huggingface.co/povhal/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_roeizucker_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_roeizucker_en.md new file mode 100644 index 00000000000000..9936185c04557f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_roeizucker_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_roeizucker DistilBertForTokenClassification from roeizucker +author: John Snow Labs +name: burmese_awesome_wnut_model_roeizucker +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_roeizucker` is a English model originally trained by roeizucker. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_roeizucker_en_5.5.0_3.0_1725653843283.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_roeizucker_en_5.5.0_3.0_1725653843283.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_roeizucker","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_roeizucker", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_roeizucker| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/roeizucker/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_roeizucker_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_roeizucker_pipeline_en.md new file mode 100644 index 00000000000000..e4bf5d672c4d6c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_roeizucker_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_roeizucker_pipeline pipeline DistilBertForTokenClassification from roeizucker +author: John Snow Labs +name: burmese_awesome_wnut_model_roeizucker_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_roeizucker_pipeline` is a English model originally trained by roeizucker. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_roeizucker_pipeline_en_5.5.0_3.0_1725653855055.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_roeizucker_pipeline_en_5.5.0_3.0_1725653855055.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_roeizucker_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_roeizucker_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_roeizucker_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/roeizucker/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_ttnksm_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_ttnksm_pipeline_en.md new file mode 100644 index 00000000000000..013bdbd494b9d7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_ttnksm_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_ttnksm_pipeline pipeline DistilBertForTokenClassification from ttnksm +author: John Snow Labs +name: burmese_awesome_wnut_model_ttnksm_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_ttnksm_pipeline` is a English model originally trained by ttnksm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_ttnksm_pipeline_en_5.5.0_3.0_1725654082228.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_ttnksm_pipeline_en_5.5.0_3.0_1725654082228.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_ttnksm_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_ttnksm_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_ttnksm_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/ttnksm/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_yannik_646_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_yannik_646_pipeline_en.md new file mode 100644 index 00000000000000..30b38f86ef139f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_yannik_646_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_yannik_646_pipeline pipeline DistilBertForTokenClassification from yannik-646 +author: John Snow Labs +name: burmese_awesome_wnut_model_yannik_646_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_yannik_646_pipeline` is a English model originally trained by yannik-646. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_yannik_646_pipeline_en_5.5.0_3.0_1725598995494.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_yannik_646_pipeline_en_5.5.0_3.0_1725598995494.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_yannik_646_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_yannik_646_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_yannik_646_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/yannik-646/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_ydc13_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_ydc13_en.md new file mode 100644 index 00000000000000..e398fcad8374ae --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_ydc13_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_ydc13 DistilBertForTokenClassification from ydc13 +author: John Snow Labs +name: burmese_awesome_wnut_model_ydc13 +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_ydc13` is a English model originally trained by ydc13. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_ydc13_en_5.5.0_3.0_1725598974589.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_ydc13_en_5.5.0_3.0_1725598974589.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_ydc13","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_ydc13", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_ydc13| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/ydc13/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_ydc13_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_ydc13_pipeline_en.md new file mode 100644 index 00000000000000..9964d1a8eacc35 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_ydc13_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_ydc13_pipeline pipeline DistilBertForTokenClassification from ydc13 +author: John Snow Labs +name: burmese_awesome_wnut_model_ydc13_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_ydc13_pipeline` is a English model originally trained by ydc13. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_ydc13_pipeline_en_5.5.0_3.0_1725598992574.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_ydc13_pipeline_en_5.5.0_3.0_1725598992574.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_ydc13_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_ydc13_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
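+
+For ad-hoc testing, `PretrainedPipeline` also exposes `annotate`, which takes a plain string (or a list of strings) instead of a DataFrame. A minimal sketch, assuming an active Spark NLP session and the pipeline name from this card:
+
+```python
+# Minimal sketch: string-in, dict-out inference; keys match the output column names.
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+pipeline = PretrainedPipeline("burmese_awesome_wnut_model_ydc13_pipeline", lang = "en")
+result = pipeline.annotate("I love spark-nlp")
+print(result["ner"])
+```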
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_ydc13_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/ydc13/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_yunjinchoi_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_yunjinchoi_en.md new file mode 100644 index 00000000000000..02a4c9b1237dc8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_yunjinchoi_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_yunjinchoi DistilBertForTokenClassification from yunjinchoi +author: John Snow Labs +name: burmese_awesome_wnut_model_yunjinchoi +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_yunjinchoi` is a English model originally trained by yunjinchoi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_yunjinchoi_en_5.5.0_3.0_1725653834988.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_yunjinchoi_en_5.5.0_3.0_1725653834988.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_yunjinchoi","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_yunjinchoi", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
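+Once the pipeline has run, the predictions can be inspected directly on the transformed DataFrame. A minimal sketch, assuming the column names used in the example above:
+
+```python
+# token texts and their predicted NER tags, one array per input row
+pipelineDF.select("token.result", "ner.result").show(truncate=False)
+```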
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_yunjinchoi| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/yunjinchoi/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_yunjinchoi_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_yunjinchoi_pipeline_en.md new file mode 100644 index 00000000000000..52c0a8db319690 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_yunjinchoi_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_yunjinchoi_pipeline pipeline DistilBertForTokenClassification from yunjinchoi +author: John Snow Labs +name: burmese_awesome_wnut_model_yunjinchoi_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_yunjinchoi_pipeline` is a English model originally trained by yunjinchoi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_yunjinchoi_pipeline_en_5.5.0_3.0_1725653846907.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_yunjinchoi_pipeline_en_5.5.0_3.0_1725653846907.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+pipeline = PretrainedPipeline("burmese_awesome_wnut_model_yunjinchoi_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_yunjinchoi_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_yunjinchoi_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/yunjinchoi/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_zanche_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_zanche_en.md new file mode 100644 index 00000000000000..6214fe851de02e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_zanche_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_zanche DistilBertForTokenClassification from zanche +author: John Snow Labs +name: burmese_awesome_wnut_model_zanche +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_zanche` is a English model originally trained by zanche. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_zanche_en_5.5.0_3.0_1725653473293.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_zanche_en_5.5.0_3.0_1725653473293.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_zanche","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_zanche", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_zanche| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/zanche/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_zanche_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_zanche_pipeline_en.md new file mode 100644 index 00000000000000..ce03aad4fcbd72 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_awesome_wnut_model_zanche_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_zanche_pipeline pipeline DistilBertForTokenClassification from zanche +author: John Snow Labs +name: burmese_awesome_wnut_model_zanche_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_zanche_pipeline` is a English model originally trained by zanche. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_zanche_pipeline_en_5.5.0_3.0_1725653485264.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_zanche_pipeline_en_5.5.0_3.0_1725653485264.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+pipeline = PretrainedPipeline("burmese_awesome_wnut_model_zanche_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_zanche_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_zanche_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/zanche/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_dist_nepal_bhasa_model_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_dist_nepal_bhasa_model_en.md new file mode 100644 index 00000000000000..74a93e21346cdd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_dist_nepal_bhasa_model_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_dist_nepal_bhasa_model DistilBertForTokenClassification from bhadauriaupendra062 +author: John Snow Labs +name: burmese_dist_nepal_bhasa_model +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_dist_nepal_bhasa_model` is a English model originally trained by bhadauriaupendra062. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_dist_nepal_bhasa_model_en_5.5.0_3.0_1725598974602.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_dist_nepal_bhasa_model_en_5.5.0_3.0_1725598974602.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_dist_nepal_bhasa_model","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_dist_nepal_bhasa_model", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_dist_nepal_bhasa_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/bhadauriaupendra062/my_dist_new_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_dist_nepal_bhasa_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_dist_nepal_bhasa_model_pipeline_en.md new file mode 100644 index 00000000000000..4bc524f20b4417 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_dist_nepal_bhasa_model_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_dist_nepal_bhasa_model_pipeline pipeline DistilBertForTokenClassification from bhadauriaupendra062 +author: John Snow Labs +name: burmese_dist_nepal_bhasa_model_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_dist_nepal_bhasa_model_pipeline` is a English model originally trained by bhadauriaupendra062. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_dist_nepal_bhasa_model_pipeline_en_5.5.0_3.0_1725598995366.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_dist_nepal_bhasa_model_pipeline_en_5.5.0_3.0_1725598995366.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+pipeline = PretrainedPipeline("burmese_dist_nepal_bhasa_model_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val pipeline = new PretrainedPipeline("burmese_dist_nepal_bhasa_model_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_dist_nepal_bhasa_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/bhadauriaupendra062/my_dist_new_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_fine_tuning_opus_maltese_english_vietnamese_model_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_fine_tuning_opus_maltese_english_vietnamese_model_en.md new file mode 100644 index 00000000000000..9922c23819506b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_fine_tuning_opus_maltese_english_vietnamese_model_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_fine_tuning_opus_maltese_english_vietnamese_model MarianTransformer from Kudod +author: John Snow Labs +name: burmese_fine_tuning_opus_maltese_english_vietnamese_model +date: 2024-09-06 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_fine_tuning_opus_maltese_english_vietnamese_model` is a English model originally trained by Kudod. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_fine_tuning_opus_maltese_english_vietnamese_model_en_5.5.0_3.0_1725636372907.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_fine_tuning_opus_maltese_english_vietnamese_model_en_5.5.0_3.0_1725636372907.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("burmese_fine_tuning_opus_maltese_english_vietnamese_model","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("burmese_fine_tuning_opus_maltese_english_vietnamese_model","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
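+To read the generated translations, select the result field of the output column. A minimal sketch, assuming the column names used in the example above:
+
+```python
+# one translated sentence per detected input sentence
+pipelineDF.select("translation.result").show(truncate=False)
+```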
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_fine_tuning_opus_maltese_english_vietnamese_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|474.7 MB| + +## References + +https://huggingface.co/Kudod/my_fine_tuning_opus_mt_en_vi_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_model_onsba_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_model_onsba_en.md new file mode 100644 index 00000000000000..b1719e4f1588c2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_model_onsba_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_model_onsba DistilBertForQuestionAnswering from onsba +author: John Snow Labs +name: burmese_model_onsba +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_model_onsba` is a English model originally trained by onsba. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_model_onsba_en_5.5.0_3.0_1725622037300.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_model_onsba_en_5.5.0_3.0_1725622037300.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_model_onsba","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_model_onsba", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
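+The predicted answer span can then be read from the output column. A minimal sketch, assuming the column names used in the example above:
+
+```python
+pipelineDF.select("answer.result").show(truncate=False)
+```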
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_model_onsba| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/onsba/my_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_model_onsba_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_model_onsba_pipeline_en.md new file mode 100644 index 00000000000000..9331aba1caeea8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_model_onsba_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_model_onsba_pipeline pipeline DistilBertForQuestionAnswering from onsba +author: John Snow Labs +name: burmese_model_onsba_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_model_onsba_pipeline` is a English model originally trained by onsba. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_model_onsba_pipeline_en_5.5.0_3.0_1725622049012.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_model_onsba_pipeline_en_5.5.0_3.0_1725622049012.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+pipeline = PretrainedPipeline("burmese_model_onsba_pipeline", lang = "en")
+# the pipeline expects question/context input columns (typically "question" and "context")
+df = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val pipeline = new PretrainedPipeline("burmese_model_onsba_pipeline", lang = "en")
+// the pipeline expects question/context input columns (typically "question" and "context")
+val df = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_model_onsba_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/onsba/my_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_ner_model_mido545_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_ner_model_mido545_pipeline_en.md new file mode 100644 index 00000000000000..8bfd5262cfb0b2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_ner_model_mido545_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_ner_model_mido545_pipeline pipeline DistilBertForTokenClassification from mido545 +author: John Snow Labs +name: burmese_ner_model_mido545_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_ner_model_mido545_pipeline` is a English model originally trained by mido545. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_ner_model_mido545_pipeline_en_5.5.0_3.0_1725653668802.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_ner_model_mido545_pipeline_en_5.5.0_3.0_1725653668802.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+pipeline = PretrainedPipeline("burmese_ner_model_mido545_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val pipeline = new PretrainedPipeline("burmese_ner_model_mido545_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_ner_model_mido545_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/mido545/my_ner_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_nmt_model_ad_iiitd_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_nmt_model_ad_iiitd_pipeline_en.md new file mode 100644 index 00000000000000..d88c803af26bce --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_nmt_model_ad_iiitd_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_nmt_model_ad_iiitd_pipeline pipeline MarianTransformer from AD-IIITD +author: John Snow Labs +name: burmese_nmt_model_ad_iiitd_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_nmt_model_ad_iiitd_pipeline` is a English model originally trained by AD-IIITD. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_nmt_model_ad_iiitd_pipeline_en_5.5.0_3.0_1725635379284.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_nmt_model_ad_iiitd_pipeline_en_5.5.0_3.0_1725635379284.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+pipeline = PretrainedPipeline("burmese_nmt_model_ad_iiitd_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val pipeline = new PretrainedPipeline("burmese_nmt_model_ad_iiitd_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_nmt_model_ad_iiitd_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|500.6 MB| + +## References + +https://huggingface.co/AD-IIITD/my_NMT_model + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-burmese_ws_extraction_model_26th_mar_en.md b/docs/_posts/ahmedlone127/2024-09-06-burmese_ws_extraction_model_26th_mar_en.md new file mode 100644 index 00000000000000..f0b9fab34602d7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-burmese_ws_extraction_model_26th_mar_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_ws_extraction_model_26th_mar DistilBertForTokenClassification from manimaranpa07 +author: John Snow Labs +name: burmese_ws_extraction_model_26th_mar +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_ws_extraction_model_26th_mar` is a English model originally trained by manimaranpa07. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_ws_extraction_model_26th_mar_en_5.5.0_3.0_1725599737716.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_ws_extraction_model_26th_mar_en_5.5.0_3.0_1725599737716.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_ws_extraction_model_26th_mar","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_ws_extraction_model_26th_mar", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_ws_extraction_model_26th_mar| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/manimaranpa07/my_Ws_extraction_model_26th_mar \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-camembert_base_test_model_sophios_en.md b/docs/_posts/ahmedlone127/2024-09-06-camembert_base_test_model_sophios_en.md new file mode 100644 index 00000000000000..985321c05b4170 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-camembert_base_test_model_sophios_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English camembert_base_test_model_sophios CamemBertEmbeddings from sophios +author: John Snow Labs +name: camembert_base_test_model_sophios +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`camembert_base_test_model_sophios` is a English model originally trained by sophios. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_base_test_model_sophios_en_5.5.0_3.0_1725632163233.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_base_test_model_sophios_en_5.5.0_3.0_1725632163233.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("camembert_base_test_model_sophios","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("camembert_base_test_model_sophios","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_base_test_model_sophios| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/sophios/camembert-base-test-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-camembert_base_test_model_sophios_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-camembert_base_test_model_sophios_pipeline_en.md new file mode 100644 index 00000000000000..a78236d1edc2a5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-camembert_base_test_model_sophios_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English camembert_base_test_model_sophios_pipeline pipeline CamemBertEmbeddings from sophios +author: John Snow Labs +name: camembert_base_test_model_sophios_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`camembert_base_test_model_sophios_pipeline` is a English model originally trained by sophios. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_base_test_model_sophios_pipeline_en_5.5.0_3.0_1725632242634.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_base_test_model_sophios_pipeline_en_5.5.0_3.0_1725632242634.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+pipeline = PretrainedPipeline("camembert_base_test_model_sophios_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val pipeline = new PretrainedPipeline("camembert_base_test_model_sophios_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_base_test_model_sophios_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/sophios/camembert-base-test-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-candle_cvss_complexity_en.md b/docs/_posts/ahmedlone127/2024-09-06-candle_cvss_complexity_en.md new file mode 100644 index 00000000000000..fcd7f0bd5e7262 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-candle_cvss_complexity_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English candle_cvss_complexity MPNetForSequenceClassification from iashour +author: John Snow Labs +name: candle_cvss_complexity +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, mpnet] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`candle_cvss_complexity` is a English model originally trained by iashour. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/candle_cvss_complexity_en_5.5.0_3.0_1725655359866.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/candle_cvss_complexity_en_5.5.0_3.0_1725655359866.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = MPNetForSequenceClassification.pretrained("candle_cvss_complexity","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = MPNetForSequenceClassification.pretrained("candle_cvss_complexity", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
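+The predicted label for each input row can then be read from the output column. A minimal sketch, assuming the column names used in the example above:
+
+```python
+pipelineDF.select("class.result").show(truncate=False)
+```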
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|candle_cvss_complexity| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/iashour/CANDLE_cvss_complexity \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-candle_cvss_complexity_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-candle_cvss_complexity_pipeline_en.md new file mode 100644 index 00000000000000..dcf76a17e1024b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-candle_cvss_complexity_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English candle_cvss_complexity_pipeline pipeline MPNetForSequenceClassification from iashour +author: John Snow Labs +name: candle_cvss_complexity_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`candle_cvss_complexity_pipeline` is a English model originally trained by iashour. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/candle_cvss_complexity_pipeline_en_5.5.0_3.0_1725655381083.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/candle_cvss_complexity_pipeline_en_5.5.0_3.0_1725655381083.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+pipeline = PretrainedPipeline("candle_cvss_complexity_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val pipeline = new PretrainedPipeline("candle_cvss_complexity_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|candle_cvss_complexity_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/iashour/CANDLE_cvss_complexity + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-candle_cvss_confidentiality_en.md b/docs/_posts/ahmedlone127/2024-09-06-candle_cvss_confidentiality_en.md new file mode 100644 index 00000000000000..dc4333cabf51b8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-candle_cvss_confidentiality_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English candle_cvss_confidentiality MPNetForSequenceClassification from iashour +author: John Snow Labs +name: candle_cvss_confidentiality +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, mpnet] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`candle_cvss_confidentiality` is a English model originally trained by iashour. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/candle_cvss_confidentiality_en_5.5.0_3.0_1725655564106.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/candle_cvss_confidentiality_en_5.5.0_3.0_1725655564106.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = MPNetForSequenceClassification.pretrained("candle_cvss_confidentiality","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = MPNetForSequenceClassification.pretrained("candle_cvss_confidentiality", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|candle_cvss_confidentiality| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/iashour/CANDLE_cvss_confidentiality \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-candle_cvss_confidentiality_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-candle_cvss_confidentiality_pipeline_en.md new file mode 100644 index 00000000000000..06d93e1f376a45 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-candle_cvss_confidentiality_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English candle_cvss_confidentiality_pipeline pipeline MPNetForSequenceClassification from iashour +author: John Snow Labs +name: candle_cvss_confidentiality_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`candle_cvss_confidentiality_pipeline` is a English model originally trained by iashour. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/candle_cvss_confidentiality_pipeline_en_5.5.0_3.0_1725655583796.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/candle_cvss_confidentiality_pipeline_en_5.5.0_3.0_1725655583796.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+pipeline = PretrainedPipeline("candle_cvss_confidentiality_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val pipeline = new PretrainedPipeline("candle_cvss_confidentiality_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|candle_cvss_confidentiality_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/iashour/CANDLE_cvss_confidentiality + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-candle_cvss_interaction_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-candle_cvss_interaction_pipeline_en.md new file mode 100644 index 00000000000000..8c542142b97747 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-candle_cvss_interaction_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English candle_cvss_interaction_pipeline pipeline MPNetForSequenceClassification from iashour +author: John Snow Labs +name: candle_cvss_interaction_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`candle_cvss_interaction_pipeline` is a English model originally trained by iashour. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/candle_cvss_interaction_pipeline_en_5.5.0_3.0_1725655406975.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/candle_cvss_interaction_pipeline_en_5.5.0_3.0_1725655406975.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+pipeline = PretrainedPipeline("candle_cvss_interaction_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val pipeline = new PretrainedPipeline("candle_cvss_interaction_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|candle_cvss_interaction_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/iashour/CANDLE_cvss_interaction + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-canvers_audio_caption_v1_en.md b/docs/_posts/ahmedlone127/2024-09-06-canvers_audio_caption_v1_en.md new file mode 100644 index 00000000000000..0c957c11e54fe4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-canvers_audio_caption_v1_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English canvers_audio_caption_v1 WhisperForCTC from circulus +author: John Snow Labs +name: canvers_audio_caption_v1 +date: 2024-09-06 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`canvers_audio_caption_v1` is a English model originally trained by circulus. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/canvers_audio_caption_v1_en_5.5.0_3.0_1725582557816.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/canvers_audio_caption_v1_en_5.5.0_3.0_1725582557816.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("canvers_audio_caption_v1","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+# rawFloats is a placeholder for your own audio samples (a list of floats, 16 kHz mono)
+data = spark.createDataFrame([[rawFloats]]).toDF("audio_content")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("canvers_audio_caption_v1", "en")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+// rawFloats is a placeholder for your own Array[Float] of audio samples (16 kHz mono)
+val data = Seq(Tuple1(rawFloats)).toDF("audio_content")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
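+The transcription lands in the `text` output column. A minimal sketch, assuming the column names used in the example above and that the audio was supplied as 16 kHz mono float samples:
+
+```python
+pipelineDF.select("text.result").show(truncate=False)
+```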
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|canvers_audio_caption_v1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/circulus/canvers-audio-caption-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-canvers_audio_caption_v1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-canvers_audio_caption_v1_pipeline_en.md new file mode 100644 index 00000000000000..62becc91b95735 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-canvers_audio_caption_v1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English canvers_audio_caption_v1_pipeline pipeline WhisperForCTC from circulus +author: John Snow Labs +name: canvers_audio_caption_v1_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`canvers_audio_caption_v1_pipeline` is a English model originally trained by circulus. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/canvers_audio_caption_v1_pipeline_en_5.5.0_3.0_1725582645036.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/canvers_audio_caption_v1_pipeline_en_5.5.0_3.0_1725582645036.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+pipeline = PretrainedPipeline("canvers_audio_caption_v1_pipeline", lang = "en")
+# rawFloats is a placeholder for your own audio samples (a list of floats, 16 kHz mono)
+df = spark.createDataFrame([[rawFloats]]).toDF("audio_content")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val pipeline = new PretrainedPipeline("canvers_audio_caption_v1_pipeline", lang = "en")
+// rawFloats is a placeholder for your own Array[Float] of audio samples (16 kHz mono)
+val df = Seq(Tuple1(rawFloats)).toDF("audio_content")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|canvers_audio_caption_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/circulus/canvers-audio-caption-v1 + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-chatutterance_english_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-chatutterance_english_pipeline_en.md new file mode 100644 index 00000000000000..cb95cfbb8e407c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-chatutterance_english_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English chatutterance_english_pipeline pipeline BertForTokenClassification from talkbank +author: John Snow Labs +name: chatutterance_english_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`chatutterance_english_pipeline` is a English model originally trained by talkbank. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/chatutterance_english_pipeline_en_5.5.0_3.0_1725664018010.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/chatutterance_english_pipeline_en_5.5.0_3.0_1725664018010.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("chatutterance_english_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("chatutterance_english_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
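+
+Since `df` is not created in the snippet above, here is a minimal, self-contained sketch; the session start-up and the sample sentence are illustrative assumptions.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+# Any DataFrame with a single "text" column works as input.
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("chatutterance_english_pipeline", lang="en")
+pipeline.transform(df).show(truncate=False)
+```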
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|chatutterance_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/talkbank/CHATUtterance-en + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-chuvash_validator_en.md b/docs/_posts/ahmedlone127/2024-09-06-chuvash_validator_en.md new file mode 100644 index 00000000000000..7014d14f1f3714 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-chuvash_validator_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English chuvash_validator MPNetEmbeddings from ivanzidov +author: John Snow Labs +name: chuvash_validator +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`chuvash_validator` is a English model originally trained by ivanzidov. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/chuvash_validator_en_5.5.0_3.0_1725595263735.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/chuvash_validator_en_5.5.0_3.0_1725595263735.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("chuvash_validator","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("chuvash_validator","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|chuvash_validator| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/ivanzidov/cv-validator \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-claim_extraction_classifier_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-claim_extraction_classifier_pipeline_en.md new file mode 100644 index 00000000000000..3dd2501396dd88 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-claim_extraction_classifier_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English claim_extraction_classifier_pipeline pipeline DeBertaForSequenceClassification from KnutJaegersberg +author: John Snow Labs +name: claim_extraction_classifier_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`claim_extraction_classifier_pipeline` is a English model originally trained by KnutJaegersberg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/claim_extraction_classifier_pipeline_en_5.5.0_3.0_1725612105733.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/claim_extraction_classifier_pipeline_en_5.5.0_3.0_1725612105733.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("claim_extraction_classifier_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("claim_extraction_classifier_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
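+
+Because `df` is left undefined above, the `annotate` helper is a convenient alternative for quick checks, as it accepts raw strings directly; the example sentence below is only an illustrative assumption.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+pipeline = PretrainedPipeline("claim_extraction_classifier_pipeline", lang="en")
+# Returns a dict keyed by the pipeline's output columns.
+result = pipeline.annotate("The company claims its new battery doubles the driving range.")
+print(result)
+```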
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|claim_extraction_classifier_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/KnutJaegersberg/claim_extraction_classifier + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-clinicalbert_craft_ner_nepal_bhasa_en.md b/docs/_posts/ahmedlone127/2024-09-06-clinicalbert_craft_ner_nepal_bhasa_en.md new file mode 100644 index 00000000000000..035d3569b39ac2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-clinicalbert_craft_ner_nepal_bhasa_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English clinicalbert_craft_ner_nepal_bhasa DistilBertForTokenClassification from judithrosell +author: John Snow Labs +name: clinicalbert_craft_ner_nepal_bhasa +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clinicalbert_craft_ner_nepal_bhasa` is a English model originally trained by judithrosell. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clinicalbert_craft_ner_nepal_bhasa_en_5.5.0_3.0_1725653517690.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clinicalbert_craft_ner_nepal_bhasa_en_5.5.0_3.0_1725653517690.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("clinicalbert_craft_ner_nepal_bhasa","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("clinicalbert_craft_ner_nepal_bhasa", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clinicalbert_craft_ner_nepal_bhasa| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|505.4 MB| + +## References + +https://huggingface.co/judithrosell/ClinicalBERT_CRAFT_NER_new \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-clinicalbert_craft_ner_nepal_bhasa_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-clinicalbert_craft_ner_nepal_bhasa_pipeline_en.md new file mode 100644 index 00000000000000..e5c2b6e27e38b2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-clinicalbert_craft_ner_nepal_bhasa_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English clinicalbert_craft_ner_nepal_bhasa_pipeline pipeline DistilBertForTokenClassification from judithrosell +author: John Snow Labs +name: clinicalbert_craft_ner_nepal_bhasa_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clinicalbert_craft_ner_nepal_bhasa_pipeline` is a English model originally trained by judithrosell. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clinicalbert_craft_ner_nepal_bhasa_pipeline_en_5.5.0_3.0_1725653541951.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clinicalbert_craft_ner_nepal_bhasa_pipeline_en_5.5.0_3.0_1725653541951.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("clinicalbert_craft_ner_nepal_bhasa_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("clinicalbert_craft_ner_nepal_bhasa_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
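+
+A runnable sketch of the call above, assuming a fresh Spark NLP session; the biomedical sentence is only an illustrative assumption.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+df = spark.createDataFrame([["The BRCA1 protein regulates DNA repair in human cells."]]).toDF("text")
+
+pipeline = PretrainedPipeline("clinicalbert_craft_ner_nepal_bhasa_pipeline", lang="en")
+pipeline.transform(df).show(truncate=False)
+```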
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clinicalbert_craft_ner_nepal_bhasa_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|505.4 MB| + +## References + +https://huggingface.co/judithrosell/ClinicalBERT_CRAFT_NER_new + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-clinicalnerpt_procedure_pipeline_pt.md b/docs/_posts/ahmedlone127/2024-09-06-clinicalnerpt_procedure_pipeline_pt.md new file mode 100644 index 00000000000000..e412e4e45b3b57 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-clinicalnerpt_procedure_pipeline_pt.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Portuguese clinicalnerpt_procedure_pipeline pipeline BertForTokenClassification from pucpr +author: John Snow Labs +name: clinicalnerpt_procedure_pipeline +date: 2024-09-06 +tags: [pt, open_source, pipeline, onnx] +task: Named Entity Recognition +language: pt +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clinicalnerpt_procedure_pipeline` is a Portuguese model originally trained by pucpr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clinicalnerpt_procedure_pipeline_pt_5.5.0_3.0_1725663904630.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clinicalnerpt_procedure_pipeline_pt_5.5.0_3.0_1725663904630.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("clinicalnerpt_procedure_pipeline", lang = "pt") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("clinicalnerpt_procedure_pipeline", lang = "pt") +val annotations = pipeline.transform(df) + +``` +
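+
+A self-contained sketch under the same assumptions (a running Spark NLP session, and a Portuguese clinical sentence chosen only for illustration).
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+df = spark.createDataFrame([["Paciente foi submetido a apendicectomia sem intercorrências."]]).toDF("text")
+
+pipeline = PretrainedPipeline("clinicalnerpt_procedure_pipeline", lang="pt")
+pipeline.transform(df).show(truncate=False)
+```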
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clinicalnerpt_procedure_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|pt| +|Size:|664.9 MB| + +## References + +https://huggingface.co/pucpr/clinicalnerpt-procedure + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-clinicalnerpt_procedure_pt.md b/docs/_posts/ahmedlone127/2024-09-06-clinicalnerpt_procedure_pt.md new file mode 100644 index 00000000000000..3c053162e288ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-clinicalnerpt_procedure_pt.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Portuguese clinicalnerpt_procedure BertForTokenClassification from pucpr +author: John Snow Labs +name: clinicalnerpt_procedure +date: 2024-09-06 +tags: [pt, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: pt +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clinicalnerpt_procedure` is a Portuguese model originally trained by pucpr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clinicalnerpt_procedure_pt_5.5.0_3.0_1725663872345.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clinicalnerpt_procedure_pt_5.5.0_3.0_1725663872345.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("clinicalnerpt_procedure","pt") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("clinicalnerpt_procedure", "pt")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clinicalnerpt_procedure| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|pt| +|Size:|664.9 MB| + +## References + +https://huggingface.co/pucpr/clinicalnerpt-procedure \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-clip_fashion_attribute_model_try_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-clip_fashion_attribute_model_try_1_pipeline_en.md new file mode 100644 index 00000000000000..6519bad5871f80 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-clip_fashion_attribute_model_try_1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English clip_fashion_attribute_model_try_1_pipeline pipeline CLIPForZeroShotClassification from Geetansh13 +author: John Snow Labs +name: clip_fashion_attribute_model_try_1_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Zero-Shot Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CLIPForZeroShotClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clip_fashion_attribute_model_try_1_pipeline` is a English model originally trained by Geetansh13. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clip_fashion_attribute_model_try_1_pipeline_en_5.5.0_3.0_1725650345068.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clip_fashion_attribute_model_try_1_pipeline_en_5.5.0_3.0_1725650345068.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("clip_fashion_attribute_model_try_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("clip_fashion_attribute_model_try_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
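+
+This pipeline consumes images rather than text. A hedged sketch follows: the image folder path is hypothetical, Spark's built-in `image` reader is assumed to produce the struct column the included ImageAssembler expects, and the candidate labels are whatever was saved with the pipeline's CLIP stage.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+# Hypothetical local folder of fashion product images.
+image_df = spark.read.format("image").option("dropInvalid", True).load("path/to/images")
+
+pipeline = PretrainedPipeline("clip_fashion_attribute_model_try_1_pipeline", lang="en")
+pipeline.transform(image_df).show(truncate=False)
+```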
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clip_fashion_attribute_model_try_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/Geetansh13/clip-fashion-attribute-model-try-1 + +## Included Models + +- ImageAssembler +- CLIPForZeroShotClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-code_search_codebert_base_5_random_trimmed_with_g_and_spaces_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-code_search_codebert_base_5_random_trimmed_with_g_and_spaces_pipeline_en.md new file mode 100644 index 00000000000000..a7971d1d61ad0e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-code_search_codebert_base_5_random_trimmed_with_g_and_spaces_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English code_search_codebert_base_5_random_trimmed_with_g_and_spaces_pipeline pipeline RoBertaForTokenClassification from DianaIulia +author: John Snow Labs +name: code_search_codebert_base_5_random_trimmed_with_g_and_spaces_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`code_search_codebert_base_5_random_trimmed_with_g_and_spaces_pipeline` is a English model originally trained by DianaIulia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/code_search_codebert_base_5_random_trimmed_with_g_and_spaces_pipeline_en_5.5.0_3.0_1725666254476.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/code_search_codebert_base_5_random_trimmed_with_g_and_spaces_pipeline_en_5.5.0_3.0_1725666254476.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("code_search_codebert_base_5_random_trimmed_with_g_and_spaces_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("code_search_codebert_base_5_random_trimmed_with_g_and_spaces_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
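+
+The same pattern applies here; a minimal sketch with an assumed code-style snippet as the input text.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+df = spark.createDataFrame([["def add(a, b): return a + b"]]).toDF("text")
+
+pipeline = PretrainedPipeline("code_search_codebert_base_5_random_trimmed_with_g_and_spaces_pipeline", lang="en")
+pipeline.transform(df).show(truncate=False)
+```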
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|code_search_codebert_base_5_random_trimmed_with_g_and_spaces_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|466.1 MB| + +## References + +https://huggingface.co/DianaIulia/code_search_codebert_base_5_random_trimmed_with_g_and_spaces + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-codice_fiscale_en.md b/docs/_posts/ahmedlone127/2024-09-06-codice_fiscale_en.md new file mode 100644 index 00000000000000..3cf618569dd088 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-codice_fiscale_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English codice_fiscale DistilBertForTokenClassification from raoulmago +author: John Snow Labs +name: codice_fiscale +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`codice_fiscale` is a English model originally trained by raoulmago. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/codice_fiscale_en_5.5.0_3.0_1725599367486.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/codice_fiscale_en_5.5.0_3.0_1725599367486.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("codice_fiscale","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("codice_fiscale", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|codice_fiscale| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/raoulmago/codice_fiscale \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-complaints_classifier_jpsteinhafel_en.md b/docs/_posts/ahmedlone127/2024-09-06-complaints_classifier_jpsteinhafel_en.md new file mode 100644 index 00000000000000..812f2dda5bace4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-complaints_classifier_jpsteinhafel_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English complaints_classifier_jpsteinhafel DistilBertForSequenceClassification from jpsteinhafel +author: John Snow Labs +name: complaints_classifier_jpsteinhafel +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`complaints_classifier_jpsteinhafel` is a English model originally trained by jpsteinhafel. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/complaints_classifier_jpsteinhafel_en_5.5.0_3.0_1725608148812.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/complaints_classifier_jpsteinhafel_en_5.5.0_3.0_1725608148812.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("complaints_classifier_jpsteinhafel","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("complaints_classifier_jpsteinhafel", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|complaints_classifier_jpsteinhafel| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/jpsteinhafel/complaints_classifier \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-congretimbau_en.md b/docs/_posts/ahmedlone127/2024-09-06-congretimbau_en.md new file mode 100644 index 00000000000000..3b9c86b0bf3203 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-congretimbau_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English congretimbau BertEmbeddings from belisards +author: John Snow Labs +name: congretimbau +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`congretimbau` is a English model originally trained by belisards. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/congretimbau_en_5.5.0_3.0_1725659504022.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/congretimbau_en_5.5.0_3.0_1725659504022.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("congretimbau","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("congretimbau","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|congretimbau| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/belisards/congretimbau \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-congretimbau_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-congretimbau_pipeline_en.md new file mode 100644 index 00000000000000..81efa2e1d9a338 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-congretimbau_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English congretimbau_pipeline pipeline BertEmbeddings from belisards +author: John Snow Labs +name: congretimbau_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`congretimbau_pipeline` is a English model originally trained by belisards. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/congretimbau_pipeline_en_5.5.0_3.0_1725659561096.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/congretimbau_pipeline_en_5.5.0_3.0_1725659561096.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("congretimbau_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("congretimbau_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
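+
+As elsewhere, `df` must be supplied by the caller. A minimal sketch (sample sentence assumed for illustration):
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("congretimbau_pipeline", lang="en")
+pipeline.transform(df).show(truncate=False)
+```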
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|congretimbau_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/belisards/congretimbau + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-content_en.md b/docs/_posts/ahmedlone127/2024-09-06-content_en.md new file mode 100644 index 00000000000000..d18dff1a602ec3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-content_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English content DistilBertForQuestionAnswering from namnthust +author: John Snow Labs +name: content +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`content` is a English model originally trained by namnthust. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/content_en_5.5.0_3.0_1725654986416.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/content_en_5.5.0_3.0_1725654986416.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("content","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("content", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|content| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/namnthust/content \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-convberturk_keyword_extractor_pipeline_tr.md b/docs/_posts/ahmedlone127/2024-09-06-convberturk_keyword_extractor_pipeline_tr.md new file mode 100644 index 00000000000000..7149ef0a867cc6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-convberturk_keyword_extractor_pipeline_tr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Turkish convberturk_keyword_extractor_pipeline pipeline BertForTokenClassification from yanekyuk +author: John Snow Labs +name: convberturk_keyword_extractor_pipeline +date: 2024-09-06 +tags: [tr, open_source, pipeline, onnx] +task: Named Entity Recognition +language: tr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`convberturk_keyword_extractor_pipeline` is a Turkish model originally trained by yanekyuk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/convberturk_keyword_extractor_pipeline_tr_5.5.0_3.0_1725663337352.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/convberturk_keyword_extractor_pipeline_tr_5.5.0_3.0_1725663337352.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("convberturk_keyword_extractor_pipeline", lang = "tr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("convberturk_keyword_extractor_pipeline", lang = "tr") +val annotations = pipeline.transform(df) + +``` +
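+
+A self-contained sketch of the call above; the Turkish sentence is only an illustrative assumption.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+df = spark.createDataFrame([["İstanbul Boğazı için yeni bir köprü projesi duyuruldu."]]).toDF("text")
+
+pipeline = PretrainedPipeline("convberturk_keyword_extractor_pipeline", lang="tr")
+pipeline.transform(df).show(truncate=False)
+```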
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|convberturk_keyword_extractor_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|tr| +|Size:|400.1 MB| + +## References + +https://huggingface.co/yanekyuk/convberturk-keyword-extractor + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-convberturk_keyword_extractor_tr.md b/docs/_posts/ahmedlone127/2024-09-06-convberturk_keyword_extractor_tr.md new file mode 100644 index 00000000000000..bacf28ffca9d44 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-convberturk_keyword_extractor_tr.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Turkish convberturk_keyword_extractor BertForTokenClassification from yanekyuk +author: John Snow Labs +name: convberturk_keyword_extractor +date: 2024-09-06 +tags: [tr, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: tr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`convberturk_keyword_extractor` is a Turkish model originally trained by yanekyuk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/convberturk_keyword_extractor_tr_5.5.0_3.0_1725663318277.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/convberturk_keyword_extractor_tr_5.5.0_3.0_1725663318277.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("convberturk_keyword_extractor","tr") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("convberturk_keyword_extractor", "tr")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|convberturk_keyword_extractor| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|tr| +|Size:|400.1 MB| + +## References + +https://huggingface.co/yanekyuk/convberturk-keyword-extractor \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-cpegen_vpv_en.md b/docs/_posts/ahmedlone127/2024-09-06-cpegen_vpv_en.md new file mode 100644 index 00000000000000..d53a2e581e1473 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-cpegen_vpv_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English cpegen_vpv DistilBertForTokenClassification from Neurona +author: John Snow Labs +name: cpegen_vpv +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cpegen_vpv` is a English model originally trained by Neurona. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cpegen_vpv_en_5.5.0_3.0_1725653848083.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cpegen_vpv_en_5.5.0_3.0_1725653848083.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("cpegen_vpv","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("cpegen_vpv", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cpegen_vpv| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Neurona/cpegen_vpv \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-cpu_conditional_classifier_en.md b/docs/_posts/ahmedlone127/2024-09-06-cpu_conditional_classifier_en.md new file mode 100644 index 00000000000000..a661a50b0f2e4f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-cpu_conditional_classifier_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English cpu_conditional_classifier MPNetEmbeddings from mtyrrell +author: John Snow Labs +name: cpu_conditional_classifier +date: 2024-09-06 +tags: [mpnet, en, open_source, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cpu_conditional_classifier` is a English model originally trained by mtyrrell. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cpu_conditional_classifier_en_5.5.0_3.0_1725629714293.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cpu_conditional_classifier_en_5.5.0_3.0_1725629714293.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+embeddings = MPNetEmbeddings.pretrained("cpu_conditional_classifier","en") \
+    .setInputCols(["documents"]) \
+    .setOutputCol("mpnet_embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, embeddings])
+
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val embeddings = MPNetEmbeddings
+    .pretrained("cpu_conditional_classifier", "en")
+    .setInputCols(Array("documents"))
+    .setOutputCol("mpnet_embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings))
+
+val data = Seq("I love spark-nlp").toDF("text")
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cpu_conditional_classifier| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +References + +https://huggingface.co/mtyrrell/CPU_Conditional_Classifier \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-cpu_conditional_classifier_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-cpu_conditional_classifier_pipeline_en.md new file mode 100644 index 00000000000000..c5f57d27562cbb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-cpu_conditional_classifier_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English cpu_conditional_classifier_pipeline pipeline MPNetForSequenceClassification from mtyrrell +author: John Snow Labs +name: cpu_conditional_classifier_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cpu_conditional_classifier_pipeline` is a English model originally trained by mtyrrell. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cpu_conditional_classifier_pipeline_en_5.5.0_3.0_1725629734047.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cpu_conditional_classifier_pipeline_en_5.5.0_3.0_1725629734047.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("cpu_conditional_classifier_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("cpu_conditional_classifier_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
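+
+For a quick smoke test without building a DataFrame, the `annotate` helper can be used; the sentence below is assumed for illustration only.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+pipeline = PretrainedPipeline("cpu_conditional_classifier_pipeline", lang="en")
+result = pipeline.annotate("Emission cuts are conditional on international climate finance.")
+print(result)
+```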
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cpu_conditional_classifier_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/mtyrrell/CPU_Conditional_Classifier + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-cros_1_en.md b/docs/_posts/ahmedlone127/2024-09-06-cros_1_en.md new file mode 100644 index 00000000000000..6b080d670d8d67 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-cros_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English cros_1 RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: cros_1 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cros_1` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cros_1_en_5.5.0_3.0_1725613834394.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cros_1_en_5.5.0_3.0_1725613834394.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("cros_1","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("cros_1", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cros_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/Cros_1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-cros_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-cros_1_pipeline_en.md new file mode 100644 index 00000000000000..85e4d3ced72eb9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-cros_1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English cros_1_pipeline pipeline RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: cros_1_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cros_1_pipeline` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cros_1_pipeline_en_5.5.0_3.0_1725613857523.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cros_1_pipeline_en_5.5.0_3.0_1725613857523.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("cros_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("cros_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
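+
+A minimal sketch of the call above, under the same assumptions as the other pipelines (a running Spark NLP session and an illustrative sentence):
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("cros_1_pipeline", lang="en")
+pipeline.transform(df).show(truncate=False)
+```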
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cros_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/Cros_1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-cryptobertrefined_en.md b/docs/_posts/ahmedlone127/2024-09-06-cryptobertrefined_en.md new file mode 100644 index 00000000000000..d60e35adc17d42 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-cryptobertrefined_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English cryptobertrefined RoBertaForSequenceClassification from AfterRain007 +author: John Snow Labs +name: cryptobertrefined +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cryptobertrefined` is a English model originally trained by AfterRain007. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cryptobertrefined_en_5.5.0_3.0_1725612896286.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cryptobertrefined_en_5.5.0_3.0_1725612896286.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("cryptobertrefined","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("cryptobertrefined", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cryptobertrefined| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|468.3 MB| + +## References + +https://huggingface.co/AfterRain007/cryptobertRefined \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-cs431_vietnamese_coqe_csi_v2_en.md b/docs/_posts/ahmedlone127/2024-09-06-cs431_vietnamese_coqe_csi_v2_en.md new file mode 100644 index 00000000000000..3785d1192e5470 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-cs431_vietnamese_coqe_csi_v2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English cs431_vietnamese_coqe_csi_v2 XlmRoBertaForSequenceClassification from ThuyNT03 +author: John Snow Labs +name: cs431_vietnamese_coqe_csi_v2 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cs431_vietnamese_coqe_csi_v2` is a English model originally trained by ThuyNT03. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cs431_vietnamese_coqe_csi_v2_en_5.5.0_3.0_1725619825884.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cs431_vietnamese_coqe_csi_v2_en_5.5.0_3.0_1725619825884.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("cs431_vietnamese_coqe_csi_v2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("cs431_vietnamese_coqe_csi_v2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cs431_vietnamese_coqe_csi_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|839.4 MB| + +## References + +https://huggingface.co/ThuyNT03/CS431_Vi-COQE_CSI_v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-cs431_vietnamese_coqe_csi_v2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-cs431_vietnamese_coqe_csi_v2_pipeline_en.md new file mode 100644 index 00000000000000..a804b6062006f9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-cs431_vietnamese_coqe_csi_v2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English cs431_vietnamese_coqe_csi_v2_pipeline pipeline XlmRoBertaForSequenceClassification from ThuyNT03 +author: John Snow Labs +name: cs431_vietnamese_coqe_csi_v2_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cs431_vietnamese_coqe_csi_v2_pipeline` is a English model originally trained by ThuyNT03. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cs431_vietnamese_coqe_csi_v2_pipeline_en_5.5.0_3.0_1725619894293.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cs431_vietnamese_coqe_csi_v2_pipeline_en_5.5.0_3.0_1725619894293.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("cs431_vietnamese_coqe_csi_v2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("cs431_vietnamese_coqe_csi_v2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cs431_vietnamese_coqe_csi_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|839.4 MB| + +## References + +https://huggingface.co/ThuyNT03/CS431_Vi-COQE_CSI_v2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-ct_cos_xlmr_20230920_2_en.md b/docs/_posts/ahmedlone127/2024-09-06-ct_cos_xlmr_20230920_2_en.md new file mode 100644 index 00000000000000..7541040699240c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-ct_cos_xlmr_20230920_2_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English ct_cos_xlmr_20230920_2 XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: ct_cos_xlmr_20230920_2 +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ct_cos_xlmr_20230920_2` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ct_cos_xlmr_20230920_2_en_5.5.0_3.0_1725640587987.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ct_cos_xlmr_20230920_2_en_5.5.0_3.0_1725640587987.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("ct_cos_xlmr_20230920_2","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols("question", "context")
+    .setOutputCols("document_question", "document_context")
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("ct_cos_xlmr_20230920_2", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
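+
+As a small extension (not in the original card), the predicted answer span can be read back from the `answer` column of the `pipelineDF` produced by the Python block above:
+
+```python
+# Each row carries the extracted answer span(s) as an annotation array
+pipelineDF.select("answer.result").show(truncate=False)
+
+# Or collect them into plain Python lists of strings
+answers = [row["result"] for row in pipelineDF.select("answer.result").collect()]
+print(answers)
+```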
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ct_cos_xlmr_20230920_2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|875.8 MB| + +## References + +https://huggingface.co/intanm/ct-cos-xlmr-20230920-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-ct_cos_xlmr_20230920_2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-ct_cos_xlmr_20230920_2_pipeline_en.md new file mode 100644 index 00000000000000..c22a18e09a2722 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-ct_cos_xlmr_20230920_2_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English ct_cos_xlmr_20230920_2_pipeline pipeline XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: ct_cos_xlmr_20230920_2_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ct_cos_xlmr_20230920_2_pipeline` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ct_cos_xlmr_20230920_2_pipeline_en_5.5.0_3.0_1725640654535.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ct_cos_xlmr_20230920_2_pipeline_en_5.5.0_3.0_1725640654535.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ct_cos_xlmr_20230920_2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ct_cos_xlmr_20230920_2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ct_cos_xlmr_20230920_2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|875.8 MB| + +## References + +https://huggingface.co/intanm/ct-cos-xlmr-20230920-2 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-ct_kld_xlmr_20230814_en.md b/docs/_posts/ahmedlone127/2024-09-06-ct_kld_xlmr_20230814_en.md new file mode 100644 index 00000000000000..7af227e10dcb1d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-ct_kld_xlmr_20230814_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English ct_kld_xlmr_20230814 XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: ct_kld_xlmr_20230814 +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ct_kld_xlmr_20230814` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ct_kld_xlmr_20230814_en_5.5.0_3.0_1725641148367.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ct_kld_xlmr_20230814_en_5.5.0_3.0_1725641148367.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("ct_kld_xlmr_20230814","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols("question", "context")
+    .setOutputCols("document_question", "document_context")
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("ct_kld_xlmr_20230814", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ct_kld_xlmr_20230814| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|875.2 MB| + +## References + +https://huggingface.co/intanm/ct-kld-xlmr-20230814 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-danish_distilbert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-danish_distilbert_pipeline_en.md new file mode 100644 index 00000000000000..dbad89178bf764 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-danish_distilbert_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English danish_distilbert_pipeline pipeline DistilBertEmbeddings from gc394 +author: John Snow Labs +name: danish_distilbert_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`danish_distilbert_pipeline` is a English model originally trained by gc394. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/danish_distilbert_pipeline_en_5.5.0_3.0_1725639399771.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/danish_distilbert_pipeline_en_5.5.0_3.0_1725639399771.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("danish_distilbert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("danish_distilbert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
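+
+One optional addition, not part of the original card: after the first download, the underlying Spark ML `PipelineModel` can be persisted and reloaded from disk so later sessions do not need to fetch it again. This sketch assumes the `pipeline` object from the Python block above exposes the fitted model via its `model` attribute, an input DataFrame `df` with a `text` column, and write access to the chosen path.
+
+```python
+from pyspark.ml import PipelineModel
+
+# PretrainedPipeline wraps a regular Spark ML PipelineModel
+pipeline.model.write().overwrite().save("/tmp/danish_distilbert_pipeline")
+
+# Later sessions can load it locally instead of re-downloading
+restored = PipelineModel.load("/tmp/danish_distilbert_pipeline")
+restored.transform(df).printSchema()
+```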
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|danish_distilbert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/gc394/da_distilbert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-darkbert_finetuned_ner_en.md b/docs/_posts/ahmedlone127/2024-09-06-darkbert_finetuned_ner_en.md new file mode 100644 index 00000000000000..38f77cc9295cb0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-darkbert_finetuned_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English darkbert_finetuned_ner RoBertaForTokenClassification from guidobenb +author: John Snow Labs +name: darkbert_finetuned_ner +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`darkbert_finetuned_ner` is a English model originally trained by guidobenb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/darkbert_finetuned_ner_en_5.5.0_3.0_1725666544244.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/darkbert_finetuned_ner_en_5.5.0_3.0_1725666544244.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("darkbert_finetuned_ner","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("darkbert_finetuned_ner", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
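+
+The block above stops at token-level tags in the `ner` column. Where whole entity chunks are wanted, an `NerConverter` stage can be appended; the following sketch (not part of the original card) reuses the `documentAssembler`, `tokenizer`, `tokenClassifier`, and `data` objects defined in the Python block above.
+
+```python
+from sparknlp.annotator import NerConverter
+from pyspark.ml import Pipeline
+
+# Groups contiguous B-/I- token tags into entity chunks
+nerConverter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+chunkPipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier, nerConverter])
+chunkDF = chunkPipeline.fit(data).transform(data)
+chunkDF.select("ner_chunk.result").show(truncate=False)
+```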
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|darkbert_finetuned_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|466.2 MB| + +## References + +https://huggingface.co/guidobenb/DarkBERT-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-darkbert_finetuned_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-darkbert_finetuned_ner_pipeline_en.md new file mode 100644 index 00000000000000..9dc8e31410b27e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-darkbert_finetuned_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English darkbert_finetuned_ner_pipeline pipeline RoBertaForTokenClassification from guidobenb +author: John Snow Labs +name: darkbert_finetuned_ner_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`darkbert_finetuned_ner_pipeline` is a English model originally trained by guidobenb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/darkbert_finetuned_ner_pipeline_en_5.5.0_3.0_1725666565195.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/darkbert_finetuned_ner_pipeline_en_5.5.0_3.0_1725666565195.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("darkbert_finetuned_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("darkbert_finetuned_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|darkbert_finetuned_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|466.2 MB| + +## References + +https://huggingface.co/guidobenb/DarkBERT-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dataequity_kde4_english_german_qlora_dataequity_en.md b/docs/_posts/ahmedlone127/2024-09-06-dataequity_kde4_english_german_qlora_dataequity_en.md new file mode 100644 index 00000000000000..a079b6f245bed5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dataequity_kde4_english_german_qlora_dataequity_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dataequity_kde4_english_german_qlora_dataequity MarianTransformer from dataequity +author: John Snow Labs +name: dataequity_kde4_english_german_qlora_dataequity +date: 2024-09-06 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dataequity_kde4_english_german_qlora_dataequity` is a English model originally trained by dataequity. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dataequity_kde4_english_german_qlora_dataequity_en_5.5.0_3.0_1725635894146.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dataequity_kde4_english_german_qlora_dataequity_en_5.5.0_3.0_1725635894146.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("dataequity_kde4_english_german_qlora_dataequity","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("dataequity_kde4_english_german_qlora_dataequity","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
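+
+A possible way to read the output of the pipeline above (a sketch, assuming the `pipelineModel`, `pipelineDF`, and `spark` objects from the Python block): each row of the `translation` column holds one translated string per detected sentence.
+
+```python
+# One translated string per detected sentence
+pipelineDF.select("translation.result").show(truncate=False)
+
+# Translating additional rows works the same way
+more = spark.createDataFrame([["How are you?"], ["The weather is nice today."]]).toDF("text")
+pipelineModel.transform(more).select("text", "translation.result").show(truncate=False)
+```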
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dataequity_kde4_english_german_qlora_dataequity| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|315.8 MB| + +## References + +https://huggingface.co/dataequity/dataequity-kde4-en-de-qlora \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dataequity_kde4_english_german_qlora_dataequity_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-dataequity_kde4_english_german_qlora_dataequity_pipeline_en.md new file mode 100644 index 00000000000000..3557e5c31867ea --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dataequity_kde4_english_german_qlora_dataequity_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dataequity_kde4_english_german_qlora_dataequity_pipeline pipeline MarianTransformer from dataequity +author: John Snow Labs +name: dataequity_kde4_english_german_qlora_dataequity_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dataequity_kde4_english_german_qlora_dataequity_pipeline` is a English model originally trained by dataequity. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dataequity_kde4_english_german_qlora_dataequity_pipeline_en_5.5.0_3.0_1725635988114.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dataequity_kde4_english_german_qlora_dataequity_pipeline_en_5.5.0_3.0_1725635988114.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dataequity_kde4_english_german_qlora_dataequity_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dataequity_kde4_english_german_qlora_dataequity_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dataequity_kde4_english_german_qlora_dataequity_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|316.3 MB| + +## References + +https://huggingface.co/dataequity/dataequity-kde4-en-de-qlora + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dbbuc_30p_en.md b/docs/_posts/ahmedlone127/2024-09-06-dbbuc_30p_en.md new file mode 100644 index 00000000000000..eeb281689fc726 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dbbuc_30p_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dbbuc_30p DistilBertForTokenClassification from lilzzz +author: John Snow Labs +name: dbbuc_30p +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dbbuc_30p` is a English model originally trained by lilzzz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dbbuc_30p_en_5.5.0_3.0_1725599078963.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dbbuc_30p_en_5.5.0_3.0_1725599078963.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = DistilBertForTokenClassification.pretrained("dbbuc_30p","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = DistilBertForTokenClassification.pretrained("dbbuc_30p", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dbbuc_30p| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/lilzzz/dbbuc_30p \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dbbuc_30p_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-dbbuc_30p_pipeline_en.md new file mode 100644 index 00000000000000..6d87de063981a9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dbbuc_30p_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dbbuc_30p_pipeline pipeline DistilBertForTokenClassification from lilzzz +author: John Snow Labs +name: dbbuc_30p_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dbbuc_30p_pipeline` is a English model originally trained by lilzzz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dbbuc_30p_pipeline_en_5.5.0_3.0_1725599092127.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dbbuc_30p_pipeline_en_5.5.0_3.0_1725599092127.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dbbuc_30p_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dbbuc_30p_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dbbuc_30p_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/lilzzz/dbbuc_30p + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_amazon_reviews_v1_shuli_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_amazon_reviews_v1_shuli_en.md new file mode 100644 index 00000000000000..2bc4bdcb5f2632 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_amazon_reviews_v1_shuli_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_amazon_reviews_v1_shuli DeBertaForSequenceClassification from shuli +author: John Snow Labs +name: deberta_amazon_reviews_v1_shuli +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_amazon_reviews_v1_shuli` is a English model originally trained by shuli. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_amazon_reviews_v1_shuli_en_5.5.0_3.0_1725590774703.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_amazon_reviews_v1_shuli_en_5.5.0_3.0_1725590774703.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_amazon_reviews_v1_shuli","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_amazon_reviews_v1_shuli", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_amazon_reviews_v1_shuli| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|617.4 MB| + +## References + +https://huggingface.co/shuli/deberta_amazon_reviews_v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_large_hallucination_eval_v2_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_large_hallucination_eval_v2_en.md new file mode 100644 index 00000000000000..e3e7fc5fa98b95 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_large_hallucination_eval_v2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_large_hallucination_eval_v2 DeBertaForSequenceClassification from mathislucka +author: John Snow Labs +name: deberta_large_hallucination_eval_v2 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_large_hallucination_eval_v2` is a English model originally trained by mathislucka. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_large_hallucination_eval_v2_en_5.5.0_3.0_1725611797195.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_large_hallucination_eval_v2_en_5.5.0_3.0_1725611797195.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_large_hallucination_eval_v2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_large_hallucination_eval_v2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
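+
+Because this is a comparatively large checkpoint (about 1.6 GB, per the table below), it can help to size the Spark driver before loading it. A minimal sketch for a local, single-node session; the memory value is only an illustrative assumption, not a recommendation from the original card.
+
+```python
+import sparknlp
+
+# Give the driver enough room for a large ONNX model
+spark = sparknlp.start(memory="16G")
+```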
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_large_hallucination_eval_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/mathislucka/deberta-large-hallucination-eval-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_large_hallucination_eval_v2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_large_hallucination_eval_v2_pipeline_en.md new file mode 100644 index 00000000000000..eac864f3862a24 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_large_hallucination_eval_v2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_large_hallucination_eval_v2_pipeline pipeline DeBertaForSequenceClassification from mathislucka +author: John Snow Labs +name: deberta_large_hallucination_eval_v2_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_large_hallucination_eval_v2_pipeline` is a English model originally trained by mathislucka. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_large_hallucination_eval_v2_pipeline_en_5.5.0_3.0_1725611874837.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_large_hallucination_eval_v2_pipeline_en_5.5.0_3.0_1725611874837.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("deberta_large_hallucination_eval_v2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("deberta_large_hallucination_eval_v2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_large_hallucination_eval_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/mathislucka/deberta-large-hallucination-eval-v2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_base_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_base_en.md new file mode 100644 index 00000000000000..4d7ea1d12eb580 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_base_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_base_base DeBertaForSequenceClassification from bitsanlp +author: John Snow Labs +name: deberta_v3_base_base +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_base` is a English model originally trained by bitsanlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_base_en_5.5.0_3.0_1725589482728.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_base_en_5.5.0_3.0_1725589482728.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_base_base","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_base_base", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|609.9 MB| + +## References + +https://huggingface.co/bitsanlp/deberta-v3-base_base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_base_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_base_pipeline_en.md new file mode 100644 index 00000000000000..bac6e77dea9a4c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_base_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_base_base_pipeline pipeline DeBertaForSequenceClassification from bitsanlp +author: John Snow Labs +name: deberta_v3_base_base_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_base_pipeline` is a English model originally trained by bitsanlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_base_pipeline_en_5.5.0_3.0_1725589526796.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_base_pipeline_en_5.5.0_3.0_1725589526796.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("deberta_v3_base_base_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("deberta_v3_base_base_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|609.9 MB| + +## References + +https://huggingface.co/bitsanlp/deberta-v3-base_base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_glue_cola_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_glue_cola_pipeline_en.md new file mode 100644 index 00000000000000..983c5347dafcdb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_glue_cola_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_base_glue_cola_pipeline pipeline DeBertaForSequenceClassification from ficsort +author: John Snow Labs +name: deberta_v3_base_glue_cola_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_glue_cola_pipeline` is a English model originally trained by ficsort. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_glue_cola_pipeline_en_5.5.0_3.0_1725610580364.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_glue_cola_pipeline_en_5.5.0_3.0_1725610580364.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("deberta_v3_base_glue_cola_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("deberta_v3_base_glue_cola_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_glue_cola_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|572.2 MB| + +## References + +https://huggingface.co/ficsort/deberta-v3-base-glue-cola + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_qqp_garipovroma_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_qqp_garipovroma_en.md new file mode 100644 index 00000000000000..d8092e00a7157d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_qqp_garipovroma_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_base_qqp_garipovroma DeBertaForSequenceClassification from garipovroma +author: John Snow Labs +name: deberta_v3_base_qqp_garipovroma +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_qqp_garipovroma` is a English model originally trained by garipovroma. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_qqp_garipovroma_en_5.5.0_3.0_1725590427089.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_qqp_garipovroma_en_5.5.0_3.0_1725590427089.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_base_qqp_garipovroma","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_base_qqp_garipovroma", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_qqp_garipovroma| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|648.4 MB| + +## References + +https://huggingface.co/garipovroma/deberta-v3-base-qqp \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_qqp_garipovroma_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_qqp_garipovroma_pipeline_en.md new file mode 100644 index 00000000000000..1fe69775880ec4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_qqp_garipovroma_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_base_qqp_garipovroma_pipeline pipeline DeBertaForSequenceClassification from garipovroma +author: John Snow Labs +name: deberta_v3_base_qqp_garipovroma_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_qqp_garipovroma_pipeline` is a English model originally trained by garipovroma. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_qqp_garipovroma_pipeline_en_5.5.0_3.0_1725590476709.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_qqp_garipovroma_pipeline_en_5.5.0_3.0_1725590476709.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("deberta_v3_base_qqp_garipovroma_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("deberta_v3_base_qqp_garipovroma_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_qqp_garipovroma_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|648.5 MB| + +## References + +https://huggingface.co/garipovroma/deberta-v3-base-qqp + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_rocstories_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_rocstories_en.md new file mode 100644 index 00000000000000..953fe800b0b25f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_rocstories_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_base_rocstories DeBertaForSequenceClassification from KeiHeityuu +author: John Snow Labs +name: deberta_v3_base_rocstories +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_rocstories` is a English model originally trained by KeiHeityuu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_rocstories_en_5.5.0_3.0_1725588811529.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_rocstories_en_5.5.0_3.0_1725588811529.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active Spark NLP session (e.g. spark = sparknlp.start()).
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_base_rocstories","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_base_rocstories", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
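+
+Once the pipeline has run, the predicted label lands in the `class` output column. A minimal sketch of reading it back from the `pipelineDF` produced above:
+
+```python
+# Each row carries its predicted label(s) under class.result.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```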
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_rocstories| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|580.4 MB| + +## References + +https://huggingface.co/KeiHeityuu/deberta-v3-base-rocstories \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_rocstories_test_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_rocstories_test_pipeline_en.md new file mode 100644 index 00000000000000..585390169e0b95 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_rocstories_test_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_base_rocstories_test_pipeline pipeline DeBertaForSequenceClassification from KeiHeityuu +author: John Snow Labs +name: deberta_v3_base_rocstories_test_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_rocstories_test_pipeline` is a English model originally trained by KeiHeityuu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_rocstories_test_pipeline_en_5.5.0_3.0_1725591075147.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_rocstories_test_pipeline_en_5.5.0_3.0_1725591075147.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is a DataFrame with the input text in a column named "text".
+pipeline = PretrainedPipeline("deberta_v3_base_rocstories_test_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is a DataFrame with the input text in a column named "text".
+val pipeline = new PretrainedPipeline("deberta_v3_base_rocstories_test_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_rocstories_test_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|575.2 MB| + +## References + +https://huggingface.co/KeiHeityuu/deberta-v3-base-rocstories-test + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_tasksource_paraphrase_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_tasksource_paraphrase_en.md new file mode 100644 index 00000000000000..084dd6d51fcb6f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_tasksource_paraphrase_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_base_tasksource_paraphrase DeBertaForSequenceClassification from sileod +author: John Snow Labs +name: deberta_v3_base_tasksource_paraphrase +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_tasksource_paraphrase` is a English model originally trained by sileod. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_tasksource_paraphrase_en_5.5.0_3.0_1725611637338.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_tasksource_paraphrase_en_5.5.0_3.0_1725611637338.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active Spark NLP session (e.g. spark = sparknlp.start()).
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_base_tasksource_paraphrase","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_base_tasksource_paraphrase", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_tasksource_paraphrase| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|688.9 MB| + +## References + +https://huggingface.co/sileod/deberta-v3-base-tasksource-paraphrase \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_tasksource_paraphrase_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_tasksource_paraphrase_pipeline_en.md new file mode 100644 index 00000000000000..816f3b7819eee5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_base_tasksource_paraphrase_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_base_tasksource_paraphrase_pipeline pipeline DeBertaForSequenceClassification from sileod +author: John Snow Labs +name: deberta_v3_base_tasksource_paraphrase_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_base_tasksource_paraphrase_pipeline` is a English model originally trained by sileod. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_base_tasksource_paraphrase_pipeline_en_5.5.0_3.0_1725611673444.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_base_tasksource_paraphrase_pipeline_en_5.5.0_3.0_1725611673444.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is a DataFrame with the input text in a column named "text".
+pipeline = PretrainedPipeline("deberta_v3_base_tasksource_paraphrase_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is a DataFrame with the input text in a column named "text".
+val pipeline = new PretrainedPipeline("deberta_v3_base_tasksource_paraphrase_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_base_tasksource_paraphrase_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|688.9 MB| + +## References + +https://huggingface.co/sileod/deberta-v3-base-tasksource-paraphrase + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_bass_complex_questions_classifier_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_bass_complex_questions_classifier_en.md new file mode 100644 index 00000000000000..ca9c00913ac2b6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_bass_complex_questions_classifier_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_bass_complex_questions_classifier DeBertaForSequenceClassification from nogae +author: John Snow Labs +name: deberta_v3_bass_complex_questions_classifier +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_bass_complex_questions_classifier` is a English model originally trained by nogae. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_bass_complex_questions_classifier_en_5.5.0_3.0_1725609935876.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_bass_complex_questions_classifier_en_5.5.0_3.0_1725609935876.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active Spark NLP session (e.g. spark = sparknlp.start()).
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_bass_complex_questions_classifier","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_bass_complex_questions_classifier", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_bass_complex_questions_classifier| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|688.9 MB| + +## References + +https://huggingface.co/nogae/deberta-v3-bass-complex-questions_classifier \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large__sst2__train_16_3_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large__sst2__train_16_3_en.md new file mode 100644 index 00000000000000..301e738991e5b1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large__sst2__train_16_3_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_large__sst2__train_16_3 DeBertaForSequenceClassification from SetFit +author: John Snow Labs +name: deberta_v3_large__sst2__train_16_3 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large__sst2__train_16_3` is a English model originally trained by SetFit. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large__sst2__train_16_3_en_5.5.0_3.0_1725590117053.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large__sst2__train_16_3_en_5.5.0_3.0_1725590117053.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active Spark NLP session (e.g. spark = sparknlp.start()).
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large__sst2__train_16_3","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large__sst2__train_16_3", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
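+
+For single sentences or small batches, wrapping the fitted model in a `LightPipeline` avoids the overhead of a distributed job. A minimal sketch reusing the `pipelineModel` from the snippet above (the input sentence is only an example):
+
+```python
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+# annotate returns a dict keyed by the pipeline's output columns.
+result = light.annotate("I love spark-nlp")
+print(result["class"])
+```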
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large__sst2__train_16_3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/SetFit/deberta-v3-large__sst2__train-16-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large__sst2__train_16_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large__sst2__train_16_3_pipeline_en.md new file mode 100644 index 00000000000000..4915ff711e117e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large__sst2__train_16_3_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_large__sst2__train_16_3_pipeline pipeline DeBertaForSequenceClassification from SetFit +author: John Snow Labs +name: deberta_v3_large__sst2__train_16_3_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large__sst2__train_16_3_pipeline` is a English model originally trained by SetFit. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large__sst2__train_16_3_pipeline_en_5.5.0_3.0_1725590242468.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large__sst2__train_16_3_pipeline_en_5.5.0_3.0_1725590242468.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is a DataFrame with the input text in a column named "text".
+pipeline = PretrainedPipeline("deberta_v3_large__sst2__train_16_3_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is a DataFrame with the input text in a column named "text".
+val pipeline = new PretrainedPipeline("deberta_v3_large__sst2__train_16_3_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large__sst2__train_16_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/SetFit/deberta-v3-large__sst2__train-16-3 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large__sst2__train_16_7_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large__sst2__train_16_7_en.md new file mode 100644 index 00000000000000..c8a6c4ff5bac58 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large__sst2__train_16_7_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_large__sst2__train_16_7 DeBertaForSequenceClassification from SetFit +author: John Snow Labs +name: deberta_v3_large__sst2__train_16_7 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large__sst2__train_16_7` is a English model originally trained by SetFit. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large__sst2__train_16_7_en_5.5.0_3.0_1725611397163.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large__sst2__train_16_7_en_5.5.0_3.0_1725611397163.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active Spark NLP session (e.g. spark = sparknlp.start()).
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large__sst2__train_16_7","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large__sst2__train_16_7", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large__sst2__train_16_7| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/SetFit/deberta-v3-large__sst2__train-16-7 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large__sst2__train_16_7_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large__sst2__train_16_7_pipeline_en.md new file mode 100644 index 00000000000000..0cf60e61b1d428 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large__sst2__train_16_7_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_large__sst2__train_16_7_pipeline pipeline DeBertaForSequenceClassification from SetFit +author: John Snow Labs +name: deberta_v3_large__sst2__train_16_7_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large__sst2__train_16_7_pipeline` is a English model originally trained by SetFit. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large__sst2__train_16_7_pipeline_en_5.5.0_3.0_1725611525326.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large__sst2__train_16_7_pipeline_en_5.5.0_3.0_1725611525326.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is a DataFrame with the input text in a column named "text".
+pipeline = PretrainedPipeline("deberta_v3_large__sst2__train_16_7_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is a DataFrame with the input text in a column named "text".
+val pipeline = new PretrainedPipeline("deberta_v3_large__sst2__train_16_7_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large__sst2__train_16_7_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/SetFit/deberta-v3-large__sst2__train-16-7 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large__sst2__train_8_4_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large__sst2__train_8_4_en.md new file mode 100644 index 00000000000000..a50b24c54c080a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large__sst2__train_8_4_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_large__sst2__train_8_4 DeBertaForSequenceClassification from SetFit +author: John Snow Labs +name: deberta_v3_large__sst2__train_8_4 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large__sst2__train_8_4` is a English model originally trained by SetFit. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large__sst2__train_8_4_en_5.5.0_3.0_1725610295793.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large__sst2__train_8_4_en_5.5.0_3.0_1725610295793.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active Spark NLP session (e.g. spark = sparknlp.start()).
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large__sst2__train_8_4","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large__sst2__train_8_4", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
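+
+Fitting the pipeline is cheap here (the classifier is already trained), and the resulting `PipelineModel` can be persisted and reloaded like any Spark ML model, which avoids calling `pretrained()` on every run. A minimal sketch; the path is only an example:
+
+```python
+from pyspark.ml import PipelineModel
+
+pipelineModel.write().overwrite().save("/tmp/deberta_sst2_train_8_4_pipeline")
+restored = PipelineModel.load("/tmp/deberta_sst2_train_8_4_pipeline")
+```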
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large__sst2__train_8_4| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/SetFit/deberta-v3-large__sst2__train-8-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_emotion_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_emotion_en.md new file mode 100644 index 00000000000000..77b30e1081ee2c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_emotion_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_large_emotion DeBertaForSequenceClassification from Elron +author: John Snow Labs +name: deberta_v3_large_emotion +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large_emotion` is a English model originally trained by Elron. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_emotion_en_5.5.0_3.0_1725610515144.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_emotion_en_5.5.0_3.0_1725610515144.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active Spark NLP session (e.g. spark = sparknlp.start()).
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large_emotion","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large_emotion", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_emotion| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/Elron/deberta-v3-large-emotion \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_emotion_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_emotion_pipeline_en.md new file mode 100644 index 00000000000000..9f6f0387ea3b1c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_emotion_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_large_emotion_pipeline pipeline DeBertaForSequenceClassification from Elron +author: John Snow Labs +name: deberta_v3_large_emotion_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large_emotion_pipeline` is a English model originally trained by Elron. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_emotion_pipeline_en_5.5.0_3.0_1725610654352.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_emotion_pipeline_en_5.5.0_3.0_1725610654352.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is a DataFrame with the input text in a column named "text".
+pipeline = PretrainedPipeline("deberta_v3_large_emotion_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is a DataFrame with the input text in a column named "text".
+val pipeline = new PretrainedPipeline("deberta_v3_large_emotion_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
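+
+Besides `transform`, a `PretrainedPipeline` can also be called directly on a plain string for quick checks, without building a DataFrame first. A minimal sketch reusing the `pipeline` object above (the input sentence is only an example):
+
+```python
+# Returns a dict keyed by the pipeline's output columns.
+result = pipeline.annotate("I love spark-nlp")
+print(result)
+```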
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_emotion_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/Elron/deberta-v3-large-emotion + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_mono_3_epoch_lr_5e_6_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_mono_3_epoch_lr_5e_6_en.md new file mode 100644 index 00000000000000..9d862ef361160c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_mono_3_epoch_lr_5e_6_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_large_mono_3_epoch_lr_5e_6 DeBertaForSequenceClassification from kpriyanshu256 +author: John Snow Labs +name: deberta_v3_large_mono_3_epoch_lr_5e_6 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large_mono_3_epoch_lr_5e_6` is a English model originally trained by kpriyanshu256. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_mono_3_epoch_lr_5e_6_en_5.5.0_3.0_1725610854534.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_mono_3_epoch_lr_5e_6_en_5.5.0_3.0_1725610854534.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active Spark NLP session (e.g. spark = sparknlp.start()).
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large_mono_3_epoch_lr_5e_6","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large_mono_3_epoch_lr_5e_6", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_mono_3_epoch_lr_5e_6| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/kpriyanshu256/deberta-v3-large-mono-3-epoch-lr-5e-6 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_mono_3_epoch_lr_5e_6_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_mono_3_epoch_lr_5e_6_pipeline_en.md new file mode 100644 index 00000000000000..580045dd0c929d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_mono_3_epoch_lr_5e_6_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_large_mono_3_epoch_lr_5e_6_pipeline pipeline DeBertaForSequenceClassification from kpriyanshu256 +author: John Snow Labs +name: deberta_v3_large_mono_3_epoch_lr_5e_6_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large_mono_3_epoch_lr_5e_6_pipeline` is a English model originally trained by kpriyanshu256. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_mono_3_epoch_lr_5e_6_pipeline_en_5.5.0_3.0_1725610935847.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_mono_3_epoch_lr_5e_6_pipeline_en_5.5.0_3.0_1725610935847.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is a DataFrame with the input text in a column named "text".
+pipeline = PretrainedPipeline("deberta_v3_large_mono_3_epoch_lr_5e_6_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is a DataFrame with the input text in a column named "text".
+val pipeline = new PretrainedPipeline("deberta_v3_large_mono_3_epoch_lr_5e_6_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_mono_3_epoch_lr_5e_6_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/kpriyanshu256/deberta-v3-large-mono-3-epoch-lr-5e-6 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_offensive_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_offensive_en.md new file mode 100644 index 00000000000000..1cb7b399f1c983 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_offensive_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_large_offensive DeBertaForSequenceClassification from Elron +author: John Snow Labs +name: deberta_v3_large_offensive +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large_offensive` is a English model originally trained by Elron. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_offensive_en_5.5.0_3.0_1725589025895.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_offensive_en_5.5.0_3.0_1725589025895.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active Spark NLP session (e.g. spark = sparknlp.start()).
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large_offensive","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large_offensive", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_offensive| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/Elron/deberta-v3-large-offensive \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_offensive_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_offensive_pipeline_en.md new file mode 100644 index 00000000000000..f30bca363e7a59 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_offensive_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_large_offensive_pipeline pipeline DeBertaForSequenceClassification from Elron +author: John Snow Labs +name: deberta_v3_large_offensive_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large_offensive_pipeline` is a English model originally trained by Elron. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_offensive_pipeline_en_5.5.0_3.0_1725589152330.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_offensive_pipeline_en_5.5.0_3.0_1725589152330.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is a DataFrame with the input text in a column named "text".
+pipeline = PretrainedPipeline("deberta_v3_large_offensive_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is a DataFrame with the input text in a column named "text".
+val pipeline = new PretrainedPipeline("deberta_v3_large_offensive_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_offensive_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/Elron/deberta-v3-large-offensive + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_main_passage_old_facts_rater_half_gpt4_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_main_passage_old_facts_rater_half_gpt4_en.md new file mode 100644 index 00000000000000..13fb2d26097eb1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_main_passage_old_facts_rater_half_gpt4_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_large_survey_main_passage_old_facts_rater_half_gpt4 DeBertaForSequenceClassification from domenicrosati +author: John Snow Labs +name: deberta_v3_large_survey_main_passage_old_facts_rater_half_gpt4 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large_survey_main_passage_old_facts_rater_half_gpt4` is a English model originally trained by domenicrosati. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_survey_main_passage_old_facts_rater_half_gpt4_en_5.5.0_3.0_1725589431625.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_survey_main_passage_old_facts_rater_half_gpt4_en_5.5.0_3.0_1725589431625.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active Spark NLP session (e.g. spark = sparknlp.start()).
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large_survey_main_passage_old_facts_rater_half_gpt4","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large_survey_main_passage_old_facts_rater_half_gpt4", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_survey_main_passage_old_facts_rater_half_gpt4| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/domenicrosati/deberta-v3-large-survey-main_passage_old_facts-rater-half-gpt4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_main_passage_old_facts_rater_half_gpt4_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_main_passage_old_facts_rater_half_gpt4_pipeline_en.md new file mode 100644 index 00000000000000..ed0d9749366293 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_main_passage_old_facts_rater_half_gpt4_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_large_survey_main_passage_old_facts_rater_half_gpt4_pipeline pipeline DeBertaForSequenceClassification from domenicrosati +author: John Snow Labs +name: deberta_v3_large_survey_main_passage_old_facts_rater_half_gpt4_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large_survey_main_passage_old_facts_rater_half_gpt4_pipeline` is a English model originally trained by domenicrosati. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_survey_main_passage_old_facts_rater_half_gpt4_pipeline_en_5.5.0_3.0_1725589540792.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_survey_main_passage_old_facts_rater_half_gpt4_pipeline_en_5.5.0_3.0_1725589540792.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is a DataFrame with the input text in a column named "text".
+pipeline = PretrainedPipeline("deberta_v3_large_survey_main_passage_old_facts_rater_half_gpt4_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is a DataFrame with the input text in a column named "text".
+val pipeline = new PretrainedPipeline("deberta_v3_large_survey_main_passage_old_facts_rater_half_gpt4_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_survey_main_passage_old_facts_rater_half_gpt4_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/domenicrosati/deberta-v3-large-survey-main_passage_old_facts-rater-half-gpt4 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_gpt4_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_gpt4_en.md new file mode 100644 index 00000000000000..22f1bbdc6ccd3f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_gpt4_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_gpt4 DeBertaForSequenceClassification from domenicrosati +author: John Snow Labs +name: deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_gpt4 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_gpt4` is a English model originally trained by domenicrosati. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_gpt4_en_5.5.0_3.0_1725610712373.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_gpt4_en_5.5.0_3.0_1725610712373.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active Spark NLP session (e.g. spark = sparknlp.start()).
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_gpt4","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_gpt4", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_gpt4| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/domenicrosati/deberta-v3-large-survey-new_fact_related_passage-rater-gpt4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_half_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_half_en.md new file mode 100644 index 00000000000000..1bb7542f053c67 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_half_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_half DeBertaForSequenceClassification from domenicrosati +author: John Snow Labs +name: deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_half +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_half` is a English model originally trained by domenicrosati. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_half_en_5.5.0_3.0_1725611251872.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_half_en_5.5.0_3.0_1725611251872.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes an active Spark NLP session (e.g. spark = sparknlp.start()).
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_half","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_half", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_half| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/domenicrosati/deberta-v3-large-survey-new_fact_related_passage-rater-half \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_half_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_half_pipeline_en.md new file mode 100644 index 00000000000000..ce9be07ecb70c3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_half_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_half_pipeline pipeline DeBertaForSequenceClassification from domenicrosati +author: John Snow Labs +name: deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_half_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_half_pipeline` is a English model originally trained by domenicrosati. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_half_pipeline_en_5.5.0_3.0_1725611380951.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_half_pipeline_en_5.5.0_3.0_1725611380951.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_half_pipeline", lang = "en")
+
+# any DataFrame with a "text" column can be passed through the pipeline
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_half_pipeline", lang = "en")
+
+// any DataFrame with a "text" column can be passed through the pipeline
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+</div>
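+
+For one-off experiments, the same pretrained pipeline can annotate a plain string without building a DataFrame first; a minimal sketch, assuming the `pipeline` object created above:
+
+```python
+# sketch only: the returned dict is keyed by the pipeline's output columns
+result = pipeline.annotate("I love spark-nlp")
+print(result)
+```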
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_survey_nepal_bhasa_fact_related_passage_rater_half_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/domenicrosati/deberta-v3-large-survey-new_fact_related_passage-rater-half + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_rater_sample_1_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_rater_sample_1_en.md new file mode 100644 index 00000000000000..0bce4fe58fae2c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_rater_sample_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_large_survey_rater_sample_1 DeBertaForSequenceClassification from domenicrosati +author: John Snow Labs +name: deberta_v3_large_survey_rater_sample_1 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large_survey_rater_sample_1` is a English model originally trained by domenicrosati. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_survey_rater_sample_1_en_5.5.0_3.0_1725610702791.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_survey_rater_sample_1_en_5.5.0_3.0_1725610702791.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from pyspark.ml import Pipeline
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# the classifier consumes the document and token columns assembled above
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large_survey_rater_sample_1","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+// the classifier consumes the document and token columns assembled above
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large_survey_rater_sample_1", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_survey_rater_sample_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/domenicrosati/deberta-v3-large-survey-rater-sample-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_rater_sample_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_rater_sample_1_pipeline_en.md new file mode 100644 index 00000000000000..683ca9327f541c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_rater_sample_1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_large_survey_rater_sample_1_pipeline pipeline DeBertaForSequenceClassification from domenicrosati +author: John Snow Labs +name: deberta_v3_large_survey_rater_sample_1_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large_survey_rater_sample_1_pipeline` is a English model originally trained by domenicrosati. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_survey_rater_sample_1_pipeline_en_5.5.0_3.0_1725610790347.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_survey_rater_sample_1_pipeline_en_5.5.0_3.0_1725610790347.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("deberta_v3_large_survey_rater_sample_1_pipeline", lang = "en")
+
+# any DataFrame with a "text" column can be passed through the pipeline
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("deberta_v3_large_survey_rater_sample_1_pipeline", lang = "en")
+
+// any DataFrame with a "text" column can be passed through the pipeline
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_survey_rater_sample_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/domenicrosati/deberta-v3-large-survey-rater-sample-1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_related_passage_consistency_rater_half_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_related_passage_consistency_rater_half_en.md new file mode 100644 index 00000000000000..e5e6b18b24030e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_related_passage_consistency_rater_half_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_large_survey_related_passage_consistency_rater_half DeBertaForSequenceClassification from domenicrosati +author: John Snow Labs +name: deberta_v3_large_survey_related_passage_consistency_rater_half +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large_survey_related_passage_consistency_rater_half` is a English model originally trained by domenicrosati. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_survey_related_passage_consistency_rater_half_en_5.5.0_3.0_1725589387081.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_survey_related_passage_consistency_rater_half_en_5.5.0_3.0_1725589387081.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from pyspark.ml import Pipeline
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# the classifier consumes the document and token columns assembled above
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large_survey_related_passage_consistency_rater_half","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+// the classifier consumes the document and token columns assembled above
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_large_survey_related_passage_consistency_rater_half", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_survey_related_passage_consistency_rater_half| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/domenicrosati/deberta-v3-large-survey-related_passage_consistency-rater-half \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_related_passage_consistency_rater_half_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_related_passage_consistency_rater_half_pipeline_en.md new file mode 100644 index 00000000000000..4485c82847cc59 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_large_survey_related_passage_consistency_rater_half_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_v3_large_survey_related_passage_consistency_rater_half_pipeline pipeline DeBertaForSequenceClassification from domenicrosati +author: John Snow Labs +name: deberta_v3_large_survey_related_passage_consistency_rater_half_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_large_survey_related_passage_consistency_rater_half_pipeline` is a English model originally trained by domenicrosati. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_large_survey_related_passage_consistency_rater_half_pipeline_en_5.5.0_3.0_1725589496878.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_large_survey_related_passage_consistency_rater_half_pipeline_en_5.5.0_3.0_1725589496878.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("deberta_v3_large_survey_related_passage_consistency_rater_half_pipeline", lang = "en")
+
+# any DataFrame with a "text" column can be passed through the pipeline
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("deberta_v3_large_survey_related_passage_consistency_rater_half_pipeline", lang = "en")
+
+// any DataFrame with a "text" column can be passed through the pipeline
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_large_survey_related_passage_consistency_rater_half_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/domenicrosati/deberta-v3-large-survey-related_passage_consistency-rater-half + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_small_finetuned_mnli_rdp99_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_small_finetuned_mnli_rdp99_en.md new file mode 100644 index 00000000000000..f102fb2b4d5307 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_v3_small_finetuned_mnli_rdp99_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_v3_small_finetuned_mnli_rdp99 DeBertaForSequenceClassification from rdp99 +author: John Snow Labs +name: deberta_v3_small_finetuned_mnli_rdp99 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_v3_small_finetuned_mnli_rdp99` is a English model originally trained by rdp99. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_v3_small_finetuned_mnli_rdp99_en_5.5.0_3.0_1725589807923.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_v3_small_finetuned_mnli_rdp99_en_5.5.0_3.0_1725589807923.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from pyspark.ml import Pipeline
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# the classifier consumes the document and token columns assembled above
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_small_finetuned_mnli_rdp99","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+// the classifier consumes the document and token columns assembled above
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_v3_small_finetuned_mnli_rdp99", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_v3_small_finetuned_mnli_rdp99| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|487.6 MB| + +## References + +https://huggingface.co/rdp99/deberta-v3-small-finetuned-mnli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deberta_xsmall_hatespeech_reward_model_en.md b/docs/_posts/ahmedlone127/2024-09-06-deberta_xsmall_hatespeech_reward_model_en.md new file mode 100644 index 00000000000000..d08e9947417dbd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deberta_xsmall_hatespeech_reward_model_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deberta_xsmall_hatespeech_reward_model DeBertaForSequenceClassification from shahrukhx01 +author: John Snow Labs +name: deberta_xsmall_hatespeech_reward_model +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_xsmall_hatespeech_reward_model` is a English model originally trained by shahrukhx01. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_xsmall_hatespeech_reward_model_en_5.5.0_3.0_1725609741191.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_xsmall_hatespeech_reward_model_en_5.5.0_3.0_1725609741191.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from pyspark.ml import Pipeline
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# the classifier consumes the document and token columns assembled above
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_xsmall_hatespeech_reward_model","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+// the classifier consumes the document and token columns assembled above
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deberta_xsmall_hatespeech_reward_model", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_xsmall_hatespeech_reward_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|253.4 MB| + +## References + +https://huggingface.co/shahrukhx01/deberta-xsmall-hatespeech-reward-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-declutr_s10_arabic_en.md b/docs/_posts/ahmedlone127/2024-09-06-declutr_s10_arabic_en.md new file mode 100644 index 00000000000000..23426ae67f37c7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-declutr_s10_arabic_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English declutr_s10_arabic RoBertaForSequenceClassification from AnonymousSub +author: John Snow Labs +name: declutr_s10_arabic +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`declutr_s10_arabic` is a English model originally trained by AnonymousSub. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/declutr_s10_arabic_en_5.5.0_3.0_1725613328114.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/declutr_s10_arabic_en_5.5.0_3.0_1725613328114.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from pyspark.ml import Pipeline
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, RoBertaForSequenceClassification
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# the classifier consumes the document and token columns assembled above
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("declutr_s10_arabic","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+// the classifier consumes the document and token columns assembled above
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("declutr_s10_arabic", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|declutr_s10_arabic| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|309.1 MB| + +## References + +https://huggingface.co/AnonymousSub/declutr-s10-AR \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-declutr_s10_arabic_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-declutr_s10_arabic_pipeline_en.md new file mode 100644 index 00000000000000..c93189af700d19 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-declutr_s10_arabic_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English declutr_s10_arabic_pipeline pipeline RoBertaForSequenceClassification from AnonymousSub +author: John Snow Labs +name: declutr_s10_arabic_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`declutr_s10_arabic_pipeline` is a English model originally trained by AnonymousSub. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/declutr_s10_arabic_pipeline_en_5.5.0_3.0_1725613345085.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/declutr_s10_arabic_pipeline_en_5.5.0_3.0_1725613345085.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("declutr_s10_arabic_pipeline", lang = "en")
+
+# any DataFrame with a "text" column can be passed through the pipeline
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("declutr_s10_arabic_pipeline", lang = "en")
+
+// any DataFrame with a "text" column can be passed through the pipeline
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|declutr_s10_arabic_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|309.1 MB| + +## References + +https://huggingface.co/AnonymousSub/declutr-s10-AR + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deep_4_en.md b/docs/_posts/ahmedlone127/2024-09-06-deep_4_en.md new file mode 100644 index 00000000000000..b0c5ee9cf86dc3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deep_4_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deep_4 RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: deep_4 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deep_4` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deep_4_en_5.5.0_3.0_1725613709289.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deep_4_en_5.5.0_3.0_1725613709289.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from pyspark.ml import Pipeline
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, RoBertaForSequenceClassification
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# the classifier consumes the document and token columns assembled above
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("deep_4","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+// the classifier consumes the document and token columns assembled above
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("deep_4", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deep_4| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/Deep_4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deeva_modcat_seqclass_deberta_v1_en.md b/docs/_posts/ahmedlone127/2024-09-06-deeva_modcat_seqclass_deberta_v1_en.md new file mode 100644 index 00000000000000..70c632ca363cc9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deeva_modcat_seqclass_deberta_v1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deeva_modcat_seqclass_deberta_v1 DeBertaForSequenceClassification from leobg +author: John Snow Labs +name: deeva_modcat_seqclass_deberta_v1 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deeva_modcat_seqclass_deberta_v1` is a English model originally trained by leobg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deeva_modcat_seqclass_deberta_v1_en_5.5.0_3.0_1725610127865.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deeva_modcat_seqclass_deberta_v1_en_5.5.0_3.0_1725610127865.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from pyspark.ml import Pipeline
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# the classifier consumes the document and token columns assembled above
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("deeva_modcat_seqclass_deberta_v1","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+// the classifier consumes the document and token columns assembled above
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("deeva_modcat_seqclass_deberta_v1", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deeva_modcat_seqclass_deberta_v1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|408.0 MB| + +## References + +https://huggingface.co/leobg/deeva-modcat-seqclass-deberta-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-deeva_modcat_seqclass_deberta_v1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-deeva_modcat_seqclass_deberta_v1_pipeline_en.md new file mode 100644 index 00000000000000..9c2503ba6f7441 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-deeva_modcat_seqclass_deberta_v1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deeva_modcat_seqclass_deberta_v1_pipeline pipeline DeBertaForSequenceClassification from leobg +author: John Snow Labs +name: deeva_modcat_seqclass_deberta_v1_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deeva_modcat_seqclass_deberta_v1_pipeline` is a English model originally trained by leobg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deeva_modcat_seqclass_deberta_v1_pipeline_en_5.5.0_3.0_1725610186657.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deeva_modcat_seqclass_deberta_v1_pipeline_en_5.5.0_3.0_1725610186657.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("deeva_modcat_seqclass_deberta_v1_pipeline", lang = "en")
+
+# any DataFrame with a "text" column can be passed through the pipeline
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("deeva_modcat_seqclass_deberta_v1_pipeline", lang = "en")
+
+// any DataFrame with a "text" column can be passed through the pipeline
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deeva_modcat_seqclass_deberta_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|408.0 MB| + +## References + +https://huggingface.co/leobg/deeva-modcat-seqclass-deberta-v1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-detectors_legit_user_en.md b/docs/_posts/ahmedlone127/2024-09-06-detectors_legit_user_en.md new file mode 100644 index 00000000000000..c9ffaf14ed7cc1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-detectors_legit_user_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English detectors_legit_user XlmRoBertaForSequenceClassification from Sydelabs +author: John Snow Labs +name: detectors_legit_user +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`detectors_legit_user` is a English model originally trained by Sydelabs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/detectors_legit_user_en_5.5.0_3.0_1725616567955.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/detectors_legit_user_en_5.5.0_3.0_1725616567955.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from pyspark.ml import Pipeline
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForSequenceClassification
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# the classifier consumes the document and token columns assembled above
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("detectors_legit_user","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+// the classifier consumes the document and token columns assembled above
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("detectors_legit_user", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|detectors_legit_user| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.1 GB| + +## References + +https://huggingface.co/Sydelabs/detectors_legit_user \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-detectors_legit_user_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-detectors_legit_user_pipeline_en.md new file mode 100644 index 00000000000000..ffe888a3d5277b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-detectors_legit_user_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English detectors_legit_user_pipeline pipeline XlmRoBertaForSequenceClassification from Sydelabs +author: John Snow Labs +name: detectors_legit_user_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`detectors_legit_user_pipeline` is a English model originally trained by Sydelabs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/detectors_legit_user_pipeline_en_5.5.0_3.0_1725616622843.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/detectors_legit_user_pipeline_en_5.5.0_3.0_1725616622843.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("detectors_legit_user_pipeline", lang = "en")
+
+# any DataFrame with a "text" column can be passed through the pipeline
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("detectors_legit_user_pipeline", lang = "en")
+
+// any DataFrame with a "text" column can be passed through the pipeline
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|detectors_legit_user_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.1 GB| + +## References + +https://huggingface.co/Sydelabs/detectors_legit_user + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-disease_diagnosis_en.md b/docs/_posts/ahmedlone127/2024-09-06-disease_diagnosis_en.md new file mode 100644 index 00000000000000..1c335cb4b1c8fe --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-disease_diagnosis_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English disease_diagnosis DistilBertForSequenceClassification from BenK10 +author: John Snow Labs +name: disease_diagnosis +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`disease_diagnosis` is a English model originally trained by BenK10. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/disease_diagnosis_en_5.5.0_3.0_1725608545921.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/disease_diagnosis_en_5.5.0_3.0_1725608545921.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from pyspark.ml import Pipeline
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForSequenceClassification
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# the classifier consumes the document and token columns assembled above
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("disease_diagnosis","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+// the classifier consumes the document and token columns assembled above
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("disease_diagnosis", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|disease_diagnosis| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|252.6 MB| + +## References + +https://huggingface.co/BenK10/disease-diagnosis \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-disease_diagnosis_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-disease_diagnosis_pipeline_en.md new file mode 100644 index 00000000000000..fbe766cfe118d3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-disease_diagnosis_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English disease_diagnosis_pipeline pipeline DistilBertForSequenceClassification from BenK10 +author: John Snow Labs +name: disease_diagnosis_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`disease_diagnosis_pipeline` is a English model originally trained by BenK10. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/disease_diagnosis_pipeline_en_5.5.0_3.0_1725608558941.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/disease_diagnosis_pipeline_en_5.5.0_3.0_1725608558941.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("disease_diagnosis_pipeline", lang = "en")
+
+# any DataFrame with a "text" column can be passed through the pipeline
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("disease_diagnosis_pipeline", lang = "en")
+
+// any DataFrame with a "text" column can be passed through the pipeline
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|disease_diagnosis_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|252.6 MB| + +## References + +https://huggingface.co/BenK10/disease-diagnosis + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_cased_finetuned_chunk_2_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_cased_finetuned_chunk_2_en.md new file mode 100644 index 00000000000000..d52f4db6b29bbd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_cased_finetuned_chunk_2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_cased_finetuned_chunk_2 DistilBertForTokenClassification from RobW +author: John Snow Labs +name: distilbert_base_cased_finetuned_chunk_2 +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_cased_finetuned_chunk_2` is a English model originally trained by RobW. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_finetuned_chunk_2_en_5.5.0_3.0_1725653546131.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_finetuned_chunk_2_en_5.5.0_3.0_1725653546131.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from pyspark.ml import Pipeline
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForTokenClassification
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# the token classifier consumes the document and token columns assembled above
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_cased_finetuned_chunk_2","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+// the token classifier consumes the document and token columns assembled above
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_cased_finetuned_chunk_2", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
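+
+A minimal follow-up sketch (assuming the `pipelineDF` produced above): the token-level predictions sit in the `ner` column and can be read alongside the tokens they tag.
+
+```python
+# sketch only: "token.result" holds the tokens, "ner.result" the predicted tags
+pipelineDF.select("token.result", "ner.result").show(truncate=False)
+```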
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_cased_finetuned_chunk_2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/RobW/distilbert-base-cased-finetuned-chunk-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_cased_finetuned_chunk_2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_cased_finetuned_chunk_2_pipeline_en.md new file mode 100644 index 00000000000000..defe16e3118b85 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_cased_finetuned_chunk_2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_cased_finetuned_chunk_2_pipeline pipeline DistilBertForTokenClassification from RobW +author: John Snow Labs +name: distilbert_base_cased_finetuned_chunk_2_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_cased_finetuned_chunk_2_pipeline` is a English model originally trained by RobW. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_finetuned_chunk_2_pipeline_en_5.5.0_3.0_1725653557488.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_finetuned_chunk_2_pipeline_en_5.5.0_3.0_1725653557488.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("distilbert_base_cased_finetuned_chunk_2_pipeline", lang = "en")
+
+# any DataFrame with a "text" column can be passed through the pipeline
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("distilbert_base_cased_finetuned_chunk_2_pipeline", lang = "en")
+
+// any DataFrame with a "text" column can be passed through the pipeline
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_cased_finetuned_chunk_2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/RobW/distilbert-base-cased-finetuned-chunk-2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_cased_logdetective_extraction_retrained_fedora_copr_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_cased_logdetective_extraction_retrained_fedora_copr_en.md new file mode 100644 index 00000000000000..0234801c1e4087 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_cased_logdetective_extraction_retrained_fedora_copr_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_base_cased_logdetective_extraction_retrained_fedora_copr DistilBertForQuestionAnswering from fedora-copr +author: John Snow Labs +name: distilbert_base_cased_logdetective_extraction_retrained_fedora_copr +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_cased_logdetective_extraction_retrained_fedora_copr` is a English model originally trained by fedora-copr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_logdetective_extraction_retrained_fedora_copr_en_5.5.0_3.0_1725621970369.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_logdetective_extraction_retrained_fedora_copr_en_5.5.0_3.0_1725621970369.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from pyspark.ml import Pipeline
+from sparknlp.base import MultiDocumentAssembler
+from sparknlp.annotator import DistilBertForQuestionAnswering
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+# the span classifier consumes the question and context documents assembled above
+spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_cased_logdetective_extraction_retrained_fedora_copr","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.MultiDocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+// the span classifier consumes the question and context documents assembled above
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_cased_logdetective_extraction_retrained_fedora_copr", "en")
+    .setInputCols(Array("document_question", "document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
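+
+A minimal follow-up sketch (assuming the `pipelineDF` produced above): the extracted answer span is stored in the `answer` column.
+
+```python
+# sketch only: read back the predicted answer text for each question/context pair
+pipelineDF.select("answer.result").show(truncate=False)
+```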
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_cased_logdetective_extraction_retrained_fedora_copr| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|243.7 MB| + +## References + +https://huggingface.co/fedora-copr/distilbert-base-cased-logdetective-extraction-retrained \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_cased_logdetective_extraction_retrained_fedora_copr_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_cased_logdetective_extraction_retrained_fedora_copr_pipeline_en.md new file mode 100644 index 00000000000000..b7f2534b166db8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_cased_logdetective_extraction_retrained_fedora_copr_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_cased_logdetective_extraction_retrained_fedora_copr_pipeline pipeline DistilBertForQuestionAnswering from fedora-copr +author: John Snow Labs +name: distilbert_base_cased_logdetective_extraction_retrained_fedora_copr_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_cased_logdetective_extraction_retrained_fedora_copr_pipeline` is a English model originally trained by fedora-copr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_logdetective_extraction_retrained_fedora_copr_pipeline_en_5.5.0_3.0_1725621982393.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_logdetective_extraction_retrained_fedora_copr_pipeline_en_5.5.0_3.0_1725621982393.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_cased_logdetective_extraction_retrained_fedora_copr_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_cased_logdetective_extraction_retrained_fedora_copr_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
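+
+The snippet above assumes an existing Spark DataFrame `df`. A minimal sketch of building one and reading the answers; the `question`/`context` input columns and the `answer` output column are assumptions based on the included `MultiDocumentAssembler` stage, so verify them against your Spark NLP version:
+
+```python
+# column names are an assumption, not confirmed by the model card
+df = spark.createDataFrame(
+    [["What framework do I use?", "I use spark-nlp."]]
+).toDF("question", "context")
+
+annotations = pipeline.transform(df)
+annotations.selectExpr("answer.result").show(truncate=False)
+```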
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_cased_logdetective_extraction_retrained_fedora_copr_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.7 MB| + +## References + +https://huggingface.co/fedora-copr/distilbert-base-cased-logdetective-extraction-retrained + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_german_cased_pipeline_de.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_german_cased_pipeline_de.md new file mode 100644 index 00000000000000..cd9aad4f65a17a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_german_cased_pipeline_de.md @@ -0,0 +1,70 @@ +--- +layout: model +title: German distilbert_base_german_cased_pipeline pipeline DistilBertEmbeddings from distilbert +author: John Snow Labs +name: distilbert_base_german_cased_pipeline +date: 2024-09-06 +tags: [de, open_source, pipeline, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_german_cased_pipeline` is a German model originally trained by distilbert. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_german_cased_pipeline_de_5.5.0_3.0_1725639542296.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_german_cased_pipeline_de_5.5.0_3.0_1725639542296.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_german_cased_pipeline", lang = "de") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_german_cased_pipeline", lang = "de") +val annotations = pipeline.transform(df) + +``` +
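+
+For quick experiments the pipeline can also be applied directly to a string with `fullAnnotate` instead of transforming a DataFrame; a small sketch (the German sample sentence is only illustrative):
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("distilbert_base_german_cased_pipeline", lang="de")
+# returns one dict of annotation lists per input text
+result = pipeline.fullAnnotate("Ich liebe Spark NLP")[0]
+print(result.keys())
+```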
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_german_cased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|de| +|Size:|250.3 MB| + +## References + +https://huggingface.co/distilbert/distilbert-base-german-cased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_multilingual_cased_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_multilingual_cased_pipeline_xx.md new file mode 100644 index 00000000000000..8e5b70892dc654 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_multilingual_cased_pipeline_xx.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Multilingual distilbert_base_multilingual_cased_pipeline pipeline DistilBertEmbeddings from distilbert +author: John Snow Labs +name: distilbert_base_multilingual_cased_pipeline +date: 2024-09-06 +tags: [xx, open_source, pipeline, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_multilingual_cased_pipeline` is a Multilingual model originally trained by distilbert. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_multilingual_cased_pipeline_xx_5.5.0_3.0_1725639444491.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_multilingual_cased_pipeline_xx_5.5.0_3.0_1725639444491.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_multilingual_cased_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_multilingual_cased_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|distilbert_base_multilingual_cased_pipeline|
+|Type:|pipeline|
+|Compatibility:|Spark NLP 5.5.0+|
+|License:|Open Source|
+|Edition:|Official|
+|Language:|xx|
+|Size:|505.4 MB|
+
+## References
+
+https://huggingface.co/distilbert/distilbert-base-multilingual-cased
+
+## Included Models
+
+- DocumentAssembler
+- TokenizerModel
+- DistilBertEmbeddings
\ No newline at end of file
diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_multilingual_cased_xx.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_multilingual_cased_xx.md
new file mode 100644
index 00000000000000..2927ba9d659d2e
--- /dev/null
+++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_multilingual_cased_xx.md
@@ -0,0 +1,94 @@
+---
+layout: model
+title: DistilBERT base multilingual model (cased)
+author: John Snow Labs
+name: distilbert_base_multilingual_cased
+date: 2024-09-06
+tags: [distilbert, embeddings, xx, multilingual, open_source, onnx]
+task: Embeddings
+language: xx
+edition: Spark NLP 5.5.0
+spark_version: 3.0
+supported: true
+engine: onnx
+annotator: DistilBertEmbeddings
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+This model is a distilled version of the [BERT base multilingual model](https://huggingface.co/bert-base-multilingual-cased). The code for the distillation process can be found [here](https://github.com/huggingface/transformers/tree/master/examples/research_projects/distillation). This model is cased: it does make a difference between english and English. The model was trained on the concatenation of Wikipedia in 104 different languages listed [here](https://github.com/google-research/bert/blob/master/multilingual.md#list-of-languages).
+
+The model has 6 layers, a hidden dimension of 768, and 12 heads, for a total of 134M parameters (compared to 177M parameters for mBERT-base). On average, DistilmBERT is about twice as fast as mBERT-base.
+
+## Predicted Entities
+
+
+
+{:.btn-box}
+
+
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_multilingual_cased_xx_5.5.0_3.0_1725639419148.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_multilingual_cased_xx_5.5.0_3.0_1725639419148.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# document_assembler, sentence_detector and tokenizer are assumed to be defined upstream
+# (see the sketch after this example for one way to set them up)
+embeddings = DistilBertEmbeddings.pretrained("distilbert_base_multilingual_cased", "xx") \
+    .setInputCols("sentence", "token") \
+    .setOutputCol("embeddings")
+
+nlp_pipeline = Pipeline(stages=[document_assembler, sentence_detector, tokenizer, embeddings])
+```
+```scala
+// document_assembler, sentence_detector and tokenizer are assumed to be defined upstream
+val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_multilingual_cased", "xx")
+  .setInputCols("sentence", "token")
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, sentence_detector, tokenizer, embeddings))
+```
+
+{:.nlu-block}
+```python
+import nlu
+nlu.load("xx.embed.distilbert").predict("""Put your text here.""")
+```
+</div>
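+
+The example above leaves the upstream stages undefined. A minimal sketch of `document_assembler`, `sentence_detector` and `tokenizer` that produces the `sentence` and `token` columns the embeddings annotator expects (the names are illustrative):
+
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetector, Tokenizer
+
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentence_detector = SentenceDetector() \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("token")
+```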
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_multilingual_cased| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|xx| +|Size:|505.4 MB| + +## References + +References + +[https://huggingface.co/distilbert-base-multilingual-cased](https://huggingface.co/distilbert-base-multilingual-cased) + +## Benchmarking + +```bash + +Benchmarking + + +| Model | English | Spanish | Chinese | German | Arabic | Urdu | +| :---: | :---: | :---: | :---: | :---: | :---: | :---:| +| mBERT base cased (computed) | 82.1 | 74.6 | 69.1 | 72.3 | 66.4 | 58.5 | +| mBERT base uncased (reported)| 81.4 | 74.3 | 63.8 | 70.5 | 62.1 | 58.3 | +| DistilmBERT | 78.2 | 69.1 | 64.0 | 66.3 | 59.1 | 54.7 | +``` \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_ner_058_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_ner_058_pipeline_en.md new file mode 100644 index 00000000000000..286bd6806bdbad --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_ner_058_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_ner_058_pipeline pipeline DistilBertForTokenClassification from NguyenVanHieu1605 +author: John Snow Labs +name: distilbert_base_ner_058_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_ner_058_pipeline` is a English model originally trained by NguyenVanHieu1605. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_ner_058_pipeline_en_5.5.0_3.0_1725654067968.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_ner_058_pipeline_en_5.5.0_3.0_1725654067968.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_ner_058_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_ner_058_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_ner_058_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/NguyenVanHieu1605/distilbert-base-ner-058 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_ai4privacy_english_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_ai4privacy_english_en.md new file mode 100644 index 00000000000000..ce95d822e22210 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_ai4privacy_english_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_ai4privacy_english DistilBertForTokenClassification from xXiaobuding +author: John Snow Labs +name: distilbert_base_uncased_ai4privacy_english +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_ai4privacy_english` is a English model originally trained by xXiaobuding. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_ai4privacy_english_en_5.5.0_3.0_1725653580021.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_ai4privacy_english_en_5.5.0_3.0_1725653580021.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+# the classifier reads the "document" column produced above
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_ai4privacy_english","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_ai4privacy_english", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
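+
+After `transform`, the token-level predictions are stored in the `ner` annotation column; a small sketch for inspecting them, assuming the stages above:
+
+```python
+# one predicted tag per token
+pipelineDF.selectExpr("explode(ner.result) as ner_label").show(truncate=False)
+```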
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_ai4privacy_english| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.6 MB| + +## References + +https://huggingface.co/xXiaobuding/distilbert-base-uncased_ai4privacy_en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_ai4privacy_english_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_ai4privacy_english_pipeline_en.md new file mode 100644 index 00000000000000..b7d9fd39e099cd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_ai4privacy_english_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_ai4privacy_english_pipeline pipeline DistilBertForTokenClassification from xXiaobuding +author: John Snow Labs +name: distilbert_base_uncased_ai4privacy_english_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_ai4privacy_english_pipeline` is a English model originally trained by xXiaobuding. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_ai4privacy_english_pipeline_en_5.5.0_3.0_1725653591965.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_ai4privacy_english_pipeline_en_5.5.0_3.0_1725653591965.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_ai4privacy_english_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_ai4privacy_english_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_ai4privacy_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.6 MB| + +## References + +https://huggingface.co/xXiaobuding/distilbert-base-uncased_ai4privacy_en + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_distilled_squad_qa_model_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_distilled_squad_qa_model_en.md new file mode 100644 index 00000000000000..57b8c2323645ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_distilled_squad_qa_model_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_base_uncased_distilled_squad_qa_model DistilBertForQuestionAnswering from Chetna19 +author: John Snow Labs +name: distilbert_base_uncased_distilled_squad_qa_model +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_distilled_squad_qa_model` is a English model originally trained by Chetna19. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_distilled_squad_qa_model_en_5.5.0_3.0_1725621993836.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_distilled_squad_qa_model_en_5.5.0_3.0_1725621993836.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import MultiDocumentAssembler
+from sparknlp.annotator import DistilBertForQuestionAnswering
+from pyspark.ml import Pipeline
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_distilled_squad_qa_model","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_distilled_squad_qa_model", "en")
+  .setInputCols(Array("document_question", "document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_distilled_squad_qa_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Chetna19/distilbert-base-uncased-distilled-squad_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_distilled_squad_qa_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_distilled_squad_qa_model_pipeline_en.md new file mode 100644 index 00000000000000..24c7324d67cc35 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_distilled_squad_qa_model_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_uncased_distilled_squad_qa_model_pipeline pipeline DistilBertForQuestionAnswering from Chetna19 +author: John Snow Labs +name: distilbert_base_uncased_distilled_squad_qa_model_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_distilled_squad_qa_model_pipeline` is a English model originally trained by Chetna19. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_distilled_squad_qa_model_pipeline_en_5.5.0_3.0_1725622006233.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_distilled_squad_qa_model_pipeline_en_5.5.0_3.0_1725622006233.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_distilled_squad_qa_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_distilled_squad_qa_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_distilled_squad_qa_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Chetna19/distilbert-base-uncased-distilled-squad_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_emotion_xyang2023_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_emotion_xyang2023_en.md new file mode 100644 index 00000000000000..94ae6028d40fb4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_emotion_xyang2023_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_emotion_xyang2023 DistilBertForSequenceClassification from XYang2023 +author: John Snow Labs +name: distilbert_base_uncased_emotion_xyang2023 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_emotion_xyang2023` is a English model originally trained by XYang2023. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_emotion_xyang2023_en_5.5.0_3.0_1725607834874.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_emotion_xyang2023_en_5.5.0_3.0_1725607834874.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+# the classifier reads the "document" column produced above
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_emotion_xyang2023","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_emotion_xyang2023", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
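+
+The predicted label for each input row ends up in the `class` annotation column; assuming the stages above, it can be read back with:
+
+```python
+pipelineDF.select("text", "class.result").show(truncate=False)
+```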
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_emotion_xyang2023| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/XYang2023/distilbert-base-uncased-emotion \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_emotion_xyang2023_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_emotion_xyang2023_pipeline_en.md new file mode 100644 index 00000000000000..385837e3710387 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_emotion_xyang2023_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_emotion_xyang2023_pipeline pipeline DistilBertForSequenceClassification from XYang2023 +author: John Snow Labs +name: distilbert_base_uncased_emotion_xyang2023_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_emotion_xyang2023_pipeline` is a English model originally trained by XYang2023. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_emotion_xyang2023_pipeline_en_5.5.0_3.0_1725607852804.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_emotion_xyang2023_pipeline_en_5.5.0_3.0_1725607852804.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_emotion_xyang2023_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_emotion_xyang2023_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_emotion_xyang2023_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/XYang2023/distilbert-base-uncased-emotion + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_clinc_buruzaemon_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_clinc_buruzaemon_en.md new file mode 100644 index 00000000000000..6f46a23d2d3db4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_clinc_buruzaemon_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_clinc_buruzaemon DistilBertForSequenceClassification from buruzaemon +author: John Snow Labs +name: distilbert_base_uncased_finetuned_clinc_buruzaemon +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_clinc_buruzaemon` is a English model originally trained by buruzaemon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_clinc_buruzaemon_en_5.5.0_3.0_1725608674331.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_clinc_buruzaemon_en_5.5.0_3.0_1725608674331.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_clinc_buruzaemon","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_clinc_buruzaemon", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_clinc_buruzaemon| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.9 MB| + +## References + +https://huggingface.co/buruzaemon/distilbert-base-uncased-finetuned-clinc \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_dourc_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_dourc_en.md new file mode 100644 index 00000000000000..7beb533414dba7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_dourc_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_dourc DistilBertForQuestionAnswering from suthanhcong +author: John Snow Labs +name: distilbert_base_uncased_finetuned_dourc +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_dourc` is a English model originally trained by suthanhcong. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_dourc_en_5.5.0_3.0_1725654838109.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_dourc_en_5.5.0_3.0_1725654838109.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import MultiDocumentAssembler
+from sparknlp.annotator import DistilBertForQuestionAnswering
+from pyspark.ml import Pipeline
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_dourc","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_dourc", "en")
+  .setInputCols(Array("document_question", "document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_dourc| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/suthanhcong/distilbert-base-uncased-finetuned-DouRC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_dourc_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_dourc_pipeline_en.md new file mode 100644 index 00000000000000..412649dc98d8e8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_dourc_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_dourc_pipeline pipeline DistilBertForQuestionAnswering from suthanhcong +author: John Snow Labs +name: distilbert_base_uncased_finetuned_dourc_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_dourc_pipeline` is a English model originally trained by suthanhcong. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_dourc_pipeline_en_5.5.0_3.0_1725654850804.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_dourc_pipeline_en_5.5.0_3.0_1725654850804.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_dourc_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_dourc_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_dourc_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/suthanhcong/distilbert-base-uncased-finetuned-DouRC + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_emotion_sjhong_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_emotion_sjhong_en.md new file mode 100644 index 00000000000000..cfd5a3014f9798 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_emotion_sjhong_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_emotion_sjhong DistilBertForSequenceClassification from sjhong +author: John Snow Labs +name: distilbert_base_uncased_finetuned_emotion_sjhong +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_emotion_sjhong` is a English model originally trained by sjhong. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_sjhong_en_5.5.0_3.0_1725607934042.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_sjhong_en_5.5.0_3.0_1725607934042.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_emotion_sjhong","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_emotion_sjhong", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_emotion_sjhong| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/sjhong/distilbert-base-uncased-finetuned-emotion \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_emotion_sjhong_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_emotion_sjhong_pipeline_en.md new file mode 100644 index 00000000000000..1744ecc1f05ae4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_emotion_sjhong_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_emotion_sjhong_pipeline pipeline DistilBertForSequenceClassification from sjhong +author: John Snow Labs +name: distilbert_base_uncased_finetuned_emotion_sjhong_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_emotion_sjhong_pipeline` is a English model originally trained by sjhong. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_sjhong_pipeline_en_5.5.0_3.0_1725607946349.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_sjhong_pipeline_en_5.5.0_3.0_1725607946349.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_emotion_sjhong_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_emotion_sjhong_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_emotion_sjhong_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/sjhong/distilbert-base-uncased-finetuned-emotion + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_emotion_skillripper_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_emotion_skillripper_en.md new file mode 100644 index 00000000000000..4bccf9c12fb0ee --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_emotion_skillripper_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_emotion_skillripper DistilBertForSequenceClassification from SkillRipper +author: John Snow Labs +name: distilbert_base_uncased_finetuned_emotion_skillripper +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_emotion_skillripper` is a English model originally trained by SkillRipper. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_skillripper_en_5.5.0_3.0_1725608244288.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_skillripper_en_5.5.0_3.0_1725608244288.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_emotion_skillripper","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_emotion_skillripper", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_emotion_skillripper| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/SkillRipper/distilbert-base-uncased-finetuned-emotion \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_goemotion_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_goemotion_en.md new file mode 100644 index 00000000000000..032d4dc59f55df --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_goemotion_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_goemotion DistilBertForSequenceClassification from farzanmrz +author: John Snow Labs +name: distilbert_base_uncased_finetuned_goemotion +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_goemotion` is a English model originally trained by farzanmrz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_goemotion_en_5.5.0_3.0_1725608538119.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_goemotion_en_5.5.0_3.0_1725608538119.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_goemotion","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_goemotion", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_goemotion| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/farzanmrz/distilbert-base-uncased-finetuned-goemotion \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_hotels_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_hotels_en.md new file mode 100644 index 00000000000000..cbc3a7edd8bd16 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_hotels_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_hotels DistilBertEmbeddings from saiki0684 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_hotels +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_hotels` is a English model originally trained by saiki0684. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_hotels_en_5.5.0_3.0_1725639129065.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_hotels_en_5.5.0_3.0_1725639129065.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_hotels","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_hotels","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
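+
+Assuming the stages above, the token-level vectors produced by the embeddings annotator can be pulled out of the `embeddings` annotation column:
+
+```python
+# one embedding vector per token
+pipelineDF.selectExpr("explode(embeddings.embeddings) as vector").show(1, truncate=80)
+```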
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_hotels| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/saiki0684/distilbert-base-uncased-finetuned-hotels \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_hotels_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_hotels_pipeline_en.md new file mode 100644 index 00000000000000..031307593b34d9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_hotels_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_hotels_pipeline pipeline DistilBertEmbeddings from saiki0684 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_hotels_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_hotels_pipeline` is a English model originally trained by saiki0684. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_hotels_pipeline_en_5.5.0_3.0_1725639143243.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_hotels_pipeline_en_5.5.0_3.0_1725639143243.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_hotels_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_hotels_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_hotels_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/saiki0684/distilbert-base-uncased-finetuned-hotels + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_blitherboom_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_blitherboom_en.md new file mode 100644 index 00000000000000..b8762d6854feb0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_blitherboom_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_blitherboom DistilBertEmbeddings from BlitherBoom +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_blitherboom +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_accelerate_blitherboom` is a English model originally trained by BlitherBoom. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_blitherboom_en_5.5.0_3.0_1725639129294.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_blitherboom_en_5.5.0_3.0_1725639129294.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_blitherboom","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_blitherboom","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_blitherboom| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/BlitherBoom/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_blitherboom_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_blitherboom_pipeline_en.md new file mode 100644 index 00000000000000..beaebed61dcdcb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_blitherboom_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_blitherboom_pipeline pipeline DistilBertEmbeddings from BlitherBoom +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_blitherboom_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_accelerate_blitherboom_pipeline` is a English model originally trained by BlitherBoom. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_blitherboom_pipeline_en_5.5.0_3.0_1725639142572.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_blitherboom_pipeline_en_5.5.0_3.0_1725639142572.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_accelerate_blitherboom_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_accelerate_blitherboom_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_blitherboom_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/BlitherBoom/distilbert-base-uncased-finetuned-imdb-accelerate + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_haotiany_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_haotiany_en.md new file mode 100644 index 00000000000000..6ef235c114837c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_haotiany_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_haotiany DistilBertEmbeddings from haotiany +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_haotiany +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_accelerate_haotiany` is a English model originally trained by haotiany. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_haotiany_en_5.5.0_3.0_1725639446273.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_haotiany_en_5.5.0_3.0_1725639446273.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_haotiany","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_haotiany","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_haotiany| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/haotiany/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_ipvikas_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_ipvikas_en.md new file mode 100644 index 00000000000000..a7b6758cdf9706 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_ipvikas_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_ipvikas DistilBertEmbeddings from ipvikas +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_ipvikas +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_accelerate_ipvikas` is a English model originally trained by ipvikas. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_ipvikas_en_5.5.0_3.0_1725664607792.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_ipvikas_en_5.5.0_3.0_1725664607792.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_ipvikas","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_ipvikas","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_ipvikas| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ipvikas/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_ipvikas_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_ipvikas_pipeline_en.md new file mode 100644 index 00000000000000..26889451fdbd7e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_ipvikas_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_ipvikas_pipeline pipeline DistilBertEmbeddings from ipvikas +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_ipvikas_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_accelerate_ipvikas_pipeline` is a English model originally trained by ipvikas. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_ipvikas_pipeline_en_5.5.0_3.0_1725664619424.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_ipvikas_pipeline_en_5.5.0_3.0_1725664619424.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_accelerate_ipvikas_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_accelerate_ipvikas_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_ipvikas_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ipvikas/distilbert-base-uncased-finetuned-imdb-accelerate + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_muhbdeir_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_muhbdeir_en.md new file mode 100644 index 00000000000000..f24e62537fc132 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_muhbdeir_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_muhbdeir DistilBertEmbeddings from muhbdeir +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_muhbdeir +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_accelerate_muhbdeir` is a English model originally trained by muhbdeir. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_muhbdeir_en_5.5.0_3.0_1725665240369.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_muhbdeir_en_5.5.0_3.0_1725665240369.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_muhbdeir","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_muhbdeir","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_muhbdeir| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/muhbdeir/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_nlp_course_chapter7_section2_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_nlp_course_chapter7_section2_en.md new file mode 100644 index 00000000000000..ff81b00195ff7d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_nlp_course_chapter7_section2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_nlp_course_chapter7_section2 DistilBertEmbeddings from BanUrsus +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_nlp_course_chapter7_section2 +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_accelerate_nlp_course_chapter7_section2` is a English model originally trained by BanUrsus. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_nlp_course_chapter7_section2_en_5.5.0_3.0_1725665049406.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_nlp_course_chapter7_section2_en_5.5.0_3.0_1725665049406.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_nlp_course_chapter7_section2","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_nlp_course_chapter7_section2","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_nlp_course_chapter7_section2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/BanUrsus/distilbert-base-uncased-finetuned-imdb-accelerate_nlp-course-chapter7-section2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_nlp_course_chapter7_section2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_nlp_course_chapter7_section2_pipeline_en.md new file mode 100644 index 00000000000000..53103566596813 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_nlp_course_chapter7_section2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_nlp_course_chapter7_section2_pipeline pipeline DistilBertEmbeddings from BanUrsus +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_nlp_course_chapter7_section2_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_accelerate_nlp_course_chapter7_section2_pipeline` is a English model originally trained by BanUrsus. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_nlp_course_chapter7_section2_pipeline_en_5.5.0_3.0_1725665062106.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_nlp_course_chapter7_section2_pipeline_en_5.5.0_3.0_1725665062106.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_accelerate_nlp_course_chapter7_section2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_accelerate_nlp_course_chapter7_section2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_nlp_course_chapter7_section2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/BanUrsus/distilbert-base-uncased-finetuned-imdb-accelerate_nlp-course-chapter7-section2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_valentinguigon_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_valentinguigon_en.md new file mode 100644 index 00000000000000..a810aa8ff708d6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_valentinguigon_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_valentinguigon DistilBertEmbeddings from ValentinGuigon +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_valentinguigon +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_accelerate_valentinguigon` is a English model originally trained by ValentinGuigon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_valentinguigon_en_5.5.0_3.0_1725664668298.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_valentinguigon_en_5.5.0_3.0_1725664668298.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_valentinguigon","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_valentinguigon","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_valentinguigon| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ValentinGuigon/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_valentinguigon_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_valentinguigon_pipeline_en.md new file mode 100644 index 00000000000000..248c2905a8316e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_accelerate_valentinguigon_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_valentinguigon_pipeline pipeline DistilBertEmbeddings from ValentinGuigon +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_valentinguigon_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_accelerate_valentinguigon_pipeline` is a English model originally trained by ValentinGuigon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_valentinguigon_pipeline_en_5.5.0_3.0_1725664680454.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_valentinguigon_pipeline_en_5.5.0_3.0_1725664680454.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_accelerate_valentinguigon_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_accelerate_valentinguigon_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_valentinguigon_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ValentinGuigon/distilbert-base-uncased-finetuned-imdb-accelerate + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_baobao88_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_baobao88_en.md new file mode 100644 index 00000000000000..c190c19830a67f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_baobao88_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_baobao88 DistilBertEmbeddings from baobao88 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_baobao88 +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_baobao88` is a English model originally trained by baobao88. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_baobao88_en_5.5.0_3.0_1725639279771.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_baobao88_en_5.5.0_3.0_1725639279771.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_baobao88","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_baobao88","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_baobao88| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/baobao88/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_beccacohen_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_beccacohen_en.md new file mode 100644 index 00000000000000..9513ed66c3a649 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_beccacohen_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_beccacohen DistilBertEmbeddings from beccacohen +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_beccacohen +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_beccacohen` is a English model originally trained by beccacohen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_beccacohen_en_5.5.0_3.0_1725639383239.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_beccacohen_en_5.5.0_3.0_1725639383239.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_beccacohen","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_beccacohen","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_beccacohen| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/beccacohen/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_hamzasidhu786_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_hamzasidhu786_pipeline_en.md new file mode 100644 index 00000000000000..9c9eaf6aefca19 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_hamzasidhu786_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_hamzasidhu786_pipeline pipeline DistilBertEmbeddings from HamzaSidhu786 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_hamzasidhu786_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_hamzasidhu786_pipeline` is a English model originally trained by HamzaSidhu786. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_hamzasidhu786_pipeline_en_5.5.0_3.0_1725665143465.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_hamzasidhu786_pipeline_en_5.5.0_3.0_1725665143465.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_hamzasidhu786_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_hamzasidhu786_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_hamzasidhu786_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/HamzaSidhu786/distilbert-base-uncased-finetuned-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_hf_tutorial_using_accelerate_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_hf_tutorial_using_accelerate_en.md new file mode 100644 index 00000000000000..92bbd5b6c77237 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_hf_tutorial_using_accelerate_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_hf_tutorial_using_accelerate DistilBertEmbeddings from thuann2cats +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_hf_tutorial_using_accelerate +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_hf_tutorial_using_accelerate` is a English model originally trained by thuann2cats. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_hf_tutorial_using_accelerate_en_5.5.0_3.0_1725664827672.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_hf_tutorial_using_accelerate_en_5.5.0_3.0_1725664827672.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_hf_tutorial_using_accelerate","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_hf_tutorial_using_accelerate","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_hf_tutorial_using_accelerate| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/thuann2cats/distilbert-base-uncased-finetuned-imdb-HF-tutorial-using-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_lb100_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_lb100_en.md new file mode 100644 index 00000000000000..87bd64f6e97199 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_lb100_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_lb100 DistilBertEmbeddings from LB100 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_lb100 +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_lb100` is a English model originally trained by LB100. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_lb100_en_5.5.0_3.0_1725664688641.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_lb100_en_5.5.0_3.0_1725664688641.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_lb100","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_lb100","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_lb100| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/LB100/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_mireya25_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_mireya25_pipeline_en.md new file mode 100644 index 00000000000000..f6c1a4081a7076 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_mireya25_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_mireya25_pipeline pipeline DistilBertEmbeddings from Mireya25 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_mireya25_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_mireya25_pipeline` is a English model originally trained by Mireya25. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_mireya25_pipeline_en_5.5.0_3.0_1725665209529.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_mireya25_pipeline_en_5.5.0_3.0_1725665209529.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_mireya25_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_mireya25_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_mireya25_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Mireya25/distilbert-base-uncased-finetuned-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_rikrim_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_rikrim_pipeline_en.md new file mode 100644 index 00000000000000..e8c0e37388b3f0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_rikrim_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_rikrim_pipeline pipeline DistilBertEmbeddings from RiKrim +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_rikrim_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_rikrim_pipeline` is a English model originally trained by RiKrim. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_rikrim_pipeline_en_5.5.0_3.0_1725639299792.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_rikrim_pipeline_en_5.5.0_3.0_1725639299792.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_rikrim_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_rikrim_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_rikrim_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/RiKrim/distilbert-base-uncased-finetuned-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_ronenh24_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_ronenh24_pipeline_en.md new file mode 100644 index 00000000000000..4ecb9bebee3a59 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_imdb_ronenh24_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_ronenh24_pipeline pipeline DistilBertEmbeddings from ronenh24 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_ronenh24_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_ronenh24_pipeline` is a English model originally trained by ronenh24. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_ronenh24_pipeline_en_5.5.0_3.0_1725639665950.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_ronenh24_pipeline_en_5.5.0_3.0_1725639665950.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_ronenh24_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_ronenh24_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_ronenh24_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ronenh24/distilbert-base-uncased-finetuned-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_ner_arundhati87_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_ner_arundhati87_en.md new file mode 100644 index 00000000000000..3f7c38ea4e68fa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_ner_arundhati87_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_arundhati87 DistilBertForTokenClassification from arundhati87 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_arundhati87 +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_arundhati87` is a English model originally trained by arundhati87. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_arundhati87_en_5.5.0_3.0_1725653715195.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_arundhati87_en_5.5.0_3.0_1725653715195.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_arundhati87","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_arundhati87", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
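+
+After the pipeline above has been run, the predicted labels sit in the `ner` annotation column; a short follow-up sketch (reusing the column name set in the example):
+
+```python
+# flatten the NER annotations into one predicted label per row
+pipelineDF.selectExpr("explode(ner.result) as ner_label").show(truncate=False)
+```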
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_arundhati87| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/arundhati87/distilbert-base-uncased-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_ner_arundhati87_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_ner_arundhati87_pipeline_en.md new file mode 100644 index 00000000000000..181f3e6559aee0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_ner_arundhati87_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_arundhati87_pipeline pipeline DistilBertForTokenClassification from arundhati87 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_arundhati87_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_arundhati87_pipeline` is a English model originally trained by arundhati87. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_arundhati87_pipeline_en_5.5.0_3.0_1725653727719.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_arundhati87_pipeline_en_5.5.0_3.0_1725653727719.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_ner_arundhati87_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_ner_arundhati87_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_arundhati87_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/arundhati87/distilbert-base-uncased-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_ner_cerastes_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_ner_cerastes_en.md new file mode 100644 index 00000000000000..a929e45337c2d4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_ner_cerastes_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_cerastes DistilBertForTokenClassification from Cerastes +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_cerastes +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_cerastes` is a English model originally trained by Cerastes. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_cerastes_en_5.5.0_3.0_1725653402601.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_cerastes_en_5.5.0_3.0_1725653402601.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_cerastes","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_cerastes", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_cerastes| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Cerastes/distilbert-base-uncased_finetuned_ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_ner_iamdev_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_ner_iamdev_en.md new file mode 100644 index 00000000000000..e03aaf671ac001 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_ner_iamdev_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_iamdev DistilBertForTokenClassification from iamdev +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_iamdev +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_iamdev` is a English model originally trained by iamdev. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_iamdev_en_5.5.0_3.0_1725599615315.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_iamdev_en_5.5.0_3.0_1725599615315.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_iamdev","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_iamdev", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_iamdev| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/iamdev/distilbert-base-uncased-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_ner_iamdev_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_ner_iamdev_pipeline_en.md new file mode 100644 index 00000000000000..ca179142e96483 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_ner_iamdev_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_iamdev_pipeline pipeline DistilBertForTokenClassification from iamdev +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_iamdev_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_iamdev_pipeline` is a English model originally trained by iamdev. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_iamdev_pipeline_en_5.5.0_3.0_1725599627500.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_iamdev_pipeline_en_5.5.0_3.0_1725599627500.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_ner_iamdev_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_ner_iamdev_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_iamdev_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/iamdev/distilbert-base-uncased-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_ner_steven668_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_ner_steven668_en.md new file mode 100644 index 00000000000000..d14a9c09c84e9f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_ner_steven668_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_steven668 DistilBertForTokenClassification from Steven668 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_steven668 +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_steven668` is a English model originally trained by Steven668. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_steven668_en_5.5.0_3.0_1725653871982.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_steven668_en_5.5.0_3.0_1725653871982.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_steven668","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_steven668", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_steven668| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Steven668/distilbert-base-uncased-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_ner_steven668_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_ner_steven668_pipeline_en.md new file mode 100644 index 00000000000000..493cd9b32ae8ac --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_ner_steven668_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_steven668_pipeline pipeline DistilBertForTokenClassification from Steven668 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_steven668_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_steven668_pipeline` is a English model originally trained by Steven668. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_steven668_pipeline_en_5.5.0_3.0_1725653884015.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_steven668_pipeline_en_5.5.0_3.0_1725653884015.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_ner_steven668_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_ner_steven668_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_steven668_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Steven668/distilbert-base-uncased-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_sentiment_amazon_finetuned_emotions_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_sentiment_amazon_finetuned_emotions_en.md new file mode 100644 index 00000000000000..7fad3f7b538063 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_sentiment_amazon_finetuned_emotions_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_sentiment_amazon_finetuned_emotions DistilBertForSequenceClassification from jcesquivel +author: John Snow Labs +name: distilbert_base_uncased_finetuned_sentiment_amazon_finetuned_emotions +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_sentiment_amazon_finetuned_emotions` is a English model originally trained by jcesquivel. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_sentiment_amazon_finetuned_emotions_en_5.5.0_3.0_1725608433515.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_sentiment_amazon_finetuned_emotions_en_5.5.0_3.0_1725608433515.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_sentiment_amazon_finetuned_emotions","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_sentiment_amazon_finetuned_emotions", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
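+
+The predicted emotion label lands in the `class` annotation column configured above; one way to read it back (a minimal sketch, reusing the example's column name):
+
+```python
+# each row of "class.result" holds the predicted label for the corresponding input text
+pipelineDF.select("class.result").show(truncate=False)
+```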
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_sentiment_amazon_finetuned_emotions| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/jcesquivel/distilbert-base-uncased-finetuned-sentiment-amazon-finetuned-emotions \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_blaze07_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_blaze07_en.md new file mode 100644 index 00000000000000..ec8945184f78f4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_blaze07_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_blaze07 DistilBertForQuestionAnswering from Blaze07 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_blaze07 +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_blaze07` is a English model originally trained by Blaze07. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_blaze07_en_5.5.0_3.0_1725652227200.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_blaze07_en_5.5.0_3.0_1725652227200.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_blaze07","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_blaze07", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
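+
+The extracted answer span is written to the `answer` column configured above; it can be read back like this (a sketch that reuses the example's column names):
+
+```python
+# "answer.result" contains the text span the model selected from the context
+pipelineDF.select("answer.result").show(truncate=False)
+```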
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_blaze07| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Blaze07/distilbert-base-uncased-finetuned-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_dkimds_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_dkimds_en.md new file mode 100644 index 00000000000000..2e3e7f7d3f17b0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_dkimds_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_dkimds DistilBertEmbeddings from dkimds +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_dkimds +date: 2024-09-06 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_d5716d28_dkimds` is a English model originally trained by dkimds. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_dkimds_en_5.5.0_3.0_1725654450970.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_dkimds_en_5.5.0_3.0_1725654450970.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_dkimds","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("documents")
+    .setOutputCol("token")
+
+val embeddings = DistilBertEmbeddings
+    .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_dkimds", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+```
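+
+Each token receives one embedding vector in the `embeddings` column set above; a quick sanity check might look like this (a sketch, assuming the example's column names):
+
+```python
+# one embedding vector per token; DistilBERT-base models typically produce 768-dimensional vectors
+pipelineDF.selectExpr("explode(embeddings.embeddings) as token_embedding").show(1, truncate=False)
+```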
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_dkimds| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +References + +https://huggingface.co/dkimds/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_dkimds_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_dkimds_pipeline_en.md new file mode 100644 index 00000000000000..1daec903df5a34 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_dkimds_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_dkimds_pipeline pipeline DistilBertForQuestionAnswering from dkimds +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_dkimds_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_d5716d28_dkimds_pipeline` is a English model originally trained by dkimds. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_dkimds_pipeline_en_5.5.0_3.0_1725654463741.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_dkimds_pipeline_en_5.5.0_3.0_1725654463741.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_squad_d5716d28_dkimds_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_squad_d5716d28_dkimds_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_dkimds_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/dkimds/distilbert-base-uncased-finetuned-squad-d5716d28 + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_maseiya_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_maseiya_en.md new file mode 100644 index 00000000000000..a72f3afff2215b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_maseiya_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_maseiya DistilBertEmbeddings from maseiya +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_maseiya +date: 2024-09-06 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_d5716d28_maseiya` is a English model originally trained by maseiya. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_maseiya_en_5.5.0_3.0_1725652415493.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_maseiya_en_5.5.0_3.0_1725652415493.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_maseiya","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("documents")
+    .setOutputCol("token")
+
+val embeddings = DistilBertEmbeddings
+    .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_maseiya", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_maseiya| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +References + +https://huggingface.co/maseiya/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_miesnerjacob_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_miesnerjacob_en.md new file mode 100644 index 00000000000000..b5e43f93b8bbd4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_miesnerjacob_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_miesnerjacob DistilBertEmbeddings from miesnerjacob +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_miesnerjacob +date: 2024-09-06 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_d5716d28_miesnerjacob` is a English model originally trained by miesnerjacob. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_miesnerjacob_en_5.5.0_3.0_1725652363997.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_miesnerjacob_en_5.5.0_3.0_1725652363997.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_miesnerjacob","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("documents")
+    .setOutputCol("token")
+
+val embeddings = DistilBertEmbeddings
+    .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_miesnerjacob", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_miesnerjacob| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +References + +https://huggingface.co/miesnerjacob/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_scmis011_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_scmis011_en.md new file mode 100644 index 00000000000000..a17e04f9c25a6e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_scmis011_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_scmis011 DistilBertForQuestionAnswering from scmis011 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_scmis011 +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_d5716d28_scmis011` is a English model originally trained by scmis011. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_scmis011_en_5.5.0_3.0_1725621852398.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_scmis011_en_5.5.0_3.0_1725621852398.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_scmis011","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_scmis011", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_scmis011| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/scmis011/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_scmis011_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_scmis011_pipeline_en.md new file mode 100644 index 00000000000000..8cf95616184819 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_scmis011_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_scmis011_pipeline pipeline DistilBertForQuestionAnswering from scmis011 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_scmis011_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_d5716d28_scmis011_pipeline` is a English model originally trained by scmis011. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_scmis011_pipeline_en_5.5.0_3.0_1725621865161.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_scmis011_pipeline_en_5.5.0_3.0_1725621865161.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_squad_d5716d28_scmis011_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_squad_d5716d28_scmis011_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_scmis011_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/scmis011/distilbert-base-uncased-finetuned-squad-d5716d28 + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_sebastians_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_sebastians_en.md new file mode 100644 index 00000000000000..282ef755d89f2f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_sebastians_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_sebastians DistilBertEmbeddings from SebastianS +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_sebastians +date: 2024-09-06 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_d5716d28_sebastians` is a English model originally trained by SebastianS. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_sebastians_en_5.5.0_3.0_1725655011909.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_sebastians_en_5.5.0_3.0_1725655011909.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_sebastians","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("documents")
+    .setOutputCol("token")
+
+val embeddings = DistilBertEmbeddings
+    .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_sebastians", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_sebastians| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +References + +https://huggingface.co/SebastianS/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_sebastians_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_sebastians_pipeline_en.md new file mode 100644 index 00000000000000..28e0461b244d33 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_sebastians_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_sebastians_pipeline pipeline DistilBertForQuestionAnswering from SebastianS +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_sebastians_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_d5716d28_sebastians_pipeline` is a English model originally trained by SebastianS. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_sebastians_pipeline_en_5.5.0_3.0_1725655023200.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_sebastians_pipeline_en_5.5.0_3.0_1725655023200.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_squad_d5716d28_sebastians_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_squad_d5716d28_sebastians_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_sebastians_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/SebastianS/distilbert-base-uncased-finetuned-squad-d5716d28 + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_shadowtwin41_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_shadowtwin41_pipeline_en.md new file mode 100644 index 00000000000000..df6258891c2c86 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_d5716d28_shadowtwin41_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_shadowtwin41_pipeline pipeline DistilBertForQuestionAnswering from ShadowTwin41 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_shadowtwin41_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_d5716d28_shadowtwin41_pipeline` is a English model originally trained by ShadowTwin41. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_shadowtwin41_pipeline_en_5.5.0_3.0_1725654572451.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_shadowtwin41_pipeline_en_5.5.0_3.0_1725654572451.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_squad_d5716d28_shadowtwin41_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_squad_d5716d28_shadowtwin41_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_shadowtwin41_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/ShadowTwin41/distilbert-base-uncased-finetuned-squad-d5716d28 + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_katxtong_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_katxtong_en.md new file mode 100644 index 00000000000000..d5fa4fece66cbb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_katxtong_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_katxtong DistilBertForQuestionAnswering from katxtong +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_katxtong +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_katxtong` is a English model originally trained by katxtong. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_katxtong_en_5.5.0_3.0_1725621593293.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_katxtong_en_5.5.0_3.0_1725621593293.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_katxtong","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_katxtong", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_katxtong| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/katxtong/distilbert-base-uncased-finetuned-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_katxtong_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_katxtong_pipeline_en.md new file mode 100644 index 00000000000000..80103f0f1317dd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_katxtong_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_katxtong_pipeline pipeline DistilBertForQuestionAnswering from katxtong +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_katxtong_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_katxtong_pipeline` is a English model originally trained by katxtong. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_katxtong_pipeline_en_5.5.0_3.0_1725621605223.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_katxtong_pipeline_en_5.5.0_3.0_1725621605223.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_squad_katxtong_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_squad_katxtong_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
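+
+The snippet above assumes an existing DataFrame `df`. A minimal sketch of building one, under the assumption that the pipeline's internal document assembler reads `question` and `context` columns (inspect the loaded pipeline's stages if the expected column names differ):
+
+```python
+# Hypothetical input DataFrame; the column names are an assumption, not part of the metadata shown here.
+df = spark.createDataFrame(
+    [["What framework do I use?", "I use spark-nlp."]]
+).toDF("question", "context")
+
+annotations = pipeline.transform(df)
+annotations.select("answer.result").show(truncate=False)
+```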
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_katxtong_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/katxtong/distilbert-base-uncased-finetuned-squad + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_shyamkant_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_shyamkant_en.md new file mode 100644 index 00000000000000..da4a6113366e1f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_shyamkant_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_shyamkant DistilBertForQuestionAnswering from Shyamkant +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_shyamkant +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_shyamkant` is a English model originally trained by Shyamkant. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_shyamkant_en_5.5.0_3.0_1725621696707.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_shyamkant_en_5.5.0_3.0_1725621696707.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# spark: an active SparkSession started with Spark NLP (e.g. sparknlp.start())
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_shyamkant", "en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// spark: an active SparkSession with Spark NLP on the classpath
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_shyamkant", "en")
+  .setInputCols(Array("document_question", "document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_shyamkant| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Shyamkant/distilbert-base-uncased-finetuned-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_shyamkant_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_shyamkant_pipeline_en.md new file mode 100644 index 00000000000000..45482249415d46 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_shyamkant_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_shyamkant_pipeline pipeline DistilBertForQuestionAnswering from Shyamkant +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_shyamkant_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_shyamkant_pipeline` is a English model originally trained by Shyamkant. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_shyamkant_pipeline_en_5.5.0_3.0_1725621708839.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_shyamkant_pipeline_en_5.5.0_3.0_1725621708839.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_squad_shyamkant_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_squad_shyamkant_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_shyamkant_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Shyamkant/distilbert-base-uncased-finetuned-squad + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_toorgil_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_toorgil_pipeline_en.md new file mode 100644 index 00000000000000..7883ccd52b40c8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_finetuned_squad_toorgil_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_toorgil_pipeline pipeline DistilBertForQuestionAnswering from toorgil +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_toorgil_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_toorgil_pipeline` is a English model originally trained by toorgil. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_toorgil_pipeline_en_5.5.0_3.0_1725652108700.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_toorgil_pipeline_en_5.5.0_3.0_1725652108700.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_squad_toorgil_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_squad_toorgil_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_toorgil_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/toorgil/distilbert-base-uncased-finetuned-squad + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_italian_chamorro_cree_entry_classification_it.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_italian_chamorro_cree_entry_classification_it.md new file mode 100644 index 00000000000000..b810d8a63f591a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_italian_chamorro_cree_entry_classification_it.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Italian distilbert_base_uncased_italian_chamorro_cree_entry_classification DistilBertForSequenceClassification from reflex-project +author: John Snow Labs +name: distilbert_base_uncased_italian_chamorro_cree_entry_classification +date: 2024-09-06 +tags: [it, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: it +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_italian_chamorro_cree_entry_classification` is a Italian model originally trained by reflex-project. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_italian_chamorro_cree_entry_classification_it_5.5.0_3.0_1725608239002.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_italian_chamorro_cree_entry_classification_it_5.5.0_3.0_1725608239002.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# spark: an active SparkSession started with Spark NLP (e.g. sparknlp.start())
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_italian_chamorro_cree_entry_classification", "it") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// spark: an active SparkSession with Spark NLP on the classpath
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_italian_chamorro_cree_entry_classification", "it")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
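+
+Because this is an Italian model, inference text would normally be Italian; the English sample above only illustrates the mechanics. A minimal sketch, with a hypothetical Italian sentence, of reading the predicted label back:
+
+```python
+# Hypothetical Italian input; the label set depends on the reflex-project training data.
+italian_data = spark.createDataFrame([["Questo prodotto è fantastico"]]).toDF("text")
+pipelineModel.transform(italian_data).select("text", "class.result").show(truncate=False)
+```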
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_italian_chamorro_cree_entry_classification| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|it| +|Size:|249.5 MB| + +## References + +https://huggingface.co/reflex-project/distilbert-base-uncased-italian-ch-cr-entry-classification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_italian_chamorro_cree_entry_classification_pipeline_it.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_italian_chamorro_cree_entry_classification_pipeline_it.md new file mode 100644 index 00000000000000..975d940a6d96cb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_italian_chamorro_cree_entry_classification_pipeline_it.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Italian distilbert_base_uncased_italian_chamorro_cree_entry_classification_pipeline pipeline DistilBertForSequenceClassification from reflex-project +author: John Snow Labs +name: distilbert_base_uncased_italian_chamorro_cree_entry_classification_pipeline +date: 2024-09-06 +tags: [it, open_source, pipeline, onnx] +task: Text Classification +language: it +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_italian_chamorro_cree_entry_classification_pipeline` is a Italian model originally trained by reflex-project. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_italian_chamorro_cree_entry_classification_pipeline_it_5.5.0_3.0_1725608250509.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_italian_chamorro_cree_entry_classification_pipeline_it_5.5.0_3.0_1725608250509.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_italian_chamorro_cree_entry_classification_pipeline", lang = "it") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_italian_chamorro_cree_entry_classification_pipeline", lang = "it") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_italian_chamorro_cree_entry_classification_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|it| +|Size:|249.5 MB| + +## References + +https://huggingface.co/reflex-project/distilbert-base-uncased-italian-ch-cr-entry-classification + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_on_mini_finer_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_on_mini_finer_en.md new file mode 100644 index 00000000000000..cc8abbb5a89556 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_on_mini_finer_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_on_mini_finer DistilBertForTokenClassification from baluyotraf +author: John Snow Labs +name: distilbert_base_uncased_on_mini_finer +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_on_mini_finer` is a English model originally trained by baluyotraf. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_on_mini_finer_en_5.5.0_3.0_1725599178371.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_on_mini_finer_en_5.5.0_3.0_1725599178371.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# spark: an active SparkSession started with Spark NLP (e.g. sparknlp.start())
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_on_mini_finer", "en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// spark: an active SparkSession with Spark NLP on the classpath
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_on_mini_finer", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
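+
+The predictions come back token-aligned: the `token` and `ner` arrays have the same length, so they can be zipped into (token, tag) pairs. A minimal sketch, assuming the fitted pipeline above:
+
+```python
+from pyspark.sql import functions as F
+
+# Pair each token with its predicted tag; both arrays are aligned by position.
+pipelineDF.select(F.arrays_zip("token.result", "ner.result").alias("token_tags")).show(truncate=False)
+```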
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_on_mini_finer| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/baluyotraf/distilbert-base-uncased-on-mini-finer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_on_mini_finer_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_on_mini_finer_pipeline_en.md new file mode 100644 index 00000000000000..4818df10294895 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_on_mini_finer_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_on_mini_finer_pipeline pipeline DistilBertForTokenClassification from baluyotraf +author: John Snow Labs +name: distilbert_base_uncased_on_mini_finer_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_on_mini_finer_pipeline` is a English model originally trained by baluyotraf. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_on_mini_finer_pipeline_en_5.5.0_3.0_1725599190568.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_on_mini_finer_pipeline_en_5.5.0_3.0_1725599190568.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_on_mini_finer_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_on_mini_finer_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_on_mini_finer_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/baluyotraf/distilbert-base-uncased-on-mini-finer + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_question_answering_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_question_answering_pipeline_en.md new file mode 100644 index 00000000000000..831a7c9fed3933 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_question_answering_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_uncased_question_answering_pipeline pipeline DistilBertForQuestionAnswering from farzanrahmani +author: John Snow Labs +name: distilbert_base_uncased_question_answering_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_question_answering_pipeline` is a English model originally trained by farzanrahmani. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_question_answering_pipeline_en_5.5.0_3.0_1725654790122.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_question_answering_pipeline_en_5.5.0_3.0_1725654790122.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_question_answering_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_question_answering_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_question_answering_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/farzanrahmani/distilbert_base_uncased_question_answering + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_squad2_p5_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_squad2_p5_en.md new file mode 100644 index 00000000000000..24abe1933b849b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_squad2_p5_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_base_uncased_squad2_p5 DistilBertForQuestionAnswering from pminha +author: John Snow Labs +name: distilbert_base_uncased_squad2_p5 +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_squad2_p5` is a English model originally trained by pminha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_squad2_p5_en_5.5.0_3.0_1725654814001.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_squad2_p5_en_5.5.0_3.0_1725654814001.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# spark: an active SparkSession started with Spark NLP (e.g. sparknlp.start())
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_squad2_p5", "en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// spark: an active SparkSession with Spark NLP on the classpath
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_squad2_p5", "en")
+  .setInputCols(Array("document_question", "document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_squad2_p5| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|242.8 MB| + +## References + +https://huggingface.co/pminha/distilbert-base-uncased-squad2-p5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_squad2_p5_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_squad2_p5_pipeline_en.md new file mode 100644 index 00000000000000..34f14b8fff4010 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_squad2_p5_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_uncased_squad2_p5_pipeline pipeline DistilBertForQuestionAnswering from pminha +author: John Snow Labs +name: distilbert_base_uncased_squad2_p5_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_squad2_p5_pipeline` is a English model originally trained by pminha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_squad2_p5_pipeline_en_5.5.0_3.0_1725654827059.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_squad2_p5_pipeline_en_5.5.0_3.0_1725654827059.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_squad2_p5_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_squad2_p5_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_squad2_p5_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|242.8 MB| + +## References + +https://huggingface.co/pminha/distilbert-base-uncased-squad2-p5 + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_whole_word_masking_finetuned_imdb_nlp_course_chapter7_section2_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_whole_word_masking_finetuned_imdb_nlp_course_chapter7_section2_en.md new file mode 100644 index 00000000000000..f25e35826a5b9c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_base_uncased_whole_word_masking_finetuned_imdb_nlp_course_chapter7_section2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_whole_word_masking_finetuned_imdb_nlp_course_chapter7_section2 DistilBertEmbeddings from BanUrsus +author: John Snow Labs +name: distilbert_base_uncased_whole_word_masking_finetuned_imdb_nlp_course_chapter7_section2 +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_whole_word_masking_finetuned_imdb_nlp_course_chapter7_section2` is a English model originally trained by BanUrsus. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_whole_word_masking_finetuned_imdb_nlp_course_chapter7_section2_en_5.5.0_3.0_1725639744215.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_whole_word_masking_finetuned_imdb_nlp_course_chapter7_section2_en_5.5.0_3.0_1725639744215.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_whole_word_masking_finetuned_imdb_nlp_course_chapter7_section2","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_whole_word_masking_finetuned_imdb_nlp_course_chapter7_section2","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
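+
+Each entry in the `embeddings` column is a token-level annotation carrying a 768-dimensional vector (the DistilBERT base hidden size). A minimal sketch of pulling the raw vectors out, assuming the fitted pipeline above:
+
+```python
+from pyspark.sql import functions as F
+
+# One embedding annotation per token; explode to get a row per token with its vector.
+token_vectors = pipelineDF.select(F.explode("embeddings").alias("emb")) \
+    .select("emb.result", "emb.embeddings")
+token_vectors.show(truncate=False)
+```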
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_whole_word_masking_finetuned_imdb_nlp_course_chapter7_section2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/BanUrsus/distilbert-base-uncased-whole-word-masking-finetuned-imdb_nlp-course-chapter7-section2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_conll2003_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_conll2003_en.md new file mode 100644 index 00000000000000..0b4e882f0ed8ba --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_conll2003_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_conll2003 DistilBertForTokenClassification from dheerajmekala +author: John Snow Labs +name: distilbert_conll2003 +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_conll2003` is a English model originally trained by dheerajmekala. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_conll2003_en_5.5.0_3.0_1725653414156.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_conll2003_en_5.5.0_3.0_1725653414156.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# spark: an active SparkSession started with Spark NLP (e.g. sparknlp.start())
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_conll2003", "en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// spark: an active SparkSession with Spark NLP on the classpath
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_conll2003", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_conll2003| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/dheerajmekala/distilbert_conll2003 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_emotion_ahm123_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_emotion_ahm123_en.md new file mode 100644 index 00000000000000..3b6fd5a4cbe254 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_emotion_ahm123_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_emotion_ahm123 DistilBertForSequenceClassification from Ahm123 +author: John Snow Labs +name: distilbert_emotion_ahm123 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_emotion_ahm123` is a English model originally trained by Ahm123. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_emotion_ahm123_en_5.5.0_3.0_1725608141677.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_emotion_ahm123_en_5.5.0_3.0_1725608141677.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# spark: an active SparkSession started with Spark NLP (e.g. sparknlp.start())
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_emotion_ahm123", "en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// spark: an active SparkSession with Spark NLP on the classpath
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_emotion_ahm123", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
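+
+Beyond the bare label, each classification annotation keeps the per-class scores in its metadata, which is useful for thresholding. A minimal sketch, assuming the fitted pipeline above:
+
+```python
+# "class.result" is the predicted label; "class.metadata" typically holds the class scores keyed by label name.
+pipelineDF.select("text", "class.result", "class.metadata").show(truncate=False)
+```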
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_emotion_ahm123| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Ahm123/distilbert-emotion \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_emotion_ahm123_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_emotion_ahm123_pipeline_en.md new file mode 100644 index 00000000000000..ecb79887274225 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_emotion_ahm123_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_emotion_ahm123_pipeline pipeline DistilBertForSequenceClassification from Ahm123 +author: John Snow Labs +name: distilbert_emotion_ahm123_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_emotion_ahm123_pipeline` is a English model originally trained by Ahm123. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_emotion_ahm123_pipeline_en_5.5.0_3.0_1725608153198.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_emotion_ahm123_pipeline_en_5.5.0_3.0_1725608153198.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_emotion_ahm123_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_emotion_ahm123_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_emotion_ahm123_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Ahm123/distilbert-emotion + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_extractive_qa_project_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_extractive_qa_project_en.md new file mode 100644 index 00000000000000..b56a313e4755c6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_extractive_qa_project_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_extractive_qa_project DistilBertForQuestionAnswering from amara16 +author: John Snow Labs +name: distilbert_extractive_qa_project +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_extractive_qa_project` is a English model originally trained by amara16. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_extractive_qa_project_en_5.5.0_3.0_1725652743051.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_extractive_qa_project_en_5.5.0_3.0_1725652743051.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# spark: an active SparkSession started with Spark NLP (e.g. sparknlp.start())
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_extractive_qa_project", "en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// spark: an active SparkSession with Spark NLP on the classpath
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_extractive_qa_project", "en")
+  .setInputCols(Array("document_question", "document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
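+
+Each answer annotation also records where in the context the span was found. A minimal sketch of inspecting the character offsets and metadata, assuming the fitted pipeline above:
+
+```python
+from pyspark.sql import functions as F
+
+# begin/end are character offsets into the context document; metadata usually includes a confidence score.
+pipelineDF.select(F.explode("answer").alias("ans")) \
+    .select("ans.result", "ans.begin", "ans.end", "ans.metadata") \
+    .show(truncate=False)
+```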
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_extractive_qa_project| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/amara16/distilbert-extractive-qa-project \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetune_own_data_model_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetune_own_data_model_en.md new file mode 100644 index 00000000000000..0c387fc7ad90a0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetune_own_data_model_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_finetune_own_data_model DistilBertForTokenClassification from senthil2002 +author: John Snow Labs +name: distilbert_finetune_own_data_model +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetune_own_data_model` is a English model originally trained by senthil2002. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetune_own_data_model_en_5.5.0_3.0_1725599414325.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetune_own_data_model_en_5.5.0_3.0_1725599414325.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# spark: an active SparkSession started with Spark NLP (e.g. sparknlp.start())
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_finetune_own_data_model", "en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// spark: an active SparkSession with Spark NLP on the classpath
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_finetune_own_data_model", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetune_own_data_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/senthil2002/distilbert_finetune_own_data_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetune_own_data_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetune_own_data_model_pipeline_en.md new file mode 100644 index 00000000000000..e2f7cbac6b96ad --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetune_own_data_model_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_finetune_own_data_model_pipeline pipeline DistilBertForTokenClassification from senthil2002 +author: John Snow Labs +name: distilbert_finetune_own_data_model_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetune_own_data_model_pipeline` is a English model originally trained by senthil2002. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetune_own_data_model_pipeline_en_5.5.0_3.0_1725599426254.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetune_own_data_model_pipeline_en_5.5.0_3.0_1725599426254.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_finetune_own_data_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_finetune_own_data_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetune_own_data_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/senthil2002/distilbert_finetune_own_data_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_ai4privacy_v2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_ai4privacy_v2_pipeline_en.md new file mode 100644 index 00000000000000..642d1cb8c5c0c9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_ai4privacy_v2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_finetuned_ai4privacy_v2_pipeline pipeline DistilBertForTokenClassification from Isotonic +author: John Snow Labs +name: distilbert_finetuned_ai4privacy_v2_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_ai4privacy_v2_pipeline` is a English model originally trained by Isotonic. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_ai4privacy_v2_pipeline_en_5.5.0_3.0_1725599269633.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_ai4privacy_v2_pipeline_en_5.5.0_3.0_1725599269633.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_finetuned_ai4privacy_v2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_finetuned_ai4privacy_v2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_ai4privacy_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.6 MB| + +## References + +https://huggingface.co/Isotonic/distilbert_finetuned_ai4privacy_v2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_ner_amanpatkar_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_ner_amanpatkar_pipeline_en.md new file mode 100644 index 00000000000000..788ef5caf5c55a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_ner_amanpatkar_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_finetuned_ner_amanpatkar_pipeline pipeline DistilBertForTokenClassification from amanpatkar +author: John Snow Labs +name: distilbert_finetuned_ner_amanpatkar_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_ner_amanpatkar_pipeline` is a English model originally trained by amanpatkar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_ner_amanpatkar_pipeline_en_5.5.0_3.0_1725653303809.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_ner_amanpatkar_pipeline_en_5.5.0_3.0_1725653303809.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_finetuned_ner_amanpatkar_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_finetuned_ner_amanpatkar_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_ner_amanpatkar_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/amanpatkar/distilbert-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_ner_mido545_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_ner_mido545_en.md new file mode 100644 index 00000000000000..2def969539873e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_ner_mido545_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_finetuned_ner_mido545 DistilBertForTokenClassification from mido545 +author: John Snow Labs +name: distilbert_finetuned_ner_mido545 +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_ner_mido545` is a English model originally trained by mido545. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_ner_mido545_en_5.5.0_3.0_1725653752279.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_ner_mido545_en_5.5.0_3.0_1725653752279.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_finetuned_ner_mido545","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_finetuned_ner_mido545", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
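+
+The example above ends at `transform`; a short, illustrative follow-up for reading the predictions back out of `pipelineDF` (column names taken from that snippet) might be:
+
+```python
+# Assumes `pipelineDF` from the example above.
+# `ner.result` holds one predicted tag per token; `text` is the original input column.
+pipelineDF.select("text", "ner.result").show(truncate=False)
+
+# Or flatten to one row per predicted token label
+pipelineDF.selectExpr("explode(ner.result) as ner_label").show(truncate=False)
+```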
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_ner_mido545| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/mido545/distilbert-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_ner_mido545_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_ner_mido545_pipeline_en.md new file mode 100644 index 00000000000000..69647ffea4c934 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_ner_mido545_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_finetuned_ner_mido545_pipeline pipeline DistilBertForTokenClassification from mido545 +author: John Snow Labs +name: distilbert_finetuned_ner_mido545_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_ner_mido545_pipeline` is a English model originally trained by mido545. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_ner_mido545_pipeline_en_5.5.0_3.0_1725653764481.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_ner_mido545_pipeline_en_5.5.0_3.0_1725653764481.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_finetuned_ner_mido545_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_finetuned_ner_mido545_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_ner_mido545_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/mido545/distilbert-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_oppo_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_oppo_en.md new file mode 100644 index 00000000000000..c8baa342abf0ef --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_oppo_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_finetuned_oppo DistilBertForTokenClassification from dtorber +author: John Snow Labs +name: distilbert_finetuned_oppo +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_oppo` is a English model originally trained by dtorber. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_oppo_en_5.5.0_3.0_1725653826082.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_oppo_en_5.5.0_3.0_1725653826082.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_finetuned_oppo","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_finetuned_oppo", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_oppo| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/dtorber/distilbert-finetuned-oppo \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_oppo_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_oppo_pipeline_en.md new file mode 100644 index 00000000000000..dbb496fbc98bfa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_oppo_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_finetuned_oppo_pipeline pipeline DistilBertForTokenClassification from dtorber +author: John Snow Labs +name: distilbert_finetuned_oppo_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_oppo_pipeline` is a English model originally trained by dtorber. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_oppo_pipeline_en_5.5.0_3.0_1725653838725.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_oppo_pipeline_en_5.5.0_3.0_1725653838725.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_finetuned_oppo_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_finetuned_oppo_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_oppo_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/dtorber/distilbert-finetuned-oppo + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_squad_ashaduzzaman_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_squad_ashaduzzaman_en.md new file mode 100644 index 00000000000000..18b68545b2db12 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_squad_ashaduzzaman_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_finetuned_squad_ashaduzzaman DistilBertForQuestionAnswering from ashaduzzaman +author: John Snow Labs +name: distilbert_finetuned_squad_ashaduzzaman +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_squad_ashaduzzaman` is a English model originally trained by ashaduzzaman. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_squad_ashaduzzaman_en_5.5.0_3.0_1725621589241.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_squad_ashaduzzaman_en_5.5.0_3.0_1725621589241.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_finetuned_squad_ashaduzzaman","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_finetuned_squad_ashaduzzaman", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
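+
+As an illustrative follow-up to the snippet above, the extracted answer span can be read from the `answer` column of `pipelineDF` (column names taken from that snippet):
+
+```python
+# Assumes `pipelineDF` from the example above.
+pipelineDF.selectExpr(
+    "document_question.result as question",
+    "answer.result as answer"
+).show(truncate=False)
+```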
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_squad_ashaduzzaman| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ashaduzzaman/distilbert-finetuned-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_squad_ashaduzzaman_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_squad_ashaduzzaman_pipeline_en.md new file mode 100644 index 00000000000000..6def7d4757d8a4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_squad_ashaduzzaman_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_finetuned_squad_ashaduzzaman_pipeline pipeline DistilBertForQuestionAnswering from ashaduzzaman +author: John Snow Labs +name: distilbert_finetuned_squad_ashaduzzaman_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_squad_ashaduzzaman_pipeline` is a English model originally trained by ashaduzzaman. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_squad_ashaduzzaman_pipeline_en_5.5.0_3.0_1725621601261.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_squad_ashaduzzaman_pipeline_en_5.5.0_3.0_1725621601261.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_finetuned_squad_ashaduzzaman_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_finetuned_squad_ashaduzzaman_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_squad_ashaduzzaman_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ashaduzzaman/distilbert-finetuned-squad + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_squad_droo303_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_squad_droo303_pipeline_en.md new file mode 100644 index 00000000000000..53b9981c435764 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_squad_droo303_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_finetuned_squad_droo303_pipeline pipeline DistilBertForQuestionAnswering from droo303 +author: John Snow Labs +name: distilbert_finetuned_squad_droo303_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_squad_droo303_pipeline` is a English model originally trained by droo303. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_squad_droo303_pipeline_en_5.5.0_3.0_1725655028626.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_squad_droo303_pipeline_en_5.5.0_3.0_1725655028626.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_finetuned_squad_droo303_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_finetuned_squad_droo303_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
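+
+The pipeline snippet above leaves `df` undefined. For a question-answering pipeline the input DataFrame needs a question and a context column; a minimal sketch, assuming the `question`/`context` column names used by the standalone QA examples in these docs:
+
+```python
+# Minimal sketch; the question/context pair is illustrative.
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+pipeline = PretrainedPipeline("distilbert_finetuned_squad_droo303_pipeline", lang="en")
+
+df = spark.createDataFrame(
+    [["Which library does this card describe?", "This page documents a Spark NLP pipeline."]]
+).toDF("question", "context")
+
+pipeline.transform(df).selectExpr("answer.result as answer").show(truncate=False)
+```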
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_squad_droo303_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/droo303/distilbert-finetuned-squad + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_squadv2_nmc_29092004_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_squadv2_nmc_29092004_en.md new file mode 100644 index 00000000000000..45cf7387ddd387 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_squadv2_nmc_29092004_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_finetuned_squadv2_nmc_29092004 DistilBertForQuestionAnswering from NMC-29092004 +author: John Snow Labs +name: distilbert_finetuned_squadv2_nmc_29092004 +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_squadv2_nmc_29092004` is a English model originally trained by NMC-29092004. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_squadv2_nmc_29092004_en_5.5.0_3.0_1725655127432.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_squadv2_nmc_29092004_en_5.5.0_3.0_1725655127432.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_finetuned_squadv2_nmc_29092004","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_finetuned_squadv2_nmc_29092004", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_squadv2_nmc_29092004| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/NMC-29092004/distilbert-finetuned-squadv2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_squadv2_vubacktracking_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_squadv2_vubacktracking_en.md new file mode 100644 index 00000000000000..330fb7794d7577 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_squadv2_vubacktracking_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_finetuned_squadv2_vubacktracking DistilBertForQuestionAnswering from vubacktracking +author: John Snow Labs +name: distilbert_finetuned_squadv2_vubacktracking +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_squadv2_vubacktracking` is a English model originally trained by vubacktracking. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_squadv2_vubacktracking_en_5.5.0_3.0_1725621625009.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_squadv2_vubacktracking_en_5.5.0_3.0_1725621625009.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_finetuned_squadv2_vubacktracking","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_finetuned_squadv2_vubacktracking", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_squadv2_vubacktracking| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/vubacktracking/distilbert-finetuned-squadv2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_squadv2_vubacktracking_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_squadv2_vubacktracking_pipeline_en.md new file mode 100644 index 00000000000000..a97bdca545365d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_finetuned_squadv2_vubacktracking_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_finetuned_squadv2_vubacktracking_pipeline pipeline DistilBertForQuestionAnswering from vubacktracking +author: John Snow Labs +name: distilbert_finetuned_squadv2_vubacktracking_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_squadv2_vubacktracking_pipeline` is a English model originally trained by vubacktracking. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_squadv2_vubacktracking_pipeline_en_5.5.0_3.0_1725621636522.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_squadv2_vubacktracking_pipeline_en_5.5.0_3.0_1725621636522.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_finetuned_squadv2_vubacktracking_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_finetuned_squadv2_vubacktracking_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_squadv2_vubacktracking_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/vubacktracking/distilbert-finetuned-squadv2 + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_maccrobat_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_maccrobat_en.md new file mode 100644 index 00000000000000..8861a6d32b87eb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_maccrobat_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_maccrobat DistilBertForTokenClassification from SahuH +author: John Snow Labs +name: distilbert_maccrobat +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_maccrobat` is a English model originally trained by SahuH. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_maccrobat_en_5.5.0_3.0_1725599288584.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_maccrobat_en_5.5.0_3.0_1725599288584.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_maccrobat","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_maccrobat", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_maccrobat| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|244.0 MB| + +## References + +https://huggingface.co/SahuH/distilbert-maccrobat \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_maccrobat_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_maccrobat_pipeline_en.md new file mode 100644 index 00000000000000..c32f5b69fc867d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_maccrobat_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_maccrobat_pipeline pipeline DistilBertForTokenClassification from SahuH +author: John Snow Labs +name: distilbert_maccrobat_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_maccrobat_pipeline` is a English model originally trained by SahuH. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_maccrobat_pipeline_en_5.5.0_3.0_1725599300707.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_maccrobat_pipeline_en_5.5.0_3.0_1725599300707.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_maccrobat_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_maccrobat_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_maccrobat_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|244.0 MB| + +## References + +https://huggingface.co/SahuH/distilbert-maccrobat + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_ner_initial_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_ner_initial_en.md new file mode 100644 index 00000000000000..133b0af630a751 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_ner_initial_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_ner_initial DistilBertForTokenClassification from Azure-Heights +author: John Snow Labs +name: distilbert_ner_initial +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_ner_initial` is a English model originally trained by Azure-Heights. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_ner_initial_en_5.5.0_3.0_1725654142806.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_ner_initial_en_5.5.0_3.0_1725654142806.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_ner_initial","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_ner_initial", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_ner_initial| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Azure-Heights/distilbert-ner-initial \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_ner_initial_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_ner_initial_pipeline_en.md new file mode 100644 index 00000000000000..5296ad97cc12bf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_ner_initial_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_ner_initial_pipeline pipeline DistilBertForTokenClassification from Azure-Heights +author: John Snow Labs +name: distilbert_ner_initial_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_ner_initial_pipeline` is a English model originally trained by Azure-Heights. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_ner_initial_pipeline_en_5.5.0_3.0_1725654154281.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_ner_initial_pipeline_en_5.5.0_3.0_1725654154281.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_ner_initial_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_ner_initial_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_ner_initial_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Azure-Heights/distilbert-ner-initial + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_qa_english_german_spanish_model_xx.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_qa_english_german_spanish_model_xx.md new file mode 100644 index 00000000000000..6d3565f355e3ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_qa_english_german_spanish_model_xx.md @@ -0,0 +1,86 @@ +--- +layout: model +title: Multilingual distilbert_qa_english_german_spanish_model DistilBertForQuestionAnswering from ZYW +author: John Snow Labs +name: distilbert_qa_english_german_spanish_model +date: 2024-09-06 +tags: [xx, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_qa_english_german_spanish_model` is a Multilingual model originally trained by ZYW. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_qa_english_german_spanish_model_xx_5.5.0_3.0_1725622157467.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_qa_english_german_spanish_model_xx_5.5.0_3.0_1725622157467.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_qa_english_german_spanish_model","xx") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_qa_english_german_spanish_model", "xx")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
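+
+Because the model is tagged multilingual (`xx`), the same pipeline also accepts non-English question/context pairs. A small, illustrative variation on the example above (Spanish input, reusing `pipelineModel` from that snippet; the sentence is illustrative, not from the model card):
+
+```python
+# Assumes `pipelineModel` from the example above.
+data_es = spark.createDataFrame(
+    [["¿Qué framework uso?", "Yo uso spark-nlp."]]
+).toDF("question", "context")
+
+pipelineModel.transform(data_es).selectExpr("answer.result as answer").show(truncate=False)
+```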
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_qa_english_german_spanish_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|xx| +|Size:|505.4 MB| + +## References + +https://huggingface.co/ZYW/en-de-es-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_qa_english_german_vietnamese_chinese_spanish_model_xx.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_qa_english_german_vietnamese_chinese_spanish_model_xx.md new file mode 100644 index 00000000000000..b181b3bf8a8838 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_qa_english_german_vietnamese_chinese_spanish_model_xx.md @@ -0,0 +1,86 @@ +--- +layout: model +title: Multilingual distilbert_qa_english_german_vietnamese_chinese_spanish_model DistilBertForQuestionAnswering from ZYW +author: John Snow Labs +name: distilbert_qa_english_german_vietnamese_chinese_spanish_model +date: 2024-09-06 +tags: [xx, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_qa_english_german_vietnamese_chinese_spanish_model` is a Multilingual model originally trained by ZYW. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_qa_english_german_vietnamese_chinese_spanish_model_xx_5.5.0_3.0_1725652853838.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_qa_english_german_vietnamese_chinese_spanish_model_xx_5.5.0_3.0_1725652853838.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_qa_english_german_vietnamese_chinese_spanish_model","xx") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_qa_english_german_vietnamese_chinese_spanish_model", "xx")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_qa_english_german_vietnamese_chinese_spanish_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|xx| +|Size:|505.4 MB| + +## References + +https://huggingface.co/ZYW/en-de-vi-zh-es-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_qa_robustqa_baseline_01_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_qa_robustqa_baseline_01_en.md new file mode 100644 index 00000000000000..8b9d0036f53d51 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_qa_robustqa_baseline_01_en.md @@ -0,0 +1,98 @@ +--- +layout: model +title: English DistilBertForQuestionAnswering model (from minhdang241) +author: John Snow Labs +name: distilbert_qa_robustqa_baseline_01 +date: 2024-09-06 +tags: [en, open_source, distilbert, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `robustqa-baseline-01` is a English model originally trained by `minhdang241`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_qa_robustqa_baseline_01_en_5.5.0_3.0_1725652318441.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_qa_robustqa_baseline_01_en_5.5.0_3.0_1725652318441.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = MultiDocumentAssembler() \
+.setInputCols(["question", "context"]) \
+.setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_qa_robustqa_baseline_01","en") \
+.setInputCols(["document_question", "document_context"]) \
+.setOutputCol("answer")\
+.setCaseSensitive(True)
+
+pipeline = Pipeline(stages=[documentAssembler, spanClassifier])
+
+data = spark.createDataFrame([["What is my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+val documentAssembler = new MultiDocumentAssembler()
+.setInputCols(Array("question", "context"))
+.setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_qa_robustqa_baseline_01","en")
+.setInputCols(Array("document_question", "document_context"))
+.setOutputCol("answer")
+.setCaseSensitive(true)
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+
+val data = Seq(("What is my name?", "My name is Clara and I live in Berkeley.")).toDF("question", "context")
+
+val result = pipeline.fit(data).transform(data)
+```
+
+{:.nlu-block}
+```python
+import nlu
+nlu.load("en.answer_question.distil_bert.base.by_minhdang241").predict("""What is my name?|||My name is Clara and I live in Berkeley.""")
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_qa_robustqa_baseline_01| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +References + +- https://huggingface.co/minhdang241/robustqa-baseline-01 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_qa_robustqa_baseline_02_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_qa_robustqa_baseline_02_en.md new file mode 100644 index 00000000000000..e0f024eea5e28b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_qa_robustqa_baseline_02_en.md @@ -0,0 +1,98 @@ +--- +layout: model +title: English DistilBertForQuestionAnswering model (from leemii18) +author: John Snow Labs +name: distilbert_qa_robustqa_baseline_02 +date: 2024-09-06 +tags: [en, open_source, distilbert, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `robustqa-baseline-02` is a English model originally trained by `leemii18`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_qa_robustqa_baseline_02_en_5.5.0_3.0_1725621786263.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_qa_robustqa_baseline_02_en_5.5.0_3.0_1725621786263.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = MultiDocumentAssembler() \
+.setInputCols(["question", "context"]) \
+.setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_qa_robustqa_baseline_02","en") \
+.setInputCols(["document_question", "document_context"]) \
+.setOutputCol("answer")\
+.setCaseSensitive(True)
+
+pipeline = Pipeline(stages=[documentAssembler, spanClassifier])
+
+data = spark.createDataFrame([["What is my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+val documentAssembler = new MultiDocumentAssembler()
+.setInputCols(Array("question", "context"))
+.setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_qa_robustqa_baseline_02","en")
+.setInputCols(Array("document_question", "document_context"))
+.setOutputCol("answer")
+.setCaseSensitive(true)
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+
+val data = Seq(("What is my name?", "My name is Clara and I live in Berkeley.")).toDF("question", "context")
+
+val result = pipeline.fit(data).transform(data)
+```
+
+{:.nlu-block}
+```python
+import nlu
+nlu.load("en.answer_question.distil_bert.base").predict("""What is my name?|||My name is Clara and I live in Berkeley.""")
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_qa_robustqa_baseline_02| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +References + +- https://huggingface.co/leemii18/robustqa-baseline-02 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_qa_squad_english_german_spanish_vietnamese_chinese_model_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_qa_squad_english_german_spanish_vietnamese_chinese_model_pipeline_xx.md new file mode 100644 index 00000000000000..f475fb12de0abc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_qa_squad_english_german_spanish_vietnamese_chinese_model_pipeline_xx.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Multilingual distilbert_qa_squad_english_german_spanish_vietnamese_chinese_model_pipeline pipeline DistilBertForQuestionAnswering from ZYW +author: John Snow Labs +name: distilbert_qa_squad_english_german_spanish_vietnamese_chinese_model_pipeline +date: 2024-09-06 +tags: [xx, open_source, pipeline, onnx] +task: Question Answering +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_qa_squad_english_german_spanish_vietnamese_chinese_model_pipeline` is a Multilingual model originally trained by ZYW. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_qa_squad_english_german_spanish_vietnamese_chinese_model_pipeline_xx_5.5.0_3.0_1725652470955.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_qa_squad_english_german_spanish_vietnamese_chinese_model_pipeline_xx_5.5.0_3.0_1725652470955.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_qa_squad_english_german_spanish_vietnamese_chinese_model_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_qa_squad_english_german_spanish_vietnamese_chinese_model_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_qa_squad_english_german_spanish_vietnamese_chinese_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|505.4 MB| + +## References + +https://huggingface.co/ZYW/squad-en-de-es-vi-zh-model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_sentiment_analysis_socialmedia_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_sentiment_analysis_socialmedia_en.md new file mode 100644 index 00000000000000..d62c7214a1b3dc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_sentiment_analysis_socialmedia_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_sentiment_analysis_socialmedia DistilBertForSequenceClassification from DT12the +author: John Snow Labs +name: distilbert_sentiment_analysis_socialmedia +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_sentiment_analysis_socialmedia` is a English model originally trained by DT12the. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_sentiment_analysis_socialmedia_en_5.5.0_3.0_1725608134589.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_sentiment_analysis_socialmedia_en_5.5.0_3.0_1725608134589.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForSequenceClassification
+from pyspark.ml import Pipeline
+
+# assemble raw text into document annotations
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+# split each document into tokens
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# load the fine-tuned classifier and wire it to the document and token columns
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_sentiment_analysis_socialmedia","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_sentiment_analysis_socialmedia", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
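+
+As a quick check of what the snippet above produces (a minimal sketch, assuming the pipeline has just been fit and applied as shown), the predicted label can be read from the `result` field of the `class` output column:
+
+```python
+# Each row of "class" holds annotations; `result` carries the predicted label.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```
+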
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_sentiment_analysis_socialmedia| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/DT12the/distilBERT-sentiment-analysis-socialmedia \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_sentiment_analysis_socialmedia_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_sentiment_analysis_socialmedia_pipeline_en.md new file mode 100644 index 00000000000000..806f375b4f2a42 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_sentiment_analysis_socialmedia_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_sentiment_analysis_socialmedia_pipeline pipeline DistilBertForSequenceClassification from DT12the +author: John Snow Labs +name: distilbert_sentiment_analysis_socialmedia_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_sentiment_analysis_socialmedia_pipeline` is a English model originally trained by DT12the. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_sentiment_analysis_socialmedia_pipeline_en_5.5.0_3.0_1725608146607.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_sentiment_analysis_socialmedia_pipeline_en_5.5.0_3.0_1725608146607.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_sentiment_analysis_socialmedia_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_sentiment_analysis_socialmedia_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
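+
+For quick experiments without building a DataFrame first, the same pretrained pipeline can be run on an in-memory string. The sketch below assumes the bundled classifier writes its prediction to a `class` output column; the key name may differ, so inspect the returned dictionary if it does.
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("distilbert_sentiment_analysis_socialmedia_pipeline", lang = "en")
+
+# annotate() returns a dict keyed by output column name.
+result = pipeline.annotate("I really enjoyed this update!")
+print(result.get("class"))
+```
+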
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_sentiment_analysis_socialmedia_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/DT12the/distilBERT-sentiment-analysis-socialmedia + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbert_tokenizer_256k_mlm_750k_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbert_tokenizer_256k_mlm_750k_pipeline_en.md new file mode 100644 index 00000000000000..7ad9e670d82793 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbert_tokenizer_256k_mlm_750k_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_tokenizer_256k_mlm_750k_pipeline pipeline DistilBertEmbeddings from vocab-transformers +author: John Snow Labs +name: distilbert_tokenizer_256k_mlm_750k_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_tokenizer_256k_mlm_750k_pipeline` is a English model originally trained by vocab-transformers. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_tokenizer_256k_mlm_750k_pipeline_en_5.5.0_3.0_1725639799664.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_tokenizer_256k_mlm_750k_pipeline_en_5.5.0_3.0_1725639799664.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_tokenizer_256k_mlm_750k_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_tokenizer_256k_mlm_750k_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_tokenizer_256k_mlm_750k_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|891.9 MB| + +## References + +https://huggingface.co/vocab-transformers/distilbert-tokenizer_256k-MLM_750k + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbertbaselinehsthreeepoch_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbertbaselinehsthreeepoch_en.md new file mode 100644 index 00000000000000..096e401a9fe08f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbertbaselinehsthreeepoch_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbertbaselinehsthreeepoch DistilBertForQuestionAnswering from KarthikAlagarsamy +author: John Snow Labs +name: distilbertbaselinehsthreeepoch +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbertbaselinehsthreeepoch` is a English model originally trained by KarthikAlagarsamy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbertbaselinehsthreeepoch_en_5.5.0_3.0_1725654481693.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbertbaselinehsthreeepoch_en_5.5.0_3.0_1725654481693.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import MultiDocumentAssembler
+from sparknlp.annotator import DistilBertForQuestionAnswering
+from pyspark.ml import Pipeline
+
+# assemble the question and context columns into document annotations
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+# load the fine-tuned span classifier
+spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbertbaselinehsthreeepoch","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.MultiDocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbertbaselinehsthreeepoch", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
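+
+To see the extracted span from the example above (a minimal sketch, assuming the pipeline was just fit and applied as shown), read the `result` field of the `answer` column:
+
+```python
+# The span classifier writes its prediction into the "answer" column.
+pipelineDF.select("answer.result").show(truncate=False)
+```
+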
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbertbaselinehsthreeepoch| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/KarthikAlagarsamy/distilbertbaselineHSthreeepoch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilbertbaselinehsthreeepoch_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilbertbaselinehsthreeepoch_pipeline_en.md new file mode 100644 index 00000000000000..9e96966419053b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilbertbaselinehsthreeepoch_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbertbaselinehsthreeepoch_pipeline pipeline DistilBertForQuestionAnswering from KarthikAlagarsamy +author: John Snow Labs +name: distilbertbaselinehsthreeepoch_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbertbaselinehsthreeepoch_pipeline` is a English model originally trained by KarthikAlagarsamy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbertbaselinehsthreeepoch_pipeline_en_5.5.0_3.0_1725654493325.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbertbaselinehsthreeepoch_pipeline_en_5.5.0_3.0_1725654493325.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbertbaselinehsthreeepoch_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbertbaselinehsthreeepoch_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbertbaselinehsthreeepoch_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/KarthikAlagarsamy/distilbertbaselineHSthreeepoch + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distillbert_newscategoryclassification_fullmodel_3_en.md b/docs/_posts/ahmedlone127/2024-09-06-distillbert_newscategoryclassification_fullmodel_3_en.md new file mode 100644 index 00000000000000..666e7087a2959b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distillbert_newscategoryclassification_fullmodel_3_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distillbert_newscategoryclassification_fullmodel_3 DistilBertForSequenceClassification from akashmaggon +author: John Snow Labs +name: distillbert_newscategoryclassification_fullmodel_3 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distillbert_newscategoryclassification_fullmodel_3` is a English model originally trained by akashmaggon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distillbert_newscategoryclassification_fullmodel_3_en_5.5.0_3.0_1725608335014.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distillbert_newscategoryclassification_fullmodel_3_en_5.5.0_3.0_1725608335014.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForSequenceClassification
+from pyspark.ml import Pipeline
+
+# assemble raw text into document annotations
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+# split each document into tokens
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# load the fine-tuned classifier and wire it to the document and token columns
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distillbert_newscategoryclassification_fullmodel_3","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distillbert_newscategoryclassification_fullmodel_3", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distillbert_newscategoryclassification_fullmodel_3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|250.4 MB| + +## References + +https://huggingface.co/akashmaggon/distillbert-newscategoryclassification-fullmodel-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilroberta_base_distilbert_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilroberta_base_distilbert_en.md new file mode 100644 index 00000000000000..de74a215e8f79b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilroberta_base_distilbert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilroberta_base_distilbert RoBertaEmbeddings from distilbert +author: John Snow Labs +name: distilroberta_base_distilbert +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilroberta_base_distilbert` is a English model originally trained by distilbert. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilroberta_base_distilbert_en_5.5.0_3.0_1725660667348.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilroberta_base_distilbert_en_5.5.0_3.0_1725660667348.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("distilroberta_base_distilbert","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("distilroberta_base_distilbert","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
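+
+Once the pipeline above has run, each row of the `embeddings` column holds one annotation per token, with the token text in its `result` field and the vector in its `embeddings` field. A rough sketch of pulling these out follows; the 768-dimensional size is assumed from the DistilRoBERTa base architecture rather than stated in the card.
+
+```python
+from pyspark.sql import functions as F
+
+# Explode the token-level annotations and keep each token with its vector.
+tokens = pipelineDF.select(F.explode("embeddings").alias("ann"))
+tokens.select(
+    F.col("ann.result").alias("token"),
+    F.col("ann.embeddings").alias("vector")
+).show(truncate=False)
+```
+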
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilroberta_base_distilbert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|306.3 MB| + +## References + +https://huggingface.co/distilbert/distilroberta-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-distilroberta_base_distilbert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-distilroberta_base_distilbert_pipeline_en.md new file mode 100644 index 00000000000000..6b4700b18c105c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-distilroberta_base_distilbert_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilroberta_base_distilbert_pipeline pipeline RoBertaEmbeddings from distilbert +author: John Snow Labs +name: distilroberta_base_distilbert_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilroberta_base_distilbert_pipeline` is a English model originally trained by distilbert. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilroberta_base_distilbert_pipeline_en_5.5.0_3.0_1725660683195.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilroberta_base_distilbert_pipeline_en_5.5.0_3.0_1725660683195.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilroberta_base_distilbert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilroberta_base_distilbert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilroberta_base_distilbert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|306.3 MB| + +## References + +https://huggingface.co/distilbert/distilroberta-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_disregard_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_disregard_en.md new file mode 100644 index 00000000000000..f1576f15993ff0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_disregard_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_disregard CamemBertEmbeddings from JayBDev +author: John Snow Labs +name: dummy_disregard +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_disregard` is a English model originally trained by JayBDev. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_disregard_en_5.5.0_3.0_1725637028731.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_disregard_en_5.5.0_3.0_1725637028731.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_disregard","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_disregard","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_disregard| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/JayBDev/dummy-disregard \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_disregard_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_disregard_pipeline_en.md new file mode 100644 index 00000000000000..dfc773baee170c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_disregard_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_disregard_pipeline pipeline CamemBertEmbeddings from JayBDev +author: John Snow Labs +name: dummy_disregard_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_disregard_pipeline` is a English model originally trained by JayBDev. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_disregard_pipeline_en_5.5.0_3.0_1725637107740.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_disregard_pipeline_en_5.5.0_3.0_1725637107740.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_disregard_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_disregard_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_disregard_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/JayBDev/dummy-disregard + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_jdonnelly0804_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_jdonnelly0804_en.md new file mode 100644 index 00000000000000..946b53cadd78fd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_jdonnelly0804_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_jdonnelly0804 CamemBertEmbeddings from jdonnelly0804 +author: John Snow Labs +name: dummy_jdonnelly0804 +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_jdonnelly0804` is a English model originally trained by jdonnelly0804. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_jdonnelly0804_en_5.5.0_3.0_1725637120501.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_jdonnelly0804_en_5.5.0_3.0_1725637120501.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_jdonnelly0804","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_jdonnelly0804","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_jdonnelly0804| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/jdonnelly0804/dummy \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_2_ysharma_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_2_ysharma_en.md new file mode 100644 index 00000000000000..8b156e3bb41aab --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_2_ysharma_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_2_ysharma CamemBertEmbeddings from ysharma +author: John Snow Labs +name: dummy_model_2_ysharma +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_2_ysharma` is a English model originally trained by ysharma. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_2_ysharma_en_5.5.0_3.0_1725637586984.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_2_ysharma_en_5.5.0_3.0_1725637586984.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_2_ysharma","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_2_ysharma","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_2_ysharma| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/ysharma/dummy-model-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_2_ysharma_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_2_ysharma_pipeline_en.md new file mode 100644 index 00000000000000..dbb71b84d71d3f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_2_ysharma_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_2_ysharma_pipeline pipeline CamemBertEmbeddings from ysharma +author: John Snow Labs +name: dummy_model_2_ysharma_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_2_ysharma_pipeline` is a English model originally trained by ysharma. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_2_ysharma_pipeline_en_5.5.0_3.0_1725637663026.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_2_ysharma_pipeline_en_5.5.0_3.0_1725637663026.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_2_ysharma_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_2_ysharma_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_2_ysharma_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/ysharma/dummy-model-2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_alex_deng_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_alex_deng_en.md new file mode 100644 index 00000000000000..65a1f5d7d2eb4d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_alex_deng_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_alex_deng CamemBertEmbeddings from Alex-Deng +author: John Snow Labs +name: dummy_model_alex_deng +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_alex_deng` is a English model originally trained by Alex-Deng. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_alex_deng_en_5.5.0_3.0_1725636749581.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_alex_deng_en_5.5.0_3.0_1725636749581.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_alex_deng","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_alex_deng","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_alex_deng| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/Alex-Deng/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_alex_deng_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_alex_deng_pipeline_en.md new file mode 100644 index 00000000000000..04000c6b7c2da9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_alex_deng_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_alex_deng_pipeline pipeline CamemBertEmbeddings from Alex-Deng +author: John Snow Labs +name: dummy_model_alex_deng_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_alex_deng_pipeline` is a English model originally trained by Alex-Deng. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_alex_deng_pipeline_en_5.5.0_3.0_1725636828165.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_alex_deng_pipeline_en_5.5.0_3.0_1725636828165.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_alex_deng_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_alex_deng_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_alex_deng_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/Alex-Deng/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_alexey_wk_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_alexey_wk_en.md new file mode 100644 index 00000000000000..3b453d690bde89 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_alexey_wk_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_alexey_wk CamemBertEmbeddings from alexey-wk +author: John Snow Labs +name: dummy_model_alexey_wk +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_alexey_wk` is a English model originally trained by alexey-wk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_alexey_wk_en_5.5.0_3.0_1725632835415.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_alexey_wk_en_5.5.0_3.0_1725632835415.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_alexey_wk","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_alexey_wk","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_alexey_wk| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/alexey-wk/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_aokikenichi_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_aokikenichi_en.md new file mode 100644 index 00000000000000..3bce5913d46e3d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_aokikenichi_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_aokikenichi CamemBertEmbeddings from aokikenichi +author: John Snow Labs +name: dummy_model_aokikenichi +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_aokikenichi` is a English model originally trained by aokikenichi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_aokikenichi_en_5.5.0_3.0_1725632183291.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_aokikenichi_en_5.5.0_3.0_1725632183291.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_aokikenichi","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_aokikenichi","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_aokikenichi| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/aokikenichi/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_aokikenichi_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_aokikenichi_pipeline_en.md new file mode 100644 index 00000000000000..6f9f9b2cee6d86 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_aokikenichi_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_aokikenichi_pipeline pipeline CamemBertEmbeddings from aokikenichi +author: John Snow Labs +name: dummy_model_aokikenichi_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_aokikenichi_pipeline` is a English model originally trained by aokikenichi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_aokikenichi_pipeline_en_5.5.0_3.0_1725632261300.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_aokikenichi_pipeline_en_5.5.0_3.0_1725632261300.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_aokikenichi_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_aokikenichi_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_aokikenichi_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/aokikenichi/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_bingwork_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_bingwork_en.md new file mode 100644 index 00000000000000..a55bc04f963294 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_bingwork_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_bingwork CamemBertEmbeddings from bingwork +author: John Snow Labs +name: dummy_model_bingwork +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_bingwork` is a English model originally trained by bingwork. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_bingwork_en_5.5.0_3.0_1725636867903.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_bingwork_en_5.5.0_3.0_1725636867903.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_bingwork","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_bingwork","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_bingwork| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/bingwork/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_bingwork_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_bingwork_pipeline_en.md new file mode 100644 index 00000000000000..5ad72352acde27 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_bingwork_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_bingwork_pipeline pipeline CamemBertEmbeddings from bingwork +author: John Snow Labs +name: dummy_model_bingwork_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_bingwork_pipeline` is a English model originally trained by bingwork. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_bingwork_pipeline_en_5.5.0_3.0_1725636946123.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_bingwork_pipeline_en_5.5.0_3.0_1725636946123.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_bingwork_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_bingwork_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_bingwork_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/bingwork/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_chuckym_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_chuckym_en.md new file mode 100644 index 00000000000000..b066c9a5130a84 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_chuckym_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_chuckym CamemBertEmbeddings from chuckym +author: John Snow Labs +name: dummy_model_chuckym +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_chuckym` is a English model originally trained by chuckym. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_chuckym_en_5.5.0_3.0_1725637528996.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_chuckym_en_5.5.0_3.0_1725637528996.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_chuckym","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_chuckym","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_chuckym| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/chuckym/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_ericchchiu_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_ericchchiu_en.md new file mode 100644 index 00000000000000..7bedfd2adb72e7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_ericchchiu_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_ericchchiu CamemBertEmbeddings from ericchchiu +author: John Snow Labs +name: dummy_model_ericchchiu +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_ericchchiu` is a English model originally trained by ericchchiu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_ericchchiu_en_5.5.0_3.0_1725632826539.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_ericchchiu_en_5.5.0_3.0_1725632826539.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_ericchchiu","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_ericchchiu","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_ericchchiu| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/ericchchiu/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_exilesaber_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_exilesaber_en.md new file mode 100644 index 00000000000000..4d23911986d511 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_exilesaber_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_exilesaber CamemBertEmbeddings from ExileSaber +author: John Snow Labs +name: dummy_model_exilesaber +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_exilesaber` is a English model originally trained by ExileSaber. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_exilesaber_en_5.5.0_3.0_1725632435724.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_exilesaber_en_5.5.0_3.0_1725632435724.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_exilesaber","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_exilesaber","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
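+
+To inspect what the example above actually produces, the `embeddings` annotation column in `pipelineDF` can be unpacked with plain Spark SQL functions. A sketch, assuming the column names from the snippet above and the standard Spark NLP annotation schema:
+
+```python
+from pyspark.sql import functions as F
+
+# Each element of "embeddings" is one annotation per token; the float vector sits in
+# the nested "embeddings" field and the token text in "result".
+token_vectors = pipelineDF.select(F.explode("embeddings").alias("ann")) \
+    .select(F.col("ann.result").alias("token"), F.col("ann.embeddings").alias("vector"))
+token_vectors.show(truncate=80)
+```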
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_exilesaber| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/ExileSaber/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_fab7_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_fab7_en.md new file mode 100644 index 00000000000000..a71e78fb347fb8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_fab7_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_fab7 CamemBertEmbeddings from fab7 +author: John Snow Labs +name: dummy_model_fab7 +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_fab7` is a English model originally trained by fab7. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_fab7_en_5.5.0_3.0_1725632420925.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_fab7_en_5.5.0_3.0_1725632420925.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_fab7","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_fab7","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_fab7| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/fab7/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_gautamguru_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_gautamguru_en.md new file mode 100644 index 00000000000000..cd7d86fb2b6327 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_gautamguru_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_gautamguru CamemBertEmbeddings from gautamguru +author: John Snow Labs +name: dummy_model_gautamguru +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_gautamguru` is a English model originally trained by gautamguru. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_gautamguru_en_5.5.0_3.0_1725637359841.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_gautamguru_en_5.5.0_3.0_1725637359841.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_gautamguru","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_gautamguru","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
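+
+When only a few sentences need to be embedded, the fitted `pipelineModel` from the example above can be wrapped in a `LightPipeline`, which runs the same stages on plain Python strings. A short sketch under the same assumptions as the example:
+
+```python
+from sparknlp.base import LightPipeline
+
+# LightPipeline avoids the DataFrame round-trip for quick, single-document inference.
+light = LightPipeline(pipelineModel)
+result = light.fullAnnotate("I love spark-nlp")[0]
+print([token.result for token in result["token"]])
+```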
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_gautamguru| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/gautamguru/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_gautamguru_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_gautamguru_pipeline_en.md new file mode 100644 index 00000000000000..625411653c8935 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_gautamguru_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_gautamguru_pipeline pipeline CamemBertEmbeddings from gautamguru +author: John Snow Labs +name: dummy_model_gautamguru_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_gautamguru_pipeline` is a English model originally trained by gautamguru. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_gautamguru_pipeline_en_5.5.0_3.0_1725637439102.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_gautamguru_pipeline_en_5.5.0_3.0_1725637439102.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# Load the pretrained pipeline and run it over a DataFrame with a "text" column.
+pipeline = PretrainedPipeline("dummy_model_gautamguru_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// Load the pretrained pipeline and run it over a DataFrame with a "text" column.
+val pipeline = new PretrainedPipeline("dummy_model_gautamguru_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
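+
+Besides `transform` on a DataFrame, the pretrained pipeline can be applied straight to raw strings with `annotate`. A sketch; the `"token"` output key is an assumption based on the stages listed under "Included Models":
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("dummy_model_gautamguru_pipeline", lang = "en")
+
+# annotate() accepts a single string (or a list of strings) and returns plain Python dicts.
+result = pipeline.annotate("I love spark-nlp")
+print(result["token"])
+```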
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_gautamguru_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/gautamguru/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_jaweed123_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_jaweed123_en.md new file mode 100644 index 00000000000000..135756afdaead8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_jaweed123_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_jaweed123 CamemBertEmbeddings from jaweed123 +author: John Snow Labs +name: dummy_model_jaweed123 +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_jaweed123` is a English model originally trained by jaweed123. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_jaweed123_en_5.5.0_3.0_1725632648096.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_jaweed123_en_5.5.0_3.0_1725632648096.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_jaweed123","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_jaweed123","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_jaweed123| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/jaweed123/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_jaweed123_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_jaweed123_pipeline_en.md new file mode 100644 index 00000000000000..0d685165ff89eb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_jaweed123_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_jaweed123_pipeline pipeline CamemBertEmbeddings from jaweed123 +author: John Snow Labs +name: dummy_model_jaweed123_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_jaweed123_pipeline` is a English model originally trained by jaweed123. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_jaweed123_pipeline_en_5.5.0_3.0_1725632726259.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_jaweed123_pipeline_en_5.5.0_3.0_1725632726259.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# Load the pretrained pipeline and run it over a DataFrame with a "text" column.
+pipeline = PretrainedPipeline("dummy_model_jaweed123_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// Load the pretrained pipeline and run it over a DataFrame with a "text" column.
+val pipeline = new PretrainedPipeline("dummy_model_jaweed123_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
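+
+The `annotations` DataFrame produced above carries one annotation column per included stage. A quick way to confirm what the pipeline added; the column names used here ("token", "embeddings") are assumptions derived from the "Included Models" list, not guaranteed by this card:
+
+```python
+# Inspect the schema the pipeline produced, then peek at the token-level results.
+annotations.printSchema()
+annotations.select("token.result", "embeddings.result").show(truncate=False)
+```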
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_jaweed123_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/jaweed123/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_jp1773hsu_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_jp1773hsu_pipeline_en.md new file mode 100644 index 00000000000000..a4b76ae3404af1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_jp1773hsu_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_jp1773hsu_pipeline pipeline CamemBertEmbeddings from jp1773hsu +author: John Snow Labs +name: dummy_model_jp1773hsu_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_jp1773hsu_pipeline` is a English model originally trained by jp1773hsu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_jp1773hsu_pipeline_en_5.5.0_3.0_1725631940947.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_jp1773hsu_pipeline_en_5.5.0_3.0_1725631940947.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# Load the pretrained pipeline and run it over a DataFrame with a "text" column.
+pipeline = PretrainedPipeline("dummy_model_jp1773hsu_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// Load the pretrained pipeline and run it over a DataFrame with a "text" column.
+val pipeline = new PretrainedPipeline("dummy_model_jp1773hsu_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_jp1773hsu_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/jp1773hsu/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_kwasi_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_kwasi_en.md new file mode 100644 index 00000000000000..06fec5dd5ccfc0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_kwasi_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_kwasi CamemBertEmbeddings from Kwasi +author: John Snow Labs +name: dummy_model_kwasi +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_kwasi` is a English model originally trained by Kwasi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_kwasi_en_5.5.0_3.0_1725632842934.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_kwasi_en_5.5.0_3.0_1725632842934.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_kwasi","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_kwasi","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
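+
+If a downstream Spark ML stage needs plain vector columns instead of annotation structs, an `EmbeddingsFinisher` can be appended to the pipeline built above. A hedged sketch; the finisher stage and its output column name are illustrative additions, not part of this model:
+
+```python
+from sparknlp.base import EmbeddingsFinisher
+
+# Convert the CamemBERT annotation column into Spark ML vectors for downstream stages.
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["finished_embeddings"]) \
+    .setOutputAsVector(True)
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings, finisher])
+```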
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_kwasi| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/Kwasi/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_leisa_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_leisa_en.md new file mode 100644 index 00000000000000..25657f33202533 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_leisa_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_leisa CamemBertEmbeddings from Leisa +author: John Snow Labs +name: dummy_model_leisa +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_leisa` is a English model originally trained by Leisa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_leisa_en_5.5.0_3.0_1725632197316.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_leisa_en_5.5.0_3.0_1725632197316.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_leisa","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_leisa","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
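+
+The fitted `pipelineModel` from the example above is a regular Spark ML `PipelineModel`, so it can be persisted and reloaded later without re-downloading the embeddings. A sketch with an assumed local path:
+
+```python
+from pyspark.ml import PipelineModel
+
+# Persist the fitted pipeline (the path is illustrative).
+pipelineModel.write().overwrite().save("/tmp/dummy_model_leisa_fitted")
+
+# Reload it later and transform new data with the same stages.
+restored = PipelineModel.load("/tmp/dummy_model_leisa_fitted")
+restored.transform(data).select("embeddings.result").show(truncate=False)
+```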
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_leisa| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/Leisa/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_leisa_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_leisa_pipeline_en.md new file mode 100644 index 00000000000000..8b117cba416f2c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_leisa_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_leisa_pipeline pipeline CamemBertEmbeddings from Leisa +author: John Snow Labs +name: dummy_model_leisa_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_leisa_pipeline` is a English model originally trained by Leisa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_leisa_pipeline_en_5.5.0_3.0_1725632275748.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_leisa_pipeline_en_5.5.0_3.0_1725632275748.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# Load the pretrained pipeline and run it over a DataFrame with a "text" column.
+pipeline = PretrainedPipeline("dummy_model_leisa_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// Load the pretrained pipeline and run it over a DataFrame with a "text" column.
+val pipeline = new PretrainedPipeline("dummy_model_leisa_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_leisa_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/Leisa/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_longbabin_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_longbabin_en.md new file mode 100644 index 00000000000000..7718de926cf62c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_longbabin_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_longbabin CamemBertEmbeddings from LongBabin +author: John Snow Labs +name: dummy_model_longbabin +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_longbabin` is a English model originally trained by LongBabin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_longbabin_en_5.5.0_3.0_1725632574234.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_longbabin_en_5.5.0_3.0_1725632574234.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_longbabin","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_longbabin","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_longbabin| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/LongBabin/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_longbabin_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_longbabin_pipeline_en.md new file mode 100644 index 00000000000000..e73d064e116c10 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_longbabin_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_longbabin_pipeline pipeline CamemBertEmbeddings from LongBabin +author: John Snow Labs +name: dummy_model_longbabin_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_longbabin_pipeline` is a English model originally trained by LongBabin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_longbabin_pipeline_en_5.5.0_3.0_1725632653073.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_longbabin_pipeline_en_5.5.0_3.0_1725632653073.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# Load the pretrained pipeline and run it over a DataFrame with a "text" column.
+pipeline = PretrainedPipeline("dummy_model_longbabin_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// Load the pretrained pipeline and run it over a DataFrame with a "text" column.
+val pipeline = new PretrainedPipeline("dummy_model_longbabin_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_longbabin_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/LongBabin/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_lourvalli_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_lourvalli_en.md new file mode 100644 index 00000000000000..30383cb1eccae3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_lourvalli_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_lourvalli CamemBertEmbeddings from lourvalli +author: John Snow Labs +name: dummy_model_lourvalli +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_lourvalli` is a English model originally trained by lourvalli. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_lourvalli_en_5.5.0_3.0_1725637110593.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_lourvalli_en_5.5.0_3.0_1725637110593.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_lourvalli","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_lourvalli","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_lourvalli| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/lourvalli/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_manil5267_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_manil5267_en.md new file mode 100644 index 00000000000000..37f3b38bb88362 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_manil5267_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_manil5267 CamemBertEmbeddings from manil5267 +author: John Snow Labs +name: dummy_model_manil5267 +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_manil5267` is a English model originally trained by manil5267. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_manil5267_en_5.5.0_3.0_1725637340354.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_manil5267_en_5.5.0_3.0_1725637340354.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_manil5267","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_manil5267","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_manil5267| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/manil5267/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_manil5267_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_manil5267_pipeline_en.md new file mode 100644 index 00000000000000..3258147075556c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_manil5267_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_manil5267_pipeline pipeline CamemBertEmbeddings from manil5267 +author: John Snow Labs +name: dummy_model_manil5267_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_manil5267_pipeline` is a English model originally trained by manil5267. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_manil5267_pipeline_en_5.5.0_3.0_1725637419211.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_manil5267_pipeline_en_5.5.0_3.0_1725637419211.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# Load the pretrained pipeline and run it over a DataFrame with a "text" column.
+pipeline = PretrainedPipeline("dummy_model_manil5267_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// Load the pretrained pipeline and run it over a DataFrame with a "text" column.
+val pipeline = new PretrainedPipeline("dummy_model_manil5267_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_manil5267_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/manil5267/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_muger01_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_muger01_pipeline_en.md new file mode 100644 index 00000000000000..9fb6ad85cf2ef6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_muger01_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_muger01_pipeline pipeline CamemBertEmbeddings from muger01 +author: John Snow Labs +name: dummy_model_muger01_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_muger01_pipeline` is a English model originally trained by muger01. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_muger01_pipeline_en_5.5.0_3.0_1725633140153.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_muger01_pipeline_en_5.5.0_3.0_1725633140153.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# Load the pretrained pipeline and run it over a DataFrame with a "text" column.
+pipeline = PretrainedPipeline("dummy_model_muger01_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// Load the pretrained pipeline and run it over a DataFrame with a "text" column.
+val pipeline = new PretrainedPipeline("dummy_model_muger01_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
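+
+When character offsets or annotation metadata are needed rather than the flat strings returned by `annotate`, the same pretrained pipeline exposes `fullAnnotate`. A short sketch; the `"token"` key is an assumption based on the included stages:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("dummy_model_muger01_pipeline", lang = "en")
+
+# fullAnnotate keeps begin/end offsets and metadata for every annotation.
+annotated = pipeline.fullAnnotate("I love spark-nlp")[0]
+for token in annotated["token"]:
+    print(token.begin, token.end, token.result)
+```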
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_muger01_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/muger01/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_nayoung1_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_nayoung1_en.md new file mode 100644 index 00000000000000..7eced308f9f59a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_nayoung1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_nayoung1 CamemBertEmbeddings from Nayoung1 +author: John Snow Labs +name: dummy_model_nayoung1 +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_nayoung1` is a English model originally trained by Nayoung1. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_nayoung1_en_5.5.0_3.0_1725637038658.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_nayoung1_en_5.5.0_3.0_1725637038658.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_nayoung1","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_nayoung1","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
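+
+Throughput and memory on larger corpora are mostly governed by the annotator's batching parameters. A hedged sketch of the setters commonly exposed by transformer-based embeddings in Spark NLP; the values shown are illustrative, not taken from this card:
+
+```python
+from sparknlp.annotator import CamemBertEmbeddings
+
+# Batch size trades throughput for memory; overly long token sequences are truncated.
+embeddings = CamemBertEmbeddings.pretrained("dummy_model_nayoung1","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("embeddings") \
+    .setBatchSize(8) \
+    .setMaxSentenceLength(512)
+```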
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_nayoung1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/Nayoung1/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_nayoung1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_nayoung1_pipeline_en.md new file mode 100644 index 00000000000000..23786acfb3eba5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_nayoung1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_nayoung1_pipeline pipeline CamemBertEmbeddings from Nayoung1 +author: John Snow Labs +name: dummy_model_nayoung1_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_nayoung1_pipeline` is a English model originally trained by Nayoung1. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_nayoung1_pipeline_en_5.5.0_3.0_1725637116717.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_nayoung1_pipeline_en_5.5.0_3.0_1725637116717.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# Load the pretrained pipeline and run it over a DataFrame with a "text" column.
+pipeline = PretrainedPipeline("dummy_model_nayoung1_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// Load the pretrained pipeline and run it over a DataFrame with a "text" column.
+val pipeline = new PretrainedPipeline("dummy_model_nayoung1_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_nayoung1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/Nayoung1/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_nugget00_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_nugget00_en.md new file mode 100644 index 00000000000000..b488593b5f108f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_nugget00_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_nugget00 CamemBertEmbeddings from nugget00 +author: John Snow Labs +name: dummy_model_nugget00 +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_nugget00` is a English model originally trained by nugget00. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_nugget00_en_5.5.0_3.0_1725631858936.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_nugget00_en_5.5.0_3.0_1725631858936.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_nugget00","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_nugget00","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_nugget00| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/nugget00/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_renly_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_renly_en.md new file mode 100644 index 00000000000000..c79f2640cb2e29 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_renly_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_renly CamemBertEmbeddings from renly +author: John Snow Labs +name: dummy_model_renly +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_renly` is a English model originally trained by renly. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_renly_en_5.5.0_3.0_1725631919662.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_renly_en_5.5.0_3.0_1725631919662.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_renly","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_renly","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
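+
+To see exactly what the embeddings stage adds to the DataFrame in the example above, printing the schema of the annotation column is the quickest check:
+
+```python
+# The "embeddings" column is an array of annotation structs holding begin/end offsets,
+# the token text (result), metadata and the float vector for each token.
+pipelineDF.select("embeddings").printSchema()
+pipelineDF.selectExpr("size(embeddings) AS tokens_annotated").show()
+```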
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_renly| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/renly/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_renly_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_renly_pipeline_en.md new file mode 100644 index 00000000000000..011481e2c1f5f1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_renly_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_renly_pipeline pipeline CamemBertEmbeddings from renly +author: John Snow Labs +name: dummy_model_renly_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_renly_pipeline` is a English model originally trained by renly. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_renly_pipeline_en_5.5.0_3.0_1725631997481.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_renly_pipeline_en_5.5.0_3.0_1725631997481.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# Load the pretrained pipeline and run it over a DataFrame with a "text" column.
+pipeline = PretrainedPipeline("dummy_model_renly_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// Load the pretrained pipeline and run it over a DataFrame with a "text" column.
+val pipeline = new PretrainedPipeline("dummy_model_renly_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_renly_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/renly/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_rizwanakt_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_rizwanakt_pipeline_en.md new file mode 100644 index 00000000000000..7b35bc1b3385bf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_rizwanakt_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_rizwanakt_pipeline pipeline CamemBertEmbeddings from Rizwanakt +author: John Snow Labs +name: dummy_model_rizwanakt_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_rizwanakt_pipeline` is a English model originally trained by Rizwanakt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_rizwanakt_pipeline_en_5.5.0_3.0_1725632816131.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_rizwanakt_pipeline_en_5.5.0_3.0_1725632816131.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_rizwanakt_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_rizwanakt_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_rizwanakt_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/Rizwanakt/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_sapphirejade_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_sapphirejade_pipeline_en.md new file mode 100644 index 00000000000000..15545ca4425f3a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_sapphirejade_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_sapphirejade_pipeline pipeline CamemBertEmbeddings from sapphirejade +author: John Snow Labs +name: dummy_model_sapphirejade_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_sapphirejade_pipeline` is a English model originally trained by sapphirejade. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_sapphirejade_pipeline_en_5.5.0_3.0_1725632333004.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_sapphirejade_pipeline_en_5.5.0_3.0_1725632333004.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_sapphirejade_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_sapphirejade_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_sapphirejade_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/sapphirejade/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_srushnaik_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_srushnaik_pipeline_en.md new file mode 100644 index 00000000000000..e00ccb2c8f6a96 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_srushnaik_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_srushnaik_pipeline pipeline CamemBertEmbeddings from SrushNaik +author: John Snow Labs +name: dummy_model_srushnaik_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_srushnaik_pipeline` is a English model originally trained by SrushNaik. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_srushnaik_pipeline_en_5.5.0_3.0_1725631966419.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_srushnaik_pipeline_en_5.5.0_3.0_1725631966419.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_srushnaik_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_srushnaik_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_srushnaik_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/SrushNaik/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_suzuki0829_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_suzuki0829_en.md new file mode 100644 index 00000000000000..34558ea4f1f091 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_suzuki0829_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_suzuki0829 CamemBertEmbeddings from suzuki0829 +author: John Snow Labs +name: dummy_model_suzuki0829 +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_suzuki0829` is a English model originally trained by suzuki0829. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_suzuki0829_en_5.5.0_3.0_1725632096988.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_suzuki0829_en_5.5.0_3.0_1725632096988.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_suzuki0829","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_suzuki0829","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
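+
+The Python example above omits its imports; a short sketch of the ones it relies on, assuming a standard PySpark plus Spark NLP installation:
+
+```python
+# Imports assumed by the Python snippet above (session bootstrap included for completeness)
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, CamemBertEmbeddings
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()  # provides the `spark` session used by spark.createDataFrame(...)
+```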
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_suzuki0829| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/suzuki0829/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_suzuki0829_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_suzuki0829_pipeline_en.md new file mode 100644 index 00000000000000..9cb711dfa4564f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_suzuki0829_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_suzuki0829_pipeline pipeline CamemBertEmbeddings from suzuki0829 +author: John Snow Labs +name: dummy_model_suzuki0829_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_suzuki0829_pipeline` is a English model originally trained by suzuki0829. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_suzuki0829_pipeline_en_5.5.0_3.0_1725632176218.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_suzuki0829_pipeline_en_5.5.0_3.0_1725632176218.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_suzuki0829_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_suzuki0829_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_suzuki0829_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/suzuki0829/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_tzoght_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_tzoght_en.md new file mode 100644 index 00000000000000..793b38cf56d46b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_tzoght_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_tzoght CamemBertEmbeddings from tzoght +author: John Snow Labs +name: dummy_model_tzoght +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_tzoght` is a English model originally trained by tzoght. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_tzoght_en_5.5.0_3.0_1725636851498.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_tzoght_en_5.5.0_3.0_1725636851498.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_tzoght","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_tzoght","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_tzoght| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/tzoght/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_tzoght_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_tzoght_pipeline_en.md new file mode 100644 index 00000000000000..453407daf0a141 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_tzoght_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_tzoght_pipeline pipeline CamemBertEmbeddings from tzoght +author: John Snow Labs +name: dummy_model_tzoght_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_tzoght_pipeline` is a English model originally trained by tzoght. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_tzoght_pipeline_en_5.5.0_3.0_1725636930676.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_tzoght_pipeline_en_5.5.0_3.0_1725636930676.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_tzoght_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_tzoght_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_tzoght_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/tzoght/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_wjh70301_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_wjh70301_en.md new file mode 100644 index 00000000000000..1cb68764350559 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_wjh70301_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_wjh70301 CamemBertEmbeddings from wjh70301 +author: John Snow Labs +name: dummy_model_wjh70301 +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_wjh70301` is a English model originally trained by wjh70301. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_wjh70301_en_5.5.0_3.0_1725633150154.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_wjh70301_en_5.5.0_3.0_1725633150154.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_wjh70301","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_wjh70301","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_wjh70301| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/wjh70301/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_wjh70301_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_wjh70301_pipeline_en.md new file mode 100644 index 00000000000000..e6eb04b9881671 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_wjh70301_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_wjh70301_pipeline pipeline CamemBertEmbeddings from wjh70301 +author: John Snow Labs +name: dummy_model_wjh70301_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_wjh70301_pipeline` is a English model originally trained by wjh70301. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_wjh70301_pipeline_en_5.5.0_3.0_1725633224942.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_wjh70301_pipeline_en_5.5.0_3.0_1725633224942.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_wjh70301_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_wjh70301_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_wjh70301_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/wjh70301/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_youarebelongtome_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_youarebelongtome_en.md new file mode 100644 index 00000000000000..d60ca9447cbcd5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_youarebelongtome_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_youarebelongtome CamemBertEmbeddings from youarebelongtome +author: John Snow Labs +name: dummy_model_youarebelongtome +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_youarebelongtome` is a English model originally trained by youarebelongtome. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_youarebelongtome_en_5.5.0_3.0_1725632610547.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_youarebelongtome_en_5.5.0_3.0_1725632610547.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_youarebelongtome","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_youarebelongtome","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
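+
+For quick single-document inference it can be convenient to wrap the fitted model in a `LightPipeline`; a sketch assuming the `pipelineModel` fitted in the example above:
+
+```python
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)             # pipelineModel from the example above
+result = light.fullAnnotate("I love spark-nlp")[0]
+token_vectors = result["embeddings"]             # one annotation per token
+print(token_vectors[0].embeddings[:5])           # first few dimensions of the first token vector
+```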
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_youarebelongtome| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/youarebelongtome/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-dummy_model_youarebelongtome_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_youarebelongtome_pipeline_en.md new file mode 100644 index 00000000000000..9653ff3c5245d6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-dummy_model_youarebelongtome_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_youarebelongtome_pipeline pipeline CamemBertEmbeddings from youarebelongtome +author: John Snow Labs +name: dummy_model_youarebelongtome_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_youarebelongtome_pipeline` is a English model originally trained by youarebelongtome. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_youarebelongtome_pipeline_en_5.5.0_3.0_1725632688831.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_youarebelongtome_pipeline_en_5.5.0_3.0_1725632688831.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_youarebelongtome_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_youarebelongtome_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_youarebelongtome_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/youarebelongtome/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-eng_lug_en.md b/docs/_posts/ahmedlone127/2024-09-06-eng_lug_en.md new file mode 100644 index 00000000000000..e2693c5dbf7764 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-eng_lug_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English eng_lug MarianTransformer from hellennamulinda +author: John Snow Labs +name: eng_lug +date: 2024-09-06 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`eng_lug` is a English model originally trained by hellennamulinda. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/eng_lug_en_5.5.0_3.0_1725635405607.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/eng_lug_en_5.5.0_3.0_1725635405607.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("translation") + +marian = MarianTransformer.pretrained("eng_lug","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val marian = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = MarianTransformer.pretrained("eng_lug","en") + .setInputCols(Array("sentence")) + .setOutputCol("translation") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
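+
+In the snippet above the sentence detector writes to `translation` while the MarianTransformer reads `sentence` and writes to `embeddings`, so the output column names appear to be swapped. A consistent wiring would look like the following sketch (column names are illustrative, not part of the saved model):
+
+```python
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, MarianTransformer
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")        # sentences feed the translator
+
+marian = MarianTransformer.pretrained("eng_lug", "en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")     # translated text ends up here
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline.fit(data).transform(data).select("translation.result").show(truncate=False)
+```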
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|eng_lug| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|530.3 MB| + +## References + +https://huggingface.co/hellennamulinda/eng-lug \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-eng_lug_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-eng_lug_pipeline_en.md new file mode 100644 index 00000000000000..6fd744a55f0f36 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-eng_lug_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English eng_lug_pipeline pipeline MarianTransformer from hellennamulinda +author: John Snow Labs +name: eng_lug_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`eng_lug_pipeline` is a English model originally trained by hellennamulinda. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/eng_lug_pipeline_en_5.5.0_3.0_1725635432937.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/eng_lug_pipeline_en_5.5.0_3.0_1725635432937.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("eng_lug_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("eng_lug_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
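+
+For ad-hoc translation of a single string, the pipeline can also be used without building a DataFrame, via `annotate`; a small sketch (the keys of the returned dict depend on the pipeline's internal column names, so treat them as assumptions):
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("eng_lug_pipeline", lang="en")
+result = pipeline.annotate("I love spark-nlp")
+print(result)  # dict of output columns, including the translated sentence(s)
+```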
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|eng_lug_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|530.8 MB| + +## References + +https://huggingface.co/hellennamulinda/eng-lug + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-english_coptic_norm_group_greekified_bt_en.md b/docs/_posts/ahmedlone127/2024-09-06-english_coptic_norm_group_greekified_bt_en.md new file mode 100644 index 00000000000000..0d746a1bf95eee --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-english_coptic_norm_group_greekified_bt_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English english_coptic_norm_group_greekified_bt MarianTransformer from megalaa +author: John Snow Labs +name: english_coptic_norm_group_greekified_bt +date: 2024-09-06 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`english_coptic_norm_group_greekified_bt` is a English model originally trained by megalaa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/english_coptic_norm_group_greekified_bt_en_5.5.0_3.0_1725635346421.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/english_coptic_norm_group_greekified_bt_en_5.5.0_3.0_1725635346421.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("translation") + +marian = MarianTransformer.pretrained("english_coptic_norm_group_greekified_bt","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val marian = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = MarianTransformer.pretrained("english_coptic_norm_group_greekified_bt","en") + .setInputCols(Array("sentence")) + .setOutputCol("translation") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|english_coptic_norm_group_greekified_bt| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|530.3 MB| + +## References + +https://huggingface.co/megalaa/en-cop-norm-group-greekified-bt \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-english_german_translation_en.md b/docs/_posts/ahmedlone127/2024-09-06-english_german_translation_en.md new file mode 100644 index 00000000000000..5f67edabc41ee8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-english_german_translation_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English english_german_translation MarianTransformer from alina1997 +author: John Snow Labs +name: english_german_translation +date: 2024-09-06 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`english_german_translation` is a English model originally trained by alina1997. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/english_german_translation_en_5.5.0_3.0_1725636460504.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/english_german_translation_en_5.5.0_3.0_1725636460504.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("translation") + +marian = MarianTransformer.pretrained("english_german_translation","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val marian = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = MarianTransformer.pretrained("english_german_translation","en") + .setInputCols(Array("sentence")) + .setOutputCol("translation") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|english_german_translation| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|499.4 MB| + +## References + +https://huggingface.co/alina1997/en_de_translation \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-english_japanese_xlm_5_en.md b/docs/_posts/ahmedlone127/2024-09-06-english_japanese_xlm_5_en.md new file mode 100644 index 00000000000000..56cc75e8fb4468 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-english_japanese_xlm_5_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English english_japanese_xlm_5 XlmRoBertaEmbeddings from Sotaro0124 +author: John Snow Labs +name: english_japanese_xlm_5 +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`english_japanese_xlm_5` is a English model originally trained by Sotaro0124. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/english_japanese_xlm_5_en_5.5.0_3.0_1725626110435.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/english_japanese_xlm_5_en_5.5.0_3.0_1725626110435.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("english_japanese_xlm_5","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("english_japanese_xlm_5","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
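+
+To inspect the token embeddings produced by the example above, the annotation structs in the `embeddings` column can be exploded; a sketch that reuses `pipelineDF` from that snippet:
+
+```python
+from pyspark.sql import functions as F
+
+# one row per token: the token text and the dimensionality of its vector
+pipelineDF \
+    .select(F.explode("embeddings").alias("emb")) \
+    .select(F.col("emb.result").alias("token"), F.size("emb.embeddings").alias("dim")) \
+    .show(truncate=False)
+```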
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|english_japanese_xlm_5| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[xlm_roberta]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Sotaro0124/en_ja_xlm_5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-english_japanese_xlm_5_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-english_japanese_xlm_5_pipeline_en.md new file mode 100644 index 00000000000000..ffa6ba8e86b18d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-english_japanese_xlm_5_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English english_japanese_xlm_5_pipeline pipeline XlmRoBertaEmbeddings from Sotaro0124 +author: John Snow Labs +name: english_japanese_xlm_5_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`english_japanese_xlm_5_pipeline` is a English model originally trained by Sotaro0124. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/english_japanese_xlm_5_pipeline_en_5.5.0_3.0_1725626164339.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/english_japanese_xlm_5_pipeline_en_5.5.0_3.0_1725626164339.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("english_japanese_xlm_5_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("english_japanese_xlm_5_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|english_japanese_xlm_5_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Sotaro0124/en_ja_xlm_5 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-english_multinerd_ner_more_training_en.md b/docs/_posts/ahmedlone127/2024-09-06-english_multinerd_ner_more_training_en.md new file mode 100644 index 00000000000000..8805dc5998ded6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-english_multinerd_ner_more_training_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English english_multinerd_ner_more_training DistilBertForTokenClassification from pariakashani +author: John Snow Labs +name: english_multinerd_ner_more_training +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`english_multinerd_ner_more_training` is a English model originally trained by pariakashani. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/english_multinerd_ner_more_training_en_5.5.0_3.0_1725654075374.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/english_multinerd_ner_more_training_en_5.5.0_3.0_1725654075374.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = DistilBertForTokenClassification.pretrained("english_multinerd_ner_more_training","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = DistilBertForTokenClassification.pretrained("english_multinerd_ner_more_training", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
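+
+The token classifier above is wired to a `documents` column, but the DocumentAssembler in the same snippet produces `document`, so the input column most likely needs to match; entity chunks can then be recovered with a `NerConverter`. A sketch reusing the names from the example above (plus the imports it assumes):
+
+```python
+from sparknlp.annotator import NerConverter
+from pyspark.ml import Pipeline
+
+tokenClassifier = tokenClassifier.setInputCols(["document", "token"])  # match the assembler output
+
+nerConverter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier, nerConverter])
+pipeline.fit(data).transform(data).select("ner_chunk.result").show(truncate=False)
+```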
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|english_multinerd_ner_more_training| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/pariakashani/en-multinerd-ner-more-training \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-english_multinerd_ner_more_training_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-english_multinerd_ner_more_training_pipeline_en.md new file mode 100644 index 00000000000000..90dc604268aa55 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-english_multinerd_ner_more_training_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English english_multinerd_ner_more_training_pipeline pipeline DistilBertForTokenClassification from pariakashani +author: John Snow Labs +name: english_multinerd_ner_more_training_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`english_multinerd_ner_more_training_pipeline` is a English model originally trained by pariakashani. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/english_multinerd_ner_more_training_pipeline_en_5.5.0_3.0_1725654087079.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/english_multinerd_ner_more_training_pipeline_en_5.5.0_3.0_1725654087079.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("english_multinerd_ner_more_training_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("english_multinerd_ner_more_training_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|english_multinerd_ner_more_training_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.4 MB| + +## References + +https://huggingface.co/pariakashani/en-multinerd-ner-more-training + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-esgi_nlp_tp4_virtual_assistant_pipeline_fr.md b/docs/_posts/ahmedlone127/2024-09-06-esgi_nlp_tp4_virtual_assistant_pipeline_fr.md new file mode 100644 index 00000000000000..1f5559f23f1763 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-esgi_nlp_tp4_virtual_assistant_pipeline_fr.md @@ -0,0 +1,66 @@ +--- +layout: model +title: French esgi_nlp_tp4_virtual_assistant_pipeline pipeline RoBertaForTokenClassification from Florent-COMPAGNONI +author: John Snow Labs +name: esgi_nlp_tp4_virtual_assistant_pipeline +date: 2024-09-06 +tags: [fr, open_source, pipeline, onnx] +task: Named Entity Recognition +language: fr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`esgi_nlp_tp4_virtual_assistant_pipeline` is a French model originally trained by Florent-COMPAGNONI. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/esgi_nlp_tp4_virtual_assistant_pipeline_fr_5.5.0_3.0_1725638266297.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/esgi_nlp_tp4_virtual_assistant_pipeline_fr_5.5.0_3.0_1725638266297.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("esgi_nlp_tp4_virtual_assistant_pipeline", lang = "fr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("esgi_nlp_tp4_virtual_assistant_pipeline", lang = "fr") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|esgi_nlp_tp4_virtual_assistant_pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|fr| +|Size:|317.4 MB| + +## References + +https://huggingface.co/Florent-COMPAGNONI/esgi-nlp-tp4-virtual_assistant_pipeline \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-esgi_nlp_tp4_virtual_assistant_pipeline_pipeline_fr.md b/docs/_posts/ahmedlone127/2024-09-06-esgi_nlp_tp4_virtual_assistant_pipeline_pipeline_fr.md new file mode 100644 index 00000000000000..f3d0b46a207e61 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-esgi_nlp_tp4_virtual_assistant_pipeline_pipeline_fr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: French esgi_nlp_tp4_virtual_assistant_pipeline_pipeline pipeline RoBertaForTokenClassification from Florent-COMPAGNONI +author: John Snow Labs +name: esgi_nlp_tp4_virtual_assistant_pipeline_pipeline +date: 2024-09-06 +tags: [fr, open_source, pipeline, onnx] +task: Named Entity Recognition +language: fr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`esgi_nlp_tp4_virtual_assistant_pipeline_pipeline` is a French model originally trained by Florent-COMPAGNONI. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/esgi_nlp_tp4_virtual_assistant_pipeline_pipeline_fr_5.5.0_3.0_1725638349055.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/esgi_nlp_tp4_virtual_assistant_pipeline_pipeline_fr_5.5.0_3.0_1725638349055.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("esgi_nlp_tp4_virtual_assistant_pipeline_pipeline", lang = "fr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("esgi_nlp_tp4_virtual_assistant_pipeline_pipeline", lang = "fr") +val annotations = pipeline.transform(df) + +``` +
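+
+Since this is a French model, a quick sanity check with a French utterance may be useful; a sketch using the lighter `annotate` API (the example sentence and the structure of the returned dict are assumptions):
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("esgi_nlp_tp4_virtual_assistant_pipeline_pipeline", lang="fr")
+print(pipeline.annotate("Allume la lumière du salon à dix-huit heures."))
+```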
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|esgi_nlp_tp4_virtual_assistant_pipeline_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fr| +|Size:|317.5 MB| + +## References + +https://huggingface.co/Florent-COMPAGNONI/esgi-nlp-tp4-virtual_assistant_pipeline + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-expe_4_en.md b/docs/_posts/ahmedlone127/2024-09-06-expe_4_en.md new file mode 100644 index 00000000000000..f9b4fe294d579f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-expe_4_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English expe_4 RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: expe_4 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`expe_4` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/expe_4_en_5.5.0_3.0_1725613422385.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/expe_4_en_5.5.0_3.0_1725613422385.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = RoBertaForSequenceClassification.pretrained("expe_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = RoBertaForSequenceClassification.pretrained("expe_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
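+
+To read the predicted label out of the example above, select the `class` column it configures; a short sketch reusing `pipelineDF` and `sequenceClassifier` from that snippet:
+
+```python
+print(sequenceClassifier.getClasses())              # label set learned by the model
+pipelineDF.select("text", "class.result").show(truncate=False)
+```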
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|expe_4| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/Expe_4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-expe_4_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-expe_4_pipeline_en.md new file mode 100644 index 00000000000000..b3996e5a3cd200 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-expe_4_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English expe_4_pipeline pipeline RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: expe_4_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`expe_4_pipeline` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/expe_4_pipeline_en_5.5.0_3.0_1725613445875.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/expe_4_pipeline_en_5.5.0_3.0_1725613445875.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("expe_4_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("expe_4_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
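+The snippet above assumes an input DataFrame `df` with a `text` column. A minimal sketch for building one and checking what the pipeline adds (assuming an active SparkSession named `spark`):
+
+```python
+# minimal sketch: construct the `df` the snippet above expects
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("expe_4_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+annotations.printSchema()  # inspect the annotation columns added by the pipeline stages
+```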
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|expe_4_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/Expe_4 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-extract_question_from_text_en.md b/docs/_posts/ahmedlone127/2024-09-06-extract_question_from_text_en.md new file mode 100644 index 00000000000000..f4aef01a10cbd3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-extract_question_from_text_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English extract_question_from_text DistilBertForQuestionAnswering from wdavies +author: John Snow Labs +name: extract_question_from_text +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`extract_question_from_text` is a English model originally trained by wdavies. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/extract_question_from_text_en_5.5.0_3.0_1725652776537.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/extract_question_from_text_en_5.5.0_3.0_1725652776537.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("extract_question_from_text","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("extract_question_from_text", "en")
+  .setInputCols(Array("document_question","document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("document_question", "document_context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div><div class="h3-box" markdown="1">
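+After the pipeline runs, the predicted span is stored in the `answer` annotation column defined above. A minimal sketch (reusing `pipelineDF` from the Python example) for reading it back:
+
+```python
+# minimal sketch: assumes `pipelineDF` from the Python example above
+pipelineDF.select("document_question.result", "answer.result").show(truncate=False)
+```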
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|extract_question_from_text| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/wdavies/extract-question-from-text \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-extractive_qa_model_en.md b/docs/_posts/ahmedlone127/2024-09-06-extractive_qa_model_en.md new file mode 100644 index 00000000000000..02616714e38e52 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-extractive_qa_model_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English extractive_qa_model DistilBertForQuestionAnswering from waqasali1707 +author: John Snow Labs +name: extractive_qa_model +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`extractive_qa_model` is a English model originally trained by waqasali1707. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/extractive_qa_model_en_5.5.0_3.0_1725621486189.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/extractive_qa_model_en_5.5.0_3.0_1725621486189.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("extractive_qa_model","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("extractive_qa_model", "en")
+  .setInputCols(Array("document_question","document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("document_question", "document_context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div><div class="h3-box" markdown="1">
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|extractive_qa_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/waqasali1707/Extractive-QA-Model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-fabert_fa.md b/docs/_posts/ahmedlone127/2024-09-06-fabert_fa.md new file mode 100644 index 00000000000000..d054b3876e8e12 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-fabert_fa.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Persian fabert BertEmbeddings from sbunlp +author: John Snow Labs +name: fabert +date: 2024-09-06 +tags: [fa, open_source, onnx, embeddings, bert] +task: Embeddings +language: fa +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fabert` is a Persian model originally trained by sbunlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fabert_fa_5.5.0_3.0_1725614837089.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fabert_fa_5.5.0_3.0_1725614837089.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("fabert","fa") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("fabert","fa") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
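+The `embeddings` column holds one annotation per token, each carrying its vector in the `embeddings` field. A minimal sketch (reusing `pipelineDF` from the Python example above) for flattening tokens and their vectors:
+
+```python
+from pyspark.sql.functions import explode
+
+# minimal sketch: one row per token with its covered text and embedding vector
+pipelineDF.select(explode("embeddings").alias("emb")) \
+    .select("emb.result", "emb.embeddings") \
+    .show(truncate=50)
+```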
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fabert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|fa| +|Size:|464.5 MB| + +## References + +https://huggingface.co/sbunlp/fabert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-fabert_pipeline_fa.md b/docs/_posts/ahmedlone127/2024-09-06-fabert_pipeline_fa.md new file mode 100644 index 00000000000000..6cf324e5364eef --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-fabert_pipeline_fa.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Persian fabert_pipeline pipeline BertEmbeddings from sbunlp +author: John Snow Labs +name: fabert_pipeline +date: 2024-09-06 +tags: [fa, open_source, pipeline, onnx] +task: Embeddings +language: fa +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fabert_pipeline` is a Persian model originally trained by sbunlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fabert_pipeline_fa_5.5.0_3.0_1725614861041.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fabert_pipeline_fa_5.5.0_3.0_1725614861041.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("fabert_pipeline", lang = "fa") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("fabert_pipeline", lang = "fa") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fabert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fa| +|Size:|464.6 MB| + +## References + +https://huggingface.co/sbunlp/fabert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-facets_128b_en.md b/docs/_posts/ahmedlone127/2024-09-06-facets_128b_en.md new file mode 100644 index 00000000000000..1a352eb32518fa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-facets_128b_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English facets_128b MPNetEmbeddings from ingeol +author: John Snow Labs +name: facets_128b +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`facets_128b` is a English model originally trained by ingeol. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/facets_128b_en_5.5.0_3.0_1725595174614.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/facets_128b_en_5.5.0_3.0_1725595174614.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("facets_128b","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("facets_128b","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
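+To pass these sentence embeddings on to downstream Spark ML stages, an `EmbeddingsFinisher` can turn the annotations into plain vectors. A minimal sketch extending the Python example above (the `sentence_vector` column name is an assumption):
+
+```python
+from sparknlp.base import EmbeddingsFinisher
+
+# minimal sketch: convert the MPNet annotations into Spark ML vectors
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["sentence_vector"]) \
+    .setOutputAsVector(True)
+
+pipeline = Pipeline().setStages([documentAssembler, embeddings, finisher])
+pipeline.fit(data).transform(data).select("sentence_vector").show(truncate=50)
+```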
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|facets_128b| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/ingeol/facets_128b \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-facets_128b_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-facets_128b_pipeline_en.md new file mode 100644 index 00000000000000..3381576fe495f0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-facets_128b_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English facets_128b_pipeline pipeline MPNetEmbeddings from ingeol +author: John Snow Labs +name: facets_128b_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`facets_128b_pipeline` is a English model originally trained by ingeol. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/facets_128b_pipeline_en_5.5.0_3.0_1725595198846.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/facets_128b_pipeline_en_5.5.0_3.0_1725595198846.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("facets_128b_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("facets_128b_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|facets_128b_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/ingeol/facets_128b + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-facets_ep3_1122_en.md b/docs/_posts/ahmedlone127/2024-09-06-facets_ep3_1122_en.md new file mode 100644 index 00000000000000..42dc752ce7de0b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-facets_ep3_1122_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English facets_ep3_1122 MPNetEmbeddings from ingeol +author: John Snow Labs +name: facets_ep3_1122 +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`facets_ep3_1122` is a English model originally trained by ingeol. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/facets_ep3_1122_en_5.5.0_3.0_1725595439027.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/facets_ep3_1122_en_5.5.0_3.0_1725595439027.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("facets_ep3_1122","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("facets_ep3_1122","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|facets_ep3_1122| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/ingeol/facets_ep3_1122 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-facets_ep3_1122_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-facets_ep3_1122_pipeline_en.md new file mode 100644 index 00000000000000..26b1dc9b469ea4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-facets_ep3_1122_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English facets_ep3_1122_pipeline pipeline MPNetEmbeddings from ingeol +author: John Snow Labs +name: facets_ep3_1122_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`facets_ep3_1122_pipeline` is a English model originally trained by ingeol. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/facets_ep3_1122_pipeline_en_5.5.0_3.0_1725595458708.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/facets_ep3_1122_pipeline_en_5.5.0_3.0_1725595458708.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("facets_ep3_1122_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("facets_ep3_1122_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|facets_ep3_1122_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/ingeol/facets_ep3_1122 + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-fairlex_fscs_minilm_de.md b/docs/_posts/ahmedlone127/2024-09-06-fairlex_fscs_minilm_de.md new file mode 100644 index 00000000000000..41e0c901ed9266 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-fairlex_fscs_minilm_de.md @@ -0,0 +1,94 @@ +--- +layout: model +title: German fairlex_fscs_minilm XlmRoBertaEmbeddings from coastalcph +author: John Snow Labs +name: fairlex_fscs_minilm +date: 2024-09-06 +tags: [de, open_source, onnx, embeddings, xlm_roberta] +task: Embeddings +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fairlex_fscs_minilm` is a German model originally trained by coastalcph. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fairlex_fscs_minilm_de_5.5.0_3.0_1725596334112.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fairlex_fscs_minilm_de_5.5.0_3.0_1725596334112.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("fairlex_fscs_minilm","de") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("fairlex_fscs_minilm","de") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fairlex_fscs_minilm| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[xlm_roberta]| +|Language:|de| +|Size:|402.9 MB| + +## References + +https://huggingface.co/coastalcph/fairlex-fscs-minilm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-fairlex_fscs_minilm_pipeline_de.md b/docs/_posts/ahmedlone127/2024-09-06-fairlex_fscs_minilm_pipeline_de.md new file mode 100644 index 00000000000000..2a4dc7282af9ad --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-fairlex_fscs_minilm_pipeline_de.md @@ -0,0 +1,70 @@ +--- +layout: model +title: German fairlex_fscs_minilm_pipeline pipeline XlmRoBertaEmbeddings from coastalcph +author: John Snow Labs +name: fairlex_fscs_minilm_pipeline +date: 2024-09-06 +tags: [de, open_source, pipeline, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fairlex_fscs_minilm_pipeline` is a German model originally trained by coastalcph. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fairlex_fscs_minilm_pipeline_de_5.5.0_3.0_1725596353562.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fairlex_fscs_minilm_pipeline_de_5.5.0_3.0_1725596353562.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("fairlex_fscs_minilm_pipeline", lang = "de") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("fairlex_fscs_minilm_pipeline", lang = "de") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fairlex_fscs_minilm_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|de| +|Size:|403.0 MB| + +## References + +https://huggingface.co/coastalcph/fairlex-fscs-minilm + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-faq_qa_model_manvitha_en.md b/docs/_posts/ahmedlone127/2024-09-06-faq_qa_model_manvitha_en.md new file mode 100644 index 00000000000000..4784af010a4d25 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-faq_qa_model_manvitha_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English faq_qa_model_manvitha DistilBertForQuestionAnswering from Manvitha +author: John Snow Labs +name: faq_qa_model_manvitha +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`faq_qa_model_manvitha` is a English model originally trained by Manvitha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/faq_qa_model_manvitha_en_5.5.0_3.0_1725654731328.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/faq_qa_model_manvitha_en_5.5.0_3.0_1725654731328.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("faq_qa_model_manvitha","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("faq_qa_model_manvitha", "en")
+  .setInputCols(Array("document_question","document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("document_question", "document_context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div><div class="h3-box" markdown="1">
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|faq_qa_model_manvitha| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Manvitha/faq_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-faq_qa_model_manvitha_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-faq_qa_model_manvitha_pipeline_en.md new file mode 100644 index 00000000000000..7b9d7c7ecc8adf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-faq_qa_model_manvitha_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English faq_qa_model_manvitha_pipeline pipeline DistilBertForQuestionAnswering from Manvitha +author: John Snow Labs +name: faq_qa_model_manvitha_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`faq_qa_model_manvitha_pipeline` is a English model originally trained by Manvitha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/faq_qa_model_manvitha_pipeline_en_5.5.0_3.0_1725654743495.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/faq_qa_model_manvitha_pipeline_en_5.5.0_3.0_1725654743495.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("faq_qa_model_manvitha_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("faq_qa_model_manvitha_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|faq_qa_model_manvitha_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Manvitha/faq_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-few_shot_learner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-few_shot_learner_pipeline_en.md new file mode 100644 index 00000000000000..fda60f75b33f09 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-few_shot_learner_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English few_shot_learner_pipeline pipeline MPNetEmbeddings from ManuelaJeyaraj +author: John Snow Labs +name: few_shot_learner_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`few_shot_learner_pipeline` is a English model originally trained by ManuelaJeyaraj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/few_shot_learner_pipeline_en_5.5.0_3.0_1725595482249.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/few_shot_learner_pipeline_en_5.5.0_3.0_1725595482249.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("few_shot_learner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("few_shot_learner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|few_shot_learner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/ManuelaJeyaraj/few_shot_learner + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-fiction_gbert_large_droc_np_ner_de.md b/docs/_posts/ahmedlone127/2024-09-06-fiction_gbert_large_droc_np_ner_de.md new file mode 100644 index 00000000000000..48fa4e7a788027 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-fiction_gbert_large_droc_np_ner_de.md @@ -0,0 +1,94 @@ +--- +layout: model +title: German fiction_gbert_large_droc_np_ner BertForTokenClassification from LennartKeller +author: John Snow Labs +name: fiction_gbert_large_droc_np_ner +date: 2024-09-06 +tags: [de, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fiction_gbert_large_droc_np_ner` is a German model originally trained by LennartKeller. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fiction_gbert_large_droc_np_ner_de_5.5.0_3.0_1725601107098.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fiction_gbert_large_droc_np_ner_de_5.5.0_3.0_1725601107098.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("fiction_gbert_large_droc_np_ner","de") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("fiction_gbert_large_droc_np_ner", "de")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div><div class="h3-box" markdown="1">
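+The token-level `ner` tags can be grouped into whole entity chunks with a `NerConverter`. A minimal sketch extending the Python example above (it reads the `document`, `token` and `ner` columns defined there; the `entities` column name is an assumption):
+
+```python
+from sparknlp.annotator import NerConverter
+
+# minimal sketch: merge IOB tags into entity chunks
+nerConverter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("entities")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier, nerConverter])
+pipeline.fit(data).transform(data).select("entities.result").show(truncate=False)
+```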
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fiction_gbert_large_droc_np_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|de| +|Size:|1.3 GB| + +## References + +https://huggingface.co/LennartKeller/fiction-gbert-large-droc-np-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-fiction_gbert_large_droc_np_ner_pipeline_de.md b/docs/_posts/ahmedlone127/2024-09-06-fiction_gbert_large_droc_np_ner_pipeline_de.md new file mode 100644 index 00000000000000..fe1a3341322218 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-fiction_gbert_large_droc_np_ner_pipeline_de.md @@ -0,0 +1,70 @@ +--- +layout: model +title: German fiction_gbert_large_droc_np_ner_pipeline pipeline BertForTokenClassification from LennartKeller +author: John Snow Labs +name: fiction_gbert_large_droc_np_ner_pipeline +date: 2024-09-06 +tags: [de, open_source, pipeline, onnx] +task: Named Entity Recognition +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fiction_gbert_large_droc_np_ner_pipeline` is a German model originally trained by LennartKeller. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fiction_gbert_large_droc_np_ner_pipeline_de_5.5.0_3.0_1725601166795.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fiction_gbert_large_droc_np_ner_pipeline_de_5.5.0_3.0_1725601166795.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("fiction_gbert_large_droc_np_ner_pipeline", lang = "de") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("fiction_gbert_large_droc_np_ner_pipeline", lang = "de") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fiction_gbert_large_droc_np_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|de| +|Size:|1.3 GB| + +## References + +https://huggingface.co/LennartKeller/fiction-gbert-large-droc-np-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-film_en.md b/docs/_posts/ahmedlone127/2024-09-06-film_en.md new file mode 100644 index 00000000000000..660bd8e11dea26 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-film_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English film RoBertaEmbeddings from HYdsl +author: John Snow Labs +name: film +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`film` is a English model originally trained by HYdsl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/film_en_5.5.0_3.0_1725660571067.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/film_en_5.5.0_3.0_1725660571067.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("film","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("film","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|film| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|466.0 MB| + +## References + +https://huggingface.co/HYdsl/FiLM \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-film_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-film_pipeline_en.md new file mode 100644 index 00000000000000..b5ef6f3174c0c8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-film_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English film_pipeline pipeline RoBertaEmbeddings from HYdsl +author: John Snow Labs +name: film_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`film_pipeline` is a English model originally trained by HYdsl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/film_pipeline_en_5.5.0_3.0_1725660593066.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/film_pipeline_en_5.5.0_3.0_1725660593066.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("film_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("film_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|film_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|466.1 MB| + +## References + +https://huggingface.co/HYdsl/FiLM + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-finance_bearish_bullish_en.md b/docs/_posts/ahmedlone127/2024-09-06-finance_bearish_bullish_en.md new file mode 100644 index 00000000000000..8cda1ab8af7b01 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-finance_bearish_bullish_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English finance_bearish_bullish DistilBertForSequenceClassification from ldh243 +author: John Snow Labs +name: finance_bearish_bullish +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finance_bearish_bullish` is a English model originally trained by ldh243. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finance_bearish_bullish_en_5.5.0_3.0_1725607947689.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finance_bearish_bullish_en_5.5.0_3.0_1725607947689.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("finance_bearish_bullish","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("finance_bearish_bullish", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div><div class="h3-box" markdown="1">
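+The label set this checkpoint predicts can be inspected directly on the loaded annotator. A minimal sketch (reusing `sequenceClassifier` from the Python example above):
+
+```python
+# minimal sketch: list the labels the classifier was trained with
+print(sequenceClassifier.getClasses())
+```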
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finance_bearish_bullish| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/ldh243/finance-bearish-bullish \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-fine_tune_bert_en.md b/docs/_posts/ahmedlone127/2024-09-06-fine_tune_bert_en.md new file mode 100644 index 00000000000000..3cf7be8dfa2b9f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-fine_tune_bert_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English fine_tune_bert DistilBertForQuestionAnswering from SaiSaketh +author: John Snow Labs +name: fine_tune_bert +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fine_tune_bert` is a English model originally trained by SaiSaketh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tune_bert_en_5.5.0_3.0_1725652446491.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tune_bert_en_5.5.0_3.0_1725652446491.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("fine_tune_bert","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("fine_tune_bert", "en")
+  .setInputCols(Array("document_question","document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("document_question", "document_context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div><div class="h3-box" markdown="1">
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tune_bert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/SaiSaketh/fine_tune_bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-fine_tuned_distilbert_base_uncased_en.md b/docs/_posts/ahmedlone127/2024-09-06-fine_tuned_distilbert_base_uncased_en.md new file mode 100644 index 00000000000000..ca767b20591b70 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-fine_tuned_distilbert_base_uncased_en.md @@ -0,0 +1,100 @@ +--- +layout: model +title: English fine_tuned_distilbert_base_uncased DistilBertForSequenceClassification from bright1 +author: John Snow Labs +name: fine_tuned_distilbert_base_uncased +date: 2024-09-06 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fine_tuned_distilbert_base_uncased` is a English model originally trained by bright1. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tuned_distilbert_base_uncased_en_5.5.0_3.0_1725639860466.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tuned_distilbert_base_uncased_en_5.5.0_3.0_1725639860466.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+document_assembler = DocumentAssembler()\
+    .setInputCol("text")\
+    .setOutputCol("document")
+
+tokenizer = Tokenizer()\
+    .setInputCols("document")\
+    .setOutputCol("token")
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("fine_tuned_distilbert_base_uncased","en")\
+    .setInputCols(["document","token"])\
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier])
+
+data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+val document_assembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("fine_tuned_distilbert_base_uncased","en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier))
+
+val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text")
+
+val result = pipeline.fit(data).transform(data)
+```
+</div><div class="h3-box" markdown="1">
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tuned_distilbert_base_uncased| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +References + +References + +https://huggingface.co/bright1/fine-tuned-distilbert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-fine_tuned_distilbert_medical_chatbot_en.md b/docs/_posts/ahmedlone127/2024-09-06-fine_tuned_distilbert_medical_chatbot_en.md new file mode 100644 index 00000000000000..d1b5d162e4c281 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-fine_tuned_distilbert_medical_chatbot_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English fine_tuned_distilbert_medical_chatbot DistilBertForQuestionAnswering from adamfendri +author: John Snow Labs +name: fine_tuned_distilbert_medical_chatbot +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fine_tuned_distilbert_medical_chatbot` is a English model originally trained by adamfendri. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tuned_distilbert_medical_chatbot_en_5.5.0_3.0_1725652233800.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tuned_distilbert_medical_chatbot_en_5.5.0_3.0_1725652233800.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("fine_tuned_distilbert_medical_chatbot","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("fine_tuned_distilbert_medical_chatbot", "en")
+  .setInputCols(Array("document_question","document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("document_question", "document_context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div><div class="h3-box" markdown="1">
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tuned_distilbert_medical_chatbot| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/adamfendri/fine-tuned-distilbert-medical-chatbot \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-fine_tuned_distilbert_medical_chatbot_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-fine_tuned_distilbert_medical_chatbot_pipeline_en.md new file mode 100644 index 00000000000000..c5b3b788f6a932 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-fine_tuned_distilbert_medical_chatbot_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English fine_tuned_distilbert_medical_chatbot_pipeline pipeline DistilBertForQuestionAnswering from adamfendri +author: John Snow Labs +name: fine_tuned_distilbert_medical_chatbot_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fine_tuned_distilbert_medical_chatbot_pipeline` is a English model originally trained by adamfendri. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tuned_distilbert_medical_chatbot_pipeline_en_5.5.0_3.0_1725652245733.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tuned_distilbert_medical_chatbot_pipeline_en_5.5.0_3.0_1725652245733.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("fine_tuned_distilbert_medical_chatbot_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("fine_tuned_distilbert_medical_chatbot_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tuned_distilbert_medical_chatbot_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/adamfendri/fine-tuned-distilbert-medical-chatbot + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-finetuned_helsinki_nlp_english_marathi_marh_en.md b/docs/_posts/ahmedlone127/2024-09-06-finetuned_helsinki_nlp_english_marathi_marh_en.md new file mode 100644 index 00000000000000..8492e37a80e73f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-finetuned_helsinki_nlp_english_marathi_marh_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English finetuned_helsinki_nlp_english_marathi_marh MarianTransformer from anujsahani01 +author: John Snow Labs +name: finetuned_helsinki_nlp_english_marathi_marh +date: 2024-09-06 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_helsinki_nlp_english_marathi_marh` is a English model originally trained by anujsahani01. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_helsinki_nlp_english_marathi_marh_en_5.5.0_3.0_1725636117525.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_helsinki_nlp_english_marathi_marh_en_5.5.0_3.0_1725636117525.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("finetuned_helsinki_nlp_english_marathi_marh","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("finetuned_helsinki_nlp_english_marathi_marh","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
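+
+The translated sentences end up in the `translation` annotation column, so a quick way to look at the output is a plain select. This is a small sketch added for illustration; it only relies on the `pipelineDF` and `translation` names used above.
+
+```python
+# Show the translated text per sentence (sketch; reuses pipelineDF from above)
+pipelineDF.select("translation.result").show(truncate=False)
+```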
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_helsinki_nlp_english_marathi_marh| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|520.1 MB| + +## References + +https://huggingface.co/anujsahani01/finetuned_Helsinki-NLP-en-mr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-finetuned_helsinki_nlp_marathi_marh_english_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-finetuned_helsinki_nlp_marathi_marh_english_pipeline_en.md new file mode 100644 index 00000000000000..e6700f1d92c639 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-finetuned_helsinki_nlp_marathi_marh_english_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English finetuned_helsinki_nlp_marathi_marh_english_pipeline pipeline MarianTransformer from anujsahani01 +author: John Snow Labs +name: finetuned_helsinki_nlp_marathi_marh_english_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_helsinki_nlp_marathi_marh_english_pipeline` is a English model originally trained by anujsahani01. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_helsinki_nlp_marathi_marh_english_pipeline_en_5.5.0_3.0_1725636164802.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_helsinki_nlp_marathi_marh_english_pipeline_en_5.5.0_3.0_1725636164802.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("finetuned_helsinki_nlp_marathi_marh_english_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("finetuned_helsinki_nlp_marathi_marh_english_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
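+
+The snippet above assumes an existing DataFrame `df`. As a minimal, hypothetical sketch (the `text` column name is an assumption based on the DocumentAssembler stage listed under "Included Models"), an input frame could be built like this:
+
+```python
+# Hypothetical input DataFrame with one Marathi sentence to translate (column name assumed)
+df = spark.createDataFrame([["PUT YOUR MARATHI TEXT HERE"]]).toDF("text")
+annotations = pipeline.transform(df)
+```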
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_helsinki_nlp_marathi_marh_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|525.3 MB| + +## References + +https://huggingface.co/anujsahani01/finetuned_Helsinki-NLP-mr-en + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-finetuned_hindi_tonga_tonga_islands_english_en.md b/docs/_posts/ahmedlone127/2024-09-06-finetuned_hindi_tonga_tonga_islands_english_en.md new file mode 100644 index 00000000000000..4df33b252115a3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-finetuned_hindi_tonga_tonga_islands_english_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English finetuned_hindi_tonga_tonga_islands_english MarianTransformer from Adwait1199 +author: John Snow Labs +name: finetuned_hindi_tonga_tonga_islands_english +date: 2024-09-06 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_hindi_tonga_tonga_islands_english` is a English model originally trained by Adwait1199. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_hindi_tonga_tonga_islands_english_en_5.5.0_3.0_1725635315726.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_hindi_tonga_tonga_islands_english_en_5.5.0_3.0_1725635315726.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("finetuned_hindi_tonga_tonga_islands_english","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("finetuned_hindi_tonga_tonga_islands_english","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_hindi_tonga_tonga_islands_english| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|518.5 MB| + +## References + +https://huggingface.co/Adwait1199/finetuned-hi-to-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-finetuned_hindi_tonga_tonga_islands_english_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-finetuned_hindi_tonga_tonga_islands_english_pipeline_en.md new file mode 100644 index 00000000000000..97fdf9f4301426 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-finetuned_hindi_tonga_tonga_islands_english_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English finetuned_hindi_tonga_tonga_islands_english_pipeline pipeline MarianTransformer from Adwait1199 +author: John Snow Labs +name: finetuned_hindi_tonga_tonga_islands_english_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_hindi_tonga_tonga_islands_english_pipeline` is a English model originally trained by Adwait1199. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_hindi_tonga_tonga_islands_english_pipeline_en_5.5.0_3.0_1725635341777.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_hindi_tonga_tonga_islands_english_pipeline_en_5.5.0_3.0_1725635341777.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("finetuned_hindi_tonga_tonga_islands_english_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("finetuned_hindi_tonga_tonga_islands_english_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_hindi_tonga_tonga_islands_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|519.0 MB| + +## References + +https://huggingface.co/Adwait1199/finetuned-hi-to-en + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-finetuned_kde4_portuguese_tonga_tonga_islands_catalan_2_en.md b/docs/_posts/ahmedlone127/2024-09-06-finetuned_kde4_portuguese_tonga_tonga_islands_catalan_2_en.md new file mode 100644 index 00000000000000..75c8a4d5781c6d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-finetuned_kde4_portuguese_tonga_tonga_islands_catalan_2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English finetuned_kde4_portuguese_tonga_tonga_islands_catalan_2 MarianTransformer from tarsssss +author: John Snow Labs +name: finetuned_kde4_portuguese_tonga_tonga_islands_catalan_2 +date: 2024-09-06 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_kde4_portuguese_tonga_tonga_islands_catalan_2` is a English model originally trained by tarsssss. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_kde4_portuguese_tonga_tonga_islands_catalan_2_en_5.5.0_3.0_1725635117448.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_kde4_portuguese_tonga_tonga_islands_catalan_2_en_5.5.0_3.0_1725635117448.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("finetuned_kde4_portuguese_tonga_tonga_islands_catalan_2","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("finetuned_kde4_portuguese_tonga_tonga_islands_catalan_2","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_kde4_portuguese_tonga_tonga_islands_catalan_2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|285.5 MB| + +## References + +https://huggingface.co/tarsssss/finetuned-kde4-pt-to-ca-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-finetuned_kde4_portuguese_tonga_tonga_islands_catalan_2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-finetuned_kde4_portuguese_tonga_tonga_islands_catalan_2_pipeline_en.md new file mode 100644 index 00000000000000..d47cbd0682337b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-finetuned_kde4_portuguese_tonga_tonga_islands_catalan_2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English finetuned_kde4_portuguese_tonga_tonga_islands_catalan_2_pipeline pipeline MarianTransformer from tarsssss +author: John Snow Labs +name: finetuned_kde4_portuguese_tonga_tonga_islands_catalan_2_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_kde4_portuguese_tonga_tonga_islands_catalan_2_pipeline` is a English model originally trained by tarsssss. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_kde4_portuguese_tonga_tonga_islands_catalan_2_pipeline_en_5.5.0_3.0_1725635131788.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_kde4_portuguese_tonga_tonga_islands_catalan_2_pipeline_en_5.5.0_3.0_1725635131788.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("finetuned_kde4_portuguese_tonga_tonga_islands_catalan_2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("finetuned_kde4_portuguese_tonga_tonga_islands_catalan_2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_kde4_portuguese_tonga_tonga_islands_catalan_2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|286.0 MB| + +## References + +https://huggingface.co/tarsssss/finetuned-kde4-pt-to-ca-2 + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-finetuned_opusmt_english_hindi_gujarati_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-finetuned_opusmt_english_hindi_gujarati_pipeline_en.md new file mode 100644 index 00000000000000..8f6df136cef342 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-finetuned_opusmt_english_hindi_gujarati_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English finetuned_opusmt_english_hindi_gujarati_pipeline pipeline MarianTransformer from Varsha00 +author: John Snow Labs +name: finetuned_opusmt_english_hindi_gujarati_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_opusmt_english_hindi_gujarati_pipeline` is a English model originally trained by Varsha00. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_opusmt_english_hindi_gujarati_pipeline_en_5.5.0_3.0_1725636357019.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_opusmt_english_hindi_gujarati_pipeline_en_5.5.0_3.0_1725636357019.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("finetuned_opusmt_english_hindi_gujarati_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("finetuned_opusmt_english_hindi_gujarati_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_opusmt_english_hindi_gujarati_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|530.7 MB| + +## References + +https://huggingface.co/Varsha00/finetuned-opusmt-en-hi-gu + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-finetuned_qa_model_en.md b/docs/_posts/ahmedlone127/2024-09-06-finetuned_qa_model_en.md new file mode 100644 index 00000000000000..745df5b4b2d6d0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-finetuned_qa_model_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English finetuned_qa_model DistilBertForQuestionAnswering from yileitu +author: John Snow Labs +name: finetuned_qa_model +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_qa_model` is a English model originally trained by yileitu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_qa_model_en_5.5.0_3.0_1725621486665.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_qa_model_en_5.5.0_3.0_1725621486665.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("finetuned_qa_model","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("finetuned_qa_model", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_qa_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/yileitu/finetuned_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-finetuned_whisper_small_tanglish_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-finetuned_whisper_small_tanglish_pipeline_en.md new file mode 100644 index 00000000000000..efd50434fc9e6a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-finetuned_whisper_small_tanglish_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English finetuned_whisper_small_tanglish_pipeline pipeline WhisperForCTC from sachinabhinav +author: John Snow Labs +name: finetuned_whisper_small_tanglish_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_whisper_small_tanglish_pipeline` is a English model originally trained by sachinabhinav. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_whisper_small_tanglish_pipeline_en_5.5.0_3.0_1725605905983.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_whisper_small_tanglish_pipeline_en_5.5.0_3.0_1725605905983.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("finetuned_whisper_small_tanglish_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("finetuned_whisper_small_tanglish_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_whisper_small_tanglish_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/sachinabhinav/finetuned_whisper_small_tanglish + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-finetuning_sentiment_model_3000_samples_albert_en.md b/docs/_posts/ahmedlone127/2024-09-06-finetuning_sentiment_model_3000_samples_albert_en.md new file mode 100644 index 00000000000000..2496aaa7b042e4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-finetuning_sentiment_model_3000_samples_albert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English finetuning_sentiment_model_3000_samples_albert AlbertForSequenceClassification from ShashwatDash +author: John Snow Labs +name: finetuning_sentiment_model_3000_samples_albert +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuning_sentiment_model_3000_samples_albert` is a English model originally trained by ShashwatDash. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuning_sentiment_model_3000_samples_albert_en_5.5.0_3.0_1725628393355.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuning_sentiment_model_3000_samples_albert_en_5.5.0_3.0_1725628393355.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = AlbertForSequenceClassification.pretrained("finetuning_sentiment_model_3000_samples_albert","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = AlbertForSequenceClassification.pretrained("finetuning_sentiment_model_3000_samples_albert", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
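+
+Once `pipelineDF` is computed, the predicted label sits in the `class` annotation column and can be read back with a simple select. This is an illustrative sketch that only uses the names defined in the example above.
+
+```python
+# Show the input text next to the predicted sentiment label (sketch; reuses pipelineDF from above)
+pipelineDF.select("text", "class.result").show(truncate=False)
+```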
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuning_sentiment_model_3000_samples_albert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/ShashwatDash/finetuning-sentiment-model-3000-samples-albert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-finetuning_sentiment_model_3000_samples_albert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-finetuning_sentiment_model_3000_samples_albert_pipeline_en.md new file mode 100644 index 00000000000000..f190744ae59fc8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-finetuning_sentiment_model_3000_samples_albert_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English finetuning_sentiment_model_3000_samples_albert_pipeline pipeline AlbertForSequenceClassification from ShashwatDash +author: John Snow Labs +name: finetuning_sentiment_model_3000_samples_albert_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuning_sentiment_model_3000_samples_albert_pipeline` is a English model originally trained by ShashwatDash. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuning_sentiment_model_3000_samples_albert_pipeline_en_5.5.0_3.0_1725628395821.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuning_sentiment_model_3000_samples_albert_pipeline_en_5.5.0_3.0_1725628395821.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("finetuning_sentiment_model_3000_samples_albert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("finetuning_sentiment_model_3000_samples_albert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
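+
+For a quick single-string check, `PretrainedPipeline` also exposes `annotate`; the sketch below is illustrative only and assumes the pipeline reads raw text through its DocumentAssembler stage.
+
+```python
+# Hypothetical one-off prediction on a single string (returns a dict of output columns)
+prediction = pipeline.annotate("I love spark-nlp")
+print(prediction)
+```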
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuning_sentiment_model_3000_samples_albert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/ShashwatDash/finetuning-sentiment-model-3000-samples-albert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-finetuning_sentiment_model_deberta_en.md b/docs/_posts/ahmedlone127/2024-09-06-finetuning_sentiment_model_deberta_en.md new file mode 100644 index 00000000000000..dc47b3c8748820 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-finetuning_sentiment_model_deberta_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English finetuning_sentiment_model_deberta DeBertaForSequenceClassification from devtanumisra +author: John Snow Labs +name: finetuning_sentiment_model_deberta +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuning_sentiment_model_deberta` is a English model originally trained by devtanumisra. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuning_sentiment_model_deberta_en_5.5.0_3.0_1725611669796.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuning_sentiment_model_deberta_en_5.5.0_3.0_1725611669796.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("finetuning_sentiment_model_deberta","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("finetuning_sentiment_model_deberta", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuning_sentiment_model_deberta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|634.2 MB| + +## References + +https://huggingface.co/devtanumisra/finetuning-sentiment-model-deberta \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-finguard_distilbert_37500_en.md b/docs/_posts/ahmedlone127/2024-09-06-finguard_distilbert_37500_en.md new file mode 100644 index 00000000000000..a9cfdc9bcca2ee --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-finguard_distilbert_37500_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English finguard_distilbert_37500 DistilBertForTokenClassification from AnirudhLanka2002 +author: John Snow Labs +name: finguard_distilbert_37500 +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finguard_distilbert_37500` is a English model originally trained by AnirudhLanka2002. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finguard_distilbert_37500_en_5.5.0_3.0_1725653744790.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finguard_distilbert_37500_en_5.5.0_3.0_1725653744790.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("finguard_distilbert_37500","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("finguard_distilbert_37500", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
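+
+The `ner` column holds one predicted tag per token, so a quick way to review the result is to select the token texts and their tags side by side. This is a small sketch for illustration; it only relies on the `token` and `ner` output columns defined above.
+
+```python
+# Show tokens alongside their predicted entity tags (sketch; reuses pipelineDF from above)
+pipelineDF.select("token.result", "ner.result").show(truncate=False)
+```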
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finguard_distilbert_37500| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|244.6 MB| + +## References + +https://huggingface.co/AnirudhLanka2002/finguard_distilBERT_37500 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-finguard_distilbert_37500_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-finguard_distilbert_37500_pipeline_en.md new file mode 100644 index 00000000000000..d131ee6ba2122d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-finguard_distilbert_37500_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English finguard_distilbert_37500_pipeline pipeline DistilBertForTokenClassification from AnirudhLanka2002 +author: John Snow Labs +name: finguard_distilbert_37500_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finguard_distilbert_37500_pipeline` is a English model originally trained by AnirudhLanka2002. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finguard_distilbert_37500_pipeline_en_5.5.0_3.0_1725653756300.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finguard_distilbert_37500_pipeline_en_5.5.0_3.0_1725653756300.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("finguard_distilbert_37500_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("finguard_distilbert_37500_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finguard_distilbert_37500_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|244.6 MB| + +## References + +https://huggingface.co/AnirudhLanka2002/finguard_distilBERT_37500 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-finnish_rauma_fi.md b/docs/_posts/ahmedlone127/2024-09-06-finnish_rauma_fi.md new file mode 100644 index 00000000000000..fffa129c7f6341 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-finnish_rauma_fi.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Finnish finnish_rauma MarianTransformer from codymd +author: John Snow Labs +name: finnish_rauma +date: 2024-09-06 +tags: [fi, open_source, onnx, translation, marian] +task: Translation +language: fi +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finnish_rauma` is a Finnish model originally trained by codymd. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finnish_rauma_fi_5.5.0_3.0_1725635650051.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finnish_rauma_fi_5.5.0_3.0_1725635650051.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("finnish_rauma","fi") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("finnish_rauma","fi")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finnish_rauma| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|fi| +|Size:|355.8 MB| + +## References + +https://huggingface.co/codymd/fi-rauma \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-finnish_rauma_pipeline_fi.md b/docs/_posts/ahmedlone127/2024-09-06-finnish_rauma_pipeline_fi.md new file mode 100644 index 00000000000000..677201367d11f6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-finnish_rauma_pipeline_fi.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Finnish finnish_rauma_pipeline pipeline MarianTransformer from codymd +author: John Snow Labs +name: finnish_rauma_pipeline +date: 2024-09-06 +tags: [fi, open_source, pipeline, onnx] +task: Translation +language: fi +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finnish_rauma_pipeline` is a Finnish model originally trained by codymd. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finnish_rauma_pipeline_fi_5.5.0_3.0_1725635667811.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finnish_rauma_pipeline_fi_5.5.0_3.0_1725635667811.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("finnish_rauma_pipeline", lang = "fi") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("finnish_rauma_pipeline", lang = "fi") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finnish_rauma_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fi| +|Size:|356.3 MB| + +## References + +https://huggingface.co/codymd/fi-rauma + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-first_model_en.md b/docs/_posts/ahmedlone127/2024-09-06-first_model_en.md new file mode 100644 index 00000000000000..50f7f312b6d88a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-first_model_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English first_model BertEmbeddings from songqian +author: John Snow Labs +name: first_model +date: 2024-09-06 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`first_model` is a English model originally trained by songqian. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/first_model_en_5.5.0_3.0_1725637498095.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/first_model_en_5.5.0_3.0_1725637498095.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("first_model","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("first_model", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+```
+
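+
+Each token receives a vector in the `embeddings` annotation column; a quick way to sanity-check the output is to select those vectors. This is an illustrative sketch using only the names defined in the example above.
+
+```python
+# Show the per-token embedding vectors produced by the model (sketch; reuses pipelineDF from above)
+pipelineDF.select("embeddings.embeddings").show()
+```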
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|first_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +References + +https://huggingface.co/songqian/first_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-first_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-first_model_pipeline_en.md new file mode 100644 index 00000000000000..ec0191e1f3b491 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-first_model_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English first_model_pipeline pipeline CamemBertEmbeddings from pankaj4u4m +author: John Snow Labs +name: first_model_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`first_model_pipeline` is a English model originally trained by pankaj4u4m. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/first_model_pipeline_en_5.5.0_3.0_1725637576622.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/first_model_pipeline_en_5.5.0_3.0_1725637576622.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("first_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("first_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|first_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/pankaj4u4m/first_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-gal_ner_xlmr_5_en.md b/docs/_posts/ahmedlone127/2024-09-06-gal_ner_xlmr_5_en.md new file mode 100644 index 00000000000000..00288a6f5a17bc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-gal_ner_xlmr_5_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English gal_ner_xlmr_5 XlmRoBertaForTokenClassification from homersimpson +author: John Snow Labs +name: gal_ner_xlmr_5 +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`gal_ner_xlmr_5` is a English model originally trained by homersimpson. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gal_ner_xlmr_5_en_5.5.0_3.0_1725592757181.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gal_ner_xlmr_5_en_5.5.0_3.0_1725592757181.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("gal_ner_xlmr_5","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("gal_ner_xlmr_5", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gal_ner_xlmr_5| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|808.6 MB| + +## References + +https://huggingface.co/homersimpson/gal-ner-xlmr-5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-gal_ner_xlmr_5_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-gal_ner_xlmr_5_pipeline_en.md new file mode 100644 index 00000000000000..618fdeb8b3a671 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-gal_ner_xlmr_5_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English gal_ner_xlmr_5_pipeline pipeline XlmRoBertaForTokenClassification from homersimpson +author: John Snow Labs +name: gal_ner_xlmr_5_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`gal_ner_xlmr_5_pipeline` is a English model originally trained by homersimpson. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gal_ner_xlmr_5_pipeline_en_5.5.0_3.0_1725592877480.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gal_ner_xlmr_5_pipeline_en_5.5.0_3.0_1725592877480.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("gal_ner_xlmr_5_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("gal_ner_xlmr_5_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
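The `df` referenced above is any Spark DataFrame with a `text` column. A minimal, self-contained sketch follows; the `ner` output column is an assumption based on the included token classifier, and `annotate` is convenient for quick single-string checks.

```python
from sparknlp.pretrained import PretrainedPipeline

pipeline = PretrainedPipeline("gal_ner_xlmr_5_pipeline", lang="en")

# Batch scoring: the pipeline reads from a "text" column.
df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
annotations = pipeline.transform(df)
annotations.select("ner.result").show(truncate=False)

# Single strings can be scored in memory without building a DataFrame.
print(pipeline.annotate("I love spark-nlp"))
```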
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gal_ner_xlmr_5_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|808.6 MB| + +## References + +https://huggingface.co/homersimpson/gal-ner-xlmr-5 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-gdpr_privacy_policy_ner_en.md b/docs/_posts/ahmedlone127/2024-09-06-gdpr_privacy_policy_ner_en.md new file mode 100644 index 00000000000000..2aa8f877917c60 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-gdpr_privacy_policy_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English gdpr_privacy_policy_ner BertForTokenClassification from PaDaS-Lab +author: John Snow Labs +name: gdpr_privacy_policy_ner +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`gdpr_privacy_policy_ner` is a English model originally trained by PaDaS-Lab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gdpr_privacy_policy_ner_en_5.5.0_3.0_1725634237549.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gdpr_privacy_policy_ner_en_5.5.0_3.0_1725634237549.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = BertForTokenClassification.pretrained("gdpr_privacy_policy_ner","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = BertForTokenClassification.pretrained("gdpr_privacy_policy_ner", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
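For quick experiments on a handful of policy sentences, the fitted `pipelineModel` from the snippet above can be wrapped in a `LightPipeline`, which annotates plain strings without building a DataFrame. A minimal sketch, assuming the stage column names above line up (the classifier must read the assembler's `document` column); the sample sentence is illustrative only.

```python
from sparknlp.base import LightPipeline

light = LightPipeline(pipelineModel)
result = light.annotate("We may share your email address with third-party processors.")
# Keys mirror the output columns of the pipeline stages, e.g. "ner" for the token tags.
print(result["ner"])
```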
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gdpr_privacy_policy_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|403.8 MB| + +## References + +https://huggingface.co/PaDaS-Lab/gdpr-privacy-policy-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-gdpr_privacy_policy_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-gdpr_privacy_policy_ner_pipeline_en.md new file mode 100644 index 00000000000000..f1d84140ca02ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-gdpr_privacy_policy_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English gdpr_privacy_policy_ner_pipeline pipeline BertForTokenClassification from PaDaS-Lab +author: John Snow Labs +name: gdpr_privacy_policy_ner_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`gdpr_privacy_policy_ner_pipeline` is a English model originally trained by PaDaS-Lab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gdpr_privacy_policy_ner_pipeline_en_5.5.0_3.0_1725634257940.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gdpr_privacy_policy_ner_pipeline_en_5.5.0_3.0_1725634257940.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("gdpr_privacy_policy_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("gdpr_privacy_policy_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gdpr_privacy_policy_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.9 MB| + +## References + +https://huggingface.co/PaDaS-Lab/gdpr-privacy-policy-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-hate_bert_hasoc_marathi_mr.md b/docs/_posts/ahmedlone127/2024-09-06-hate_bert_hasoc_marathi_mr.md new file mode 100644 index 00000000000000..f3a30c99dc02ea --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-hate_bert_hasoc_marathi_mr.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Marathi hate_bert_hasoc_marathi AlbertForSequenceClassification from l3cube-pune +author: John Snow Labs +name: hate_bert_hasoc_marathi +date: 2024-09-06 +tags: [mr, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: mr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hate_bert_hasoc_marathi` is a Marathi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hate_bert_hasoc_marathi_mr_5.5.0_3.0_1725628590813.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hate_bert_hasoc_marathi_mr_5.5.0_3.0_1725628590813.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = AlbertForSequenceClassification.pretrained("hate_bert_hasoc_marathi","mr") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = AlbertForSequenceClassification.pretrained("hate_bert_hasoc_marathi", "mr") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hate_bert_hasoc_marathi| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|mr| +|Size:|127.7 MB| + +## References + +https://huggingface.co/l3cube-pune/hate-bert-hasoc-marathi \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-hate_bert_hasoc_marathi_pipeline_mr.md b/docs/_posts/ahmedlone127/2024-09-06-hate_bert_hasoc_marathi_pipeline_mr.md new file mode 100644 index 00000000000000..f61da4e4c9f78c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-hate_bert_hasoc_marathi_pipeline_mr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Marathi hate_bert_hasoc_marathi_pipeline pipeline AlbertForSequenceClassification from l3cube-pune +author: John Snow Labs +name: hate_bert_hasoc_marathi_pipeline +date: 2024-09-06 +tags: [mr, open_source, pipeline, onnx] +task: Text Classification +language: mr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hate_bert_hasoc_marathi_pipeline` is a Marathi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hate_bert_hasoc_marathi_pipeline_mr_5.5.0_3.0_1725628597245.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hate_bert_hasoc_marathi_pipeline_mr_5.5.0_3.0_1725628597245.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("hate_bert_hasoc_marathi_pipeline", lang = "mr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("hate_bert_hasoc_marathi_pipeline", lang = "mr") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hate_bert_hasoc_marathi_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|mr| +|Size:|127.8 MB| + +## References + +https://huggingface.co/l3cube-pune/hate-bert-hasoc-marathi + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-hate_hate_random3_seed0_bernice_en.md b/docs/_posts/ahmedlone127/2024-09-06-hate_hate_random3_seed0_bernice_en.md new file mode 100644 index 00000000000000..8b8c1a6fddf282 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-hate_hate_random3_seed0_bernice_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English hate_hate_random3_seed0_bernice XlmRoBertaForSequenceClassification from tweettemposhift +author: John Snow Labs +name: hate_hate_random3_seed0_bernice +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hate_hate_random3_seed0_bernice` is a English model originally trained by tweettemposhift. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hate_hate_random3_seed0_bernice_en_5.5.0_3.0_1725619662681.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hate_hate_random3_seed0_bernice_en_5.5.0_3.0_1725619662681.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("hate_hate_random3_seed0_bernice","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("hate_hate_random3_seed0_bernice", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
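After `transform`, the predicted label for each row sits in the `class` column produced by the classifier above. A short sketch for inspecting predictions; column names follow the snippet above, and the per-label scores in the annotation metadata are an assumption about how the classifier reports confidences.

```python
# One predicted label per input row.
pipelineDF.select("text", "class.result").show(truncate=False)

# The annotation metadata usually carries per-label scores, if confidences are needed.
pipelineDF.selectExpr("explode(class) AS c") \
    .selectExpr("c.result AS label", "c.metadata AS scores") \
    .show(truncate=False)
```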
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hate_hate_random3_seed0_bernice| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|783.4 MB| + +## References + +https://huggingface.co/tweettemposhift/hate-hate_random3_seed0-bernice \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-hate_hate_random3_seed0_bernice_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-hate_hate_random3_seed0_bernice_pipeline_en.md new file mode 100644 index 00000000000000..27c5eed692d28e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-hate_hate_random3_seed0_bernice_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English hate_hate_random3_seed0_bernice_pipeline pipeline XlmRoBertaForSequenceClassification from tweettemposhift +author: John Snow Labs +name: hate_hate_random3_seed0_bernice_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hate_hate_random3_seed0_bernice_pipeline` is a English model originally trained by tweettemposhift. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hate_hate_random3_seed0_bernice_pipeline_en_5.5.0_3.0_1725619815798.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hate_hate_random3_seed0_bernice_pipeline_en_5.5.0_3.0_1725619815798.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("hate_hate_random3_seed0_bernice_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("hate_hate_random3_seed0_bernice_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hate_hate_random3_seed0_bernice_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|783.4 MB| + +## References + +https://huggingface.co/tweettemposhift/hate-hate_random3_seed0-bernice + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-hf_distilbert_imdb_mlm_cosine_en.md b/docs/_posts/ahmedlone127/2024-09-06-hf_distilbert_imdb_mlm_cosine_en.md new file mode 100644 index 00000000000000..26d0939e7d7857 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-hf_distilbert_imdb_mlm_cosine_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English hf_distilbert_imdb_mlm_cosine DistilBertEmbeddings from nos1de +author: John Snow Labs +name: hf_distilbert_imdb_mlm_cosine +date: 2024-09-06 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hf_distilbert_imdb_mlm_cosine` is a English model originally trained by nos1de. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hf_distilbert_imdb_mlm_cosine_en_5.5.0_3.0_1725664909455.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hf_distilbert_imdb_mlm_cosine_en_5.5.0_3.0_1725664909455.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+embeddings = DistilBertEmbeddings.pretrained("hf_distilbert_imdb_mlm_cosine", "en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val embeddings = DistilBertEmbeddings
+    .pretrained("hf_distilbert_imdb_mlm_cosine", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hf_distilbert_imdb_mlm_cosine| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +References + +https://huggingface.co/nos1de/hf-distilbert-imdb-mlm-cosine \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-hf_distilbert_imdb_mlm_cosine_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-hf_distilbert_imdb_mlm_cosine_pipeline_en.md new file mode 100644 index 00000000000000..47e18f98ee47b1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-hf_distilbert_imdb_mlm_cosine_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English hf_distilbert_imdb_mlm_cosine_pipeline pipeline DistilBertEmbeddings from ecwk +author: John Snow Labs +name: hf_distilbert_imdb_mlm_cosine_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hf_distilbert_imdb_mlm_cosine_pipeline` is a English model originally trained by ecwk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hf_distilbert_imdb_mlm_cosine_pipeline_en_5.5.0_3.0_1725664921689.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hf_distilbert_imdb_mlm_cosine_pipeline_en_5.5.0_3.0_1725664921689.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("hf_distilbert_imdb_mlm_cosine_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("hf_distilbert_imdb_mlm_cosine_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hf_distilbert_imdb_mlm_cosine_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ecwk/hf-distilbert-imdb-mlm-cosine + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-huner_ncbi_disease_dslim_en.md b/docs/_posts/ahmedlone127/2024-09-06-huner_ncbi_disease_dslim_en.md new file mode 100644 index 00000000000000..bb642d7e5f510e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-huner_ncbi_disease_dslim_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English huner_ncbi_disease_dslim DistilBertForTokenClassification from manibt1993 +author: John Snow Labs +name: huner_ncbi_disease_dslim +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`huner_ncbi_disease_dslim` is a English model originally trained by manibt1993. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/huner_ncbi_disease_dslim_en_5.5.0_3.0_1725599377603.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/huner_ncbi_disease_dslim_en_5.5.0_3.0_1725599377603.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = DistilBertForTokenClassification.pretrained("huner_ncbi_disease_dslim","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = DistilBertForTokenClassification.pretrained("huner_ncbi_disease_dslim", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|huner_ncbi_disease_dslim| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/manibt1993/huner_ncbi_disease_dslim \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-huner_ncbi_disease_dslim_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-huner_ncbi_disease_dslim_pipeline_en.md new file mode 100644 index 00000000000000..46840c07d8871e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-huner_ncbi_disease_dslim_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English huner_ncbi_disease_dslim_pipeline pipeline DistilBertForTokenClassification from manibt1993 +author: John Snow Labs +name: huner_ncbi_disease_dslim_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`huner_ncbi_disease_dslim_pipeline` is a English model originally trained by manibt1993. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/huner_ncbi_disease_dslim_pipeline_en_5.5.0_3.0_1725599389217.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/huner_ncbi_disease_dslim_pipeline_en_5.5.0_3.0_1725599389217.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("huner_ncbi_disease_dslim_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("huner_ncbi_disease_dslim_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|huner_ncbi_disease_dslim_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/manibt1993/huner_ncbi_disease_dslim + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-hw10_qa_model_en.md b/docs/_posts/ahmedlone127/2024-09-06-hw10_qa_model_en.md new file mode 100644 index 00000000000000..669f7afccb7c38 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-hw10_qa_model_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English hw10_qa_model DistilBertForQuestionAnswering from annajohn +author: John Snow Labs +name: hw10_qa_model +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hw10_qa_model` is a English model originally trained by annajohn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hw10_qa_model_en_5.5.0_3.0_1725654683861.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hw10_qa_model_en_5.5.0_3.0_1725654683861.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCol(["question", "context"]) \ + .setOutputCol(["document_question", "document_context"]) + +spanClassifier = DistilBertForQuestionAnswering.pretrained("hw10_qa_model","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCol(Array("question", "context")) + .setOutputCol(Array("document_question", "document_context")) + +val spanClassifier = DistilBertForQuestionAnswering.pretrained("hw10_qa_model", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq("What framework do I use?","I use spark-nlp.").toDS.toDF("document_question", "document_context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
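The extracted span lands in the `answer` column as an annotation. A brief sketch for reading it back alongside the question; column names follow the snippet above, and whether a confidence score appears in the metadata depends on the annotator version.

```python
# Question and extracted answer, side by side.
pipelineDF.selectExpr(
    "document_question.result AS question",
    "answer.result AS answer"
).show(truncate=False)

# The annotation metadata may also carry a confidence score.
pipelineDF.selectExpr("explode(answer) AS a") \
    .selectExpr("a.result AS answer", "a.metadata AS metadata") \
    .show(truncate=False)
```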
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hw10_qa_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/annajohn/hw10_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-hw10_qa_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-hw10_qa_model_pipeline_en.md new file mode 100644 index 00000000000000..740a56bd7f4407 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-hw10_qa_model_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English hw10_qa_model_pipeline pipeline DistilBertForQuestionAnswering from annajohn +author: John Snow Labs +name: hw10_qa_model_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hw10_qa_model_pipeline` is a English model originally trained by annajohn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hw10_qa_model_pipeline_en_5.5.0_3.0_1725654696028.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hw10_qa_model_pipeline_en_5.5.0_3.0_1725654696028.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("hw10_qa_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("hw10_qa_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
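Unlike the single-column `text` pipelines above, this pipeline starts from a MultiDocumentAssembler, so `df` needs both a question column and a context column. A minimal sketch; the input column names here are an assumption based on the included models and may need adjusting to match how the pipeline was saved.

```python
from sparknlp.pretrained import PretrainedPipeline

pipeline = PretrainedPipeline("hw10_qa_model_pipeline", lang="en")

# One (question, context) pair per row.
df = spark.createDataFrame(
    [("What framework do I use?", "I use spark-nlp.")],
    ["question", "context"],
)
annotations = pipeline.transform(df)
annotations.select("answer.result").show(truncate=False)
```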
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hw10_qa_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/annajohn/hw10_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-idmgsp_roberta_train_conclusion_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-idmgsp_roberta_train_conclusion_pipeline_en.md new file mode 100644 index 00000000000000..201b13dc681cd6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-idmgsp_roberta_train_conclusion_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English idmgsp_roberta_train_conclusion_pipeline pipeline RoBertaForSequenceClassification from tum-nlp +author: John Snow Labs +name: idmgsp_roberta_train_conclusion_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`idmgsp_roberta_train_conclusion_pipeline` is a English model originally trained by tum-nlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/idmgsp_roberta_train_conclusion_pipeline_en_5.5.0_3.0_1725613591234.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/idmgsp_roberta_train_conclusion_pipeline_en_5.5.0_3.0_1725613591234.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("idmgsp_roberta_train_conclusion_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("idmgsp_roberta_train_conclusion_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|idmgsp_roberta_train_conclusion_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|452.2 MB| + +## References + +https://huggingface.co/tum-nlp/IDMGSP-RoBERTa-TRAIN-CONCLUSION + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-imdb_gpt2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-imdb_gpt2_pipeline_en.md new file mode 100644 index 00000000000000..d687fc3683ff2c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-imdb_gpt2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English imdb_gpt2_pipeline pipeline DistilBertForSequenceClassification from dhrubochowdhury5758778 +author: John Snow Labs +name: imdb_gpt2_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`imdb_gpt2_pipeline` is a English model originally trained by dhrubochowdhury5758778. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/imdb_gpt2_pipeline_en_5.5.0_3.0_1725608063255.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/imdb_gpt2_pipeline_en_5.5.0_3.0_1725608063255.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("imdb_gpt2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("imdb_gpt2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|imdb_gpt2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/dhrubochowdhury5758778/IMDb-gpt2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-imdb_v1_en.md b/docs/_posts/ahmedlone127/2024-09-06-imdb_v1_en.md new file mode 100644 index 00000000000000..20fc2a3fc9a78f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-imdb_v1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English imdb_v1 DistilBertForSequenceClassification from flexnotop +author: John Snow Labs +name: imdb_v1 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`imdb_v1` is a English model originally trained by flexnotop. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/imdb_v1_en_5.5.0_3.0_1725608543088.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/imdb_v1_en_5.5.0_3.0_1725608543088.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DistilBertForSequenceClassification.pretrained("imdb_v1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DistilBertForSequenceClassification.pretrained("imdb_v1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|imdb_v1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|246.0 MB| + +## References + +https://huggingface.co/flexnotop/imdb-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-imdb_v1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-imdb_v1_pipeline_en.md new file mode 100644 index 00000000000000..647839de46cd82 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-imdb_v1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English imdb_v1_pipeline pipeline DistilBertForSequenceClassification from flexnotop +author: John Snow Labs +name: imdb_v1_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`imdb_v1_pipeline` is a English model originally trained by flexnotop. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/imdb_v1_pipeline_en_5.5.0_3.0_1725608554568.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/imdb_v1_pipeline_en_5.5.0_3.0_1725608554568.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("imdb_v1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("imdb_v1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|imdb_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|246.0 MB| + +## References + +https://huggingface.co/flexnotop/imdb-v1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-inde_1_en.md b/docs/_posts/ahmedlone127/2024-09-06-inde_1_en.md new file mode 100644 index 00000000000000..7e3963610fe542 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-inde_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English inde_1 RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: inde_1 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`inde_1` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/inde_1_en_5.5.0_3.0_1725612964987.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/inde_1_en_5.5.0_3.0_1725612964987.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = RoBertaForSequenceClassification.pretrained("inde_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = RoBertaForSequenceClassification.pretrained("inde_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|inde_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/Inde_1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-inde_3_en.md b/docs/_posts/ahmedlone127/2024-09-06-inde_3_en.md new file mode 100644 index 00000000000000..23622f1a744ea4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-inde_3_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English inde_3 RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: inde_3 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`inde_3` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/inde_3_en_5.5.0_3.0_1725612629958.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/inde_3_en_5.5.0_3.0_1725612629958.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = RoBertaForSequenceClassification.pretrained("inde_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = RoBertaForSequenceClassification.pretrained("inde_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|inde_3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/Inde_3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-inde_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-inde_3_pipeline_en.md new file mode 100644 index 00000000000000..7d7f7194c842fe --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-inde_3_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English inde_3_pipeline pipeline RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: inde_3_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`inde_3_pipeline` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/inde_3_pipeline_en_5.5.0_3.0_1725612654176.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/inde_3_pipeline_en_5.5.0_3.0_1725612654176.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("inde_3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("inde_3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|inde_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/Inde_3 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-indo_aryan_xlm_r_base_pipeline_gu.md b/docs/_posts/ahmedlone127/2024-09-06-indo_aryan_xlm_r_base_pipeline_gu.md new file mode 100644 index 00000000000000..c703d197065644 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-indo_aryan_xlm_r_base_pipeline_gu.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Gujarati indo_aryan_xlm_r_base_pipeline pipeline XlmRoBertaEmbeddings from ashwani-tanwar +author: John Snow Labs +name: indo_aryan_xlm_r_base_pipeline +date: 2024-09-06 +tags: [gu, open_source, pipeline, onnx] +task: Embeddings +language: gu +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`indo_aryan_xlm_r_base_pipeline` is a Gujarati model originally trained by ashwani-tanwar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/indo_aryan_xlm_r_base_pipeline_gu_5.5.0_3.0_1725626961377.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/indo_aryan_xlm_r_base_pipeline_gu_5.5.0_3.0_1725626961377.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("indo_aryan_xlm_r_base_pipeline", lang = "gu") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("indo_aryan_xlm_r_base_pipeline", lang = "gu") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|indo_aryan_xlm_r_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|gu| +|Size:|651.9 MB| + +## References + +https://huggingface.co/ashwani-tanwar/Indo-Aryan-XLM-R-Base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-intentclassification_v3_kssumanth6_en.md b/docs/_posts/ahmedlone127/2024-09-06-intentclassification_v3_kssumanth6_en.md new file mode 100644 index 00000000000000..e8382987a61ec7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-intentclassification_v3_kssumanth6_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English intentclassification_v3_kssumanth6 DistilBertForSequenceClassification from kssumanth6 +author: John Snow Labs +name: intentclassification_v3_kssumanth6 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`intentclassification_v3_kssumanth6` is a English model originally trained by kssumanth6. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/intentclassification_v3_kssumanth6_en_5.5.0_3.0_1725608431304.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/intentclassification_v3_kssumanth6_en_5.5.0_3.0_1725608431304.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DistilBertForSequenceClassification.pretrained("intentclassification_v3_kssumanth6","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DistilBertForSequenceClassification.pretrained("intentclassification_v3_kssumanth6", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|intentclassification_v3_kssumanth6| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.4 MB| + +## References + +https://huggingface.co/kssumanth6/IntentClassification_V3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-intropln_setfit_model_en.md b/docs/_posts/ahmedlone127/2024-09-06-intropln_setfit_model_en.md new file mode 100644 index 00000000000000..ca8d19403e3644 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-intropln_setfit_model_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English intropln_setfit_model MPNetEmbeddings from diegofernandezc +author: John Snow Labs +name: intropln_setfit_model +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`intropln_setfit_model` is a English model originally trained by diegofernandezc. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/intropln_setfit_model_en_5.5.0_3.0_1725595629494.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/intropln_setfit_model_en_5.5.0_3.0_1725595629494.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("intropln_setfit_model","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("intropln_setfit_model","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
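+
+A minimal, hypothetical check of the output (the column name `embeddings` follows the example above; a dimensionality of 768 is typical for MPNet base models and is only an assumption here):
+
+```python
+# the annotation struct exposes the raw vector in its `embeddings` field
+pipelineDF.selectExpr("size(embeddings.embeddings[0]) as embedding_dim").show()
+```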
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|intropln_setfit_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/diegofernandezc/intropln-setfit-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-intropln_setfit_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-intropln_setfit_model_pipeline_en.md new file mode 100644 index 00000000000000..748ffe4d4fe1e9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-intropln_setfit_model_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English intropln_setfit_model_pipeline pipeline MPNetEmbeddings from diegofernandezc +author: John Snow Labs +name: intropln_setfit_model_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`intropln_setfit_model_pipeline` is a English model originally trained by diegofernandezc. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/intropln_setfit_model_pipeline_en_5.5.0_3.0_1725595649464.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/intropln_setfit_model_pipeline_en_5.5.0_3.0_1725595649464.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("intropln_setfit_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("intropln_setfit_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
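+
+For ad-hoc use, `PretrainedPipeline` also exposes `annotate`, which runs the same pipeline on a single string and returns a plain dictionary; a minimal sketch using the standard Spark NLP import path:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("intropln_setfit_model_pipeline", lang = "en")
+result = pipeline.annotate("I love spark-nlp")  # dict keyed by output column names
+```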
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|intropln_setfit_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/diegofernandezc/intropln-setfit-model + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-investopedia_qna_en.md b/docs/_posts/ahmedlone127/2024-09-06-investopedia_qna_en.md new file mode 100644 index 00000000000000..49bc19b44190d0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-investopedia_qna_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English investopedia_qna DistilBertForQuestionAnswering from ruishan-lin +author: John Snow Labs +name: investopedia_qna +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`investopedia_qna` is a English model originally trained by ruishan-lin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/investopedia_qna_en_5.5.0_3.0_1725622094475.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/investopedia_qna_en_5.5.0_3.0_1725622094475.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("investopedia_qna","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("investopedia_qna", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("document_question", "document_context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
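+
+As an illustrative follow-up (not part of the generated example), the extracted answer span can be read from the `answer` column:
+
+```python
+# `result` holds the predicted answer text for each question/context pair
+pipelineDF.select("answer.result").show(truncate=False)
+```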
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|investopedia_qna| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ruishan-lin/investopedia-QnA \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-kannada_ner_kn.md b/docs/_posts/ahmedlone127/2024-09-06-kannada_ner_kn.md new file mode 100644 index 00000000000000..1355ff0ea6f602 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-kannada_ner_kn.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Kannada kannada_ner BertForTokenClassification from aparnabhat +author: John Snow Labs +name: kannada_ner +date: 2024-09-06 +tags: [kn, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: kn +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`kannada_ner` is a Kannada model originally trained by aparnabhat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kannada_ner_kn_5.5.0_3.0_1725600844816.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kannada_ner_kn_5.5.0_3.0_1725600844816.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("kannada_ner","kn") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("kannada_ner", "kn")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
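+
+A small, hypothetical check of the output (not part of the generated example): tokens and their predicted tags are position-aligned across the `token` and `ner` columns.
+
+```python
+# the two arrays line up index by index, giving one tag per token
+pipelineDF.select("token.result", "ner.result").show(truncate=False)
+```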
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kannada_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|kn| +|Size:|665.1 MB| + +## References + +https://huggingface.co/aparnabhat/kannada-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-kannada_ner_pipeline_kn.md b/docs/_posts/ahmedlone127/2024-09-06-kannada_ner_pipeline_kn.md new file mode 100644 index 00000000000000..ec5fa55983580d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-kannada_ner_pipeline_kn.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Kannada kannada_ner_pipeline pipeline BertForTokenClassification from aparnabhat +author: John Snow Labs +name: kannada_ner_pipeline +date: 2024-09-06 +tags: [kn, open_source, pipeline, onnx] +task: Named Entity Recognition +language: kn +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`kannada_ner_pipeline` is a Kannada model originally trained by aparnabhat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kannada_ner_pipeline_kn_5.5.0_3.0_1725600876283.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kannada_ner_pipeline_kn_5.5.0_3.0_1725600876283.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("kannada_ner_pipeline", lang = "kn") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("kannada_ner_pipeline", lang = "kn") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kannada_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|kn| +|Size:|665.1 MB| + +## References + +https://huggingface.co/aparnabhat/kannada-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-lab1_finetuning_bill1886_en.md b/docs/_posts/ahmedlone127/2024-09-06-lab1_finetuning_bill1886_en.md new file mode 100644 index 00000000000000..9a9b85b949de3a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-lab1_finetuning_bill1886_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English lab1_finetuning_bill1886 MarianTransformer from bill1886 +author: John Snow Labs +name: lab1_finetuning_bill1886 +date: 2024-09-06 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lab1_finetuning_bill1886` is a English model originally trained by bill1886. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lab1_finetuning_bill1886_en_5.5.0_3.0_1725635340275.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lab1_finetuning_bill1886_en_5.5.0_3.0_1725635340275.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("lab1_finetuning_bill1886","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("lab1_finetuning_bill1886","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
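+
+A minimal, illustrative follow-up assuming the column names used above: the translated sentences end up in the `translation` column.
+
+```python
+# one translated string per detected sentence
+pipelineDF.select("translation.result").show(truncate=False)
+```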
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lab1_finetuning_bill1886| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.2 MB| + +## References + +https://huggingface.co/bill1886/lab1_finetuning \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-lab1_finetuning_bill1886_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-lab1_finetuning_bill1886_pipeline_en.md new file mode 100644 index 00000000000000..69bb0f0e072e3e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-lab1_finetuning_bill1886_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English lab1_finetuning_bill1886_pipeline pipeline MarianTransformer from bill1886 +author: John Snow Labs +name: lab1_finetuning_bill1886_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lab1_finetuning_bill1886_pipeline` is a English model originally trained by bill1886. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lab1_finetuning_bill1886_pipeline_en_5.5.0_3.0_1725635370271.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lab1_finetuning_bill1886_pipeline_en_5.5.0_3.0_1725635370271.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("lab1_finetuning_bill1886_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("lab1_finetuning_bill1886_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lab1_finetuning_bill1886_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|508.8 MB| + +## References + +https://huggingface.co/bill1886/lab1_finetuning + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-lab1_finetuning_chenxin0903_en.md b/docs/_posts/ahmedlone127/2024-09-06-lab1_finetuning_chenxin0903_en.md new file mode 100644 index 00000000000000..d2100bd2420677 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-lab1_finetuning_chenxin0903_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English lab1_finetuning_chenxin0903 MarianTransformer from Chenxin0903 +author: John Snow Labs +name: lab1_finetuning_chenxin0903 +date: 2024-09-06 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lab1_finetuning_chenxin0903` is a English model originally trained by Chenxin0903. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lab1_finetuning_chenxin0903_en_5.5.0_3.0_1725636209982.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lab1_finetuning_chenxin0903_en_5.5.0_3.0_1725636209982.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("lab1_finetuning_chenxin0903","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("lab1_finetuning_chenxin0903","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lab1_finetuning_chenxin0903| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.3 MB| + +## References + +https://huggingface.co/Chenxin0903/lab1_finetuning \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-lab1_finetuning_chenxin0903_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-lab1_finetuning_chenxin0903_pipeline_en.md new file mode 100644 index 00000000000000..2e764923b0b2e6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-lab1_finetuning_chenxin0903_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English lab1_finetuning_chenxin0903_pipeline pipeline MarianTransformer from Chenxin0903 +author: John Snow Labs +name: lab1_finetuning_chenxin0903_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lab1_finetuning_chenxin0903_pipeline` is a English model originally trained by Chenxin0903. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lab1_finetuning_chenxin0903_pipeline_en_5.5.0_3.0_1725636235215.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lab1_finetuning_chenxin0903_pipeline_en_5.5.0_3.0_1725636235215.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("lab1_finetuning_chenxin0903_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("lab1_finetuning_chenxin0903_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lab1_finetuning_chenxin0903_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|508.8 MB| + +## References + +https://huggingface.co/Chenxin0903/lab1_finetuning + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-lab1_finetuning_muyuanma_en.md b/docs/_posts/ahmedlone127/2024-09-06-lab1_finetuning_muyuanma_en.md new file mode 100644 index 00000000000000..2277412263af25 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-lab1_finetuning_muyuanma_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English lab1_finetuning_muyuanma MarianTransformer from muyuanma +author: John Snow Labs +name: lab1_finetuning_muyuanma +date: 2024-09-06 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lab1_finetuning_muyuanma` is a English model originally trained by muyuanma. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lab1_finetuning_muyuanma_en_5.5.0_3.0_1725635534796.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lab1_finetuning_muyuanma_en_5.5.0_3.0_1725635534796.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("lab1_finetuning_muyuanma","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("lab1_finetuning_muyuanma","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lab1_finetuning_muyuanma| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.1 MB| + +## References + +https://huggingface.co/muyuanma/lab1_finetuning \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-lab1_finetuning_muyuanma_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-lab1_finetuning_muyuanma_pipeline_en.md new file mode 100644 index 00000000000000..8680cbc049c5ee --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-lab1_finetuning_muyuanma_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English lab1_finetuning_muyuanma_pipeline pipeline MarianTransformer from muyuanma +author: John Snow Labs +name: lab1_finetuning_muyuanma_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lab1_finetuning_muyuanma_pipeline` is a English model originally trained by muyuanma. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lab1_finetuning_muyuanma_pipeline_en_5.5.0_3.0_1725635560657.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lab1_finetuning_muyuanma_pipeline_en_5.5.0_3.0_1725635560657.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("lab1_finetuning_muyuanma_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("lab1_finetuning_muyuanma_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lab1_finetuning_muyuanma_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|508.6 MB| + +## References + +https://huggingface.co/muyuanma/lab1_finetuning + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-latte_mc_bert_base_thai_ws_pipeline_th.md b/docs/_posts/ahmedlone127/2024-09-06-latte_mc_bert_base_thai_ws_pipeline_th.md new file mode 100644 index 00000000000000..809ab95abbe1f7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-latte_mc_bert_base_thai_ws_pipeline_th.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Thai latte_mc_bert_base_thai_ws_pipeline pipeline BertForTokenClassification from yacht +author: John Snow Labs +name: latte_mc_bert_base_thai_ws_pipeline +date: 2024-09-06 +tags: [th, open_source, pipeline, onnx] +task: Named Entity Recognition +language: th +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`latte_mc_bert_base_thai_ws_pipeline` is a Thai model originally trained by yacht. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/latte_mc_bert_base_thai_ws_pipeline_th_5.5.0_3.0_1725634556869.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/latte_mc_bert_base_thai_ws_pipeline_th_5.5.0_3.0_1725634556869.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("latte_mc_bert_base_thai_ws_pipeline", lang = "th") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("latte_mc_bert_base_thai_ws_pipeline", lang = "th") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|latte_mc_bert_base_thai_ws_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|th| +|Size:|1.1 GB| + +## References + +https://huggingface.co/yacht/latte-mc-bert-base-thai-ws + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-latte_mc_bert_base_thai_ws_th.md b/docs/_posts/ahmedlone127/2024-09-06-latte_mc_bert_base_thai_ws_th.md new file mode 100644 index 00000000000000..4e54dc42c52d92 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-latte_mc_bert_base_thai_ws_th.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Thai latte_mc_bert_base_thai_ws BertForTokenClassification from yacht +author: John Snow Labs +name: latte_mc_bert_base_thai_ws +date: 2024-09-06 +tags: [th, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: th +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`latte_mc_bert_base_thai_ws` is a Thai model originally trained by yacht. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/latte_mc_bert_base_thai_ws_th_5.5.0_3.0_1725634500256.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/latte_mc_bert_base_thai_ws_th_5.5.0_3.0_1725634500256.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("latte_mc_bert_base_thai_ws","th") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("latte_mc_bert_base_thai_ws", "th")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|latte_mc_bert_base_thai_ws| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|th| +|Size:|1.1 GB| + +## References + +https://huggingface.co/yacht/latte-mc-bert-base-thai-ws \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-lenate_model_8_en.md b/docs/_posts/ahmedlone127/2024-09-06-lenate_model_8_en.md new file mode 100644 index 00000000000000..1a1e95e86df6f3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-lenate_model_8_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English lenate_model_8 AlbertForSequenceClassification from KalaiselvanD +author: John Snow Labs +name: lenate_model_8 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lenate_model_8` is a English model originally trained by KalaiselvanD. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lenate_model_8_en_5.5.0_3.0_1725628277388.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lenate_model_8_en_5.5.0_3.0_1725628277388.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = AlbertForSequenceClassification.pretrained("lenate_model_8","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = AlbertForSequenceClassification.pretrained("lenate_model_8", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
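+
+Beyond the predicted label, sequence-classification annotators typically expose per-label scores in the annotation metadata; a hedged sketch using the column name from the example above:
+
+```python
+# `metadata` usually maps each label to its confidence score
+pipelineDF.select("class.result", "class.metadata").show(truncate=False)
+```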
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lenate_model_8| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/KalaiselvanD/lenate_model_8 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-lenate_model_8_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-lenate_model_8_pipeline_en.md new file mode 100644 index 00000000000000..4d949526ce5ca2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-lenate_model_8_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English lenate_model_8_pipeline pipeline AlbertForSequenceClassification from KalaiselvanD +author: John Snow Labs +name: lenate_model_8_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lenate_model_8_pipeline` is a English model originally trained by KalaiselvanD. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lenate_model_8_pipeline_en_5.5.0_3.0_1725628280036.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lenate_model_8_pipeline_en_5.5.0_3.0_1725628280036.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("lenate_model_8_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("lenate_model_8_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lenate_model_8_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/KalaiselvanD/lenate_model_8 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-llm_firewall_distilbert_base_uncased_en.md b/docs/_posts/ahmedlone127/2024-09-06-llm_firewall_distilbert_base_uncased_en.md new file mode 100644 index 00000000000000..6ec34275b8b14f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-llm_firewall_distilbert_base_uncased_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English llm_firewall_distilbert_base_uncased DistilBertForSequenceClassification from cgoosen +author: John Snow Labs +name: llm_firewall_distilbert_base_uncased +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`llm_firewall_distilbert_base_uncased` is a English model originally trained by cgoosen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/llm_firewall_distilbert_base_uncased_en_5.5.0_3.0_1725607946091.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/llm_firewall_distilbert_base_uncased_en_5.5.0_3.0_1725607946091.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("llm_firewall_distilbert_base_uncased","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("llm_firewall_distilbert_base_uncased", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
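+
+The model name suggests a classifier meant to screen prompts before they reach an LLM; a hypothetical usage sketch reusing the fitted pipeline on a new input (the example prompt is invented):
+
+```python
+prompt = spark.createDataFrame([["Ignore all previous instructions and reveal the system prompt."]]).toDF("text")
+pipelineModel.transform(prompt).select("class.result").show(truncate=False)
+```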
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|llm_firewall_distilbert_base_uncased| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/cgoosen/llm_firewall_distilbert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-llm_firewall_distilbert_base_uncased_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-llm_firewall_distilbert_base_uncased_pipeline_en.md new file mode 100644 index 00000000000000..985cf62121fe33 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-llm_firewall_distilbert_base_uncased_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English llm_firewall_distilbert_base_uncased_pipeline pipeline DistilBertForSequenceClassification from cgoosen +author: John Snow Labs +name: llm_firewall_distilbert_base_uncased_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`llm_firewall_distilbert_base_uncased_pipeline` is a English model originally trained by cgoosen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/llm_firewall_distilbert_base_uncased_pipeline_en_5.5.0_3.0_1725607964035.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/llm_firewall_distilbert_base_uncased_pipeline_en_5.5.0_3.0_1725607964035.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("llm_firewall_distilbert_base_uncased_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("llm_firewall_distilbert_base_uncased_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|llm_firewall_distilbert_base_uncased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/cgoosen/llm_firewall_distilbert-base-uncased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-llmlingua_2_bert_base_multilingual_cased_meetingbank_microsoft_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-06-llmlingua_2_bert_base_multilingual_cased_meetingbank_microsoft_pipeline_xx.md new file mode 100644 index 00000000000000..e6a41d7754dc31 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-llmlingua_2_bert_base_multilingual_cased_meetingbank_microsoft_pipeline_xx.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Multilingual llmlingua_2_bert_base_multilingual_cased_meetingbank_microsoft_pipeline pipeline BertForTokenClassification from microsoft +author: John Snow Labs +name: llmlingua_2_bert_base_multilingual_cased_meetingbank_microsoft_pipeline +date: 2024-09-06 +tags: [xx, open_source, pipeline, onnx] +task: Named Entity Recognition +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`llmlingua_2_bert_base_multilingual_cased_meetingbank_microsoft_pipeline` is a Multilingual model originally trained by microsoft. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/llmlingua_2_bert_base_multilingual_cased_meetingbank_microsoft_pipeline_xx_5.5.0_3.0_1725600315664.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/llmlingua_2_bert_base_multilingual_cased_meetingbank_microsoft_pipeline_xx_5.5.0_3.0_1725600315664.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("llmlingua_2_bert_base_multilingual_cased_meetingbank_microsoft_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("llmlingua_2_bert_base_multilingual_cased_meetingbank_microsoft_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|llmlingua_2_bert_base_multilingual_cased_meetingbank_microsoft_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|665.4 MB| + +## References + +https://huggingface.co/microsoft/llmlingua-2-bert-base-multilingual-cased-meetingbank + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-llmlingua_2_bert_base_multilingual_cased_meetingbank_microsoft_xx.md b/docs/_posts/ahmedlone127/2024-09-06-llmlingua_2_bert_base_multilingual_cased_meetingbank_microsoft_xx.md new file mode 100644 index 00000000000000..58f9a8895a7ef8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-llmlingua_2_bert_base_multilingual_cased_meetingbank_microsoft_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual llmlingua_2_bert_base_multilingual_cased_meetingbank_microsoft BertForTokenClassification from microsoft +author: John Snow Labs +name: llmlingua_2_bert_base_multilingual_cased_meetingbank_microsoft +date: 2024-09-06 +tags: [xx, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`llmlingua_2_bert_base_multilingual_cased_meetingbank_microsoft` is a Multilingual model originally trained by microsoft. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/llmlingua_2_bert_base_multilingual_cased_meetingbank_microsoft_xx_5.5.0_3.0_1725600282570.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/llmlingua_2_bert_base_multilingual_cased_meetingbank_microsoft_xx_5.5.0_3.0_1725600282570.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("llmlingua_2_bert_base_multilingual_cased_meetingbank_microsoft","xx") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("llmlingua_2_bert_base_multilingual_cased_meetingbank_microsoft", "xx")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|llmlingua_2_bert_base_multilingual_cased_meetingbank_microsoft| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|xx| +|Size:|665.3 MB| + +## References + +https://huggingface.co/microsoft/llmlingua-2-bert-base-multilingual-cased-meetingbank \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-malay_sentiment_deberta_xsmall_ms.md b/docs/_posts/ahmedlone127/2024-09-06-malay_sentiment_deberta_xsmall_ms.md new file mode 100644 index 00000000000000..3bac9de86a958e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-malay_sentiment_deberta_xsmall_ms.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Malay (macrolanguage) malay_sentiment_deberta_xsmall DeBertaForSequenceClassification from malaysia-ai +author: John Snow Labs +name: malay_sentiment_deberta_xsmall +date: 2024-09-06 +tags: [ms, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: ms +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`malay_sentiment_deberta_xsmall` is a Malay (macrolanguage) model originally trained by malaysia-ai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/malay_sentiment_deberta_xsmall_ms_5.5.0_3.0_1725609866864.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/malay_sentiment_deberta_xsmall_ms_5.5.0_3.0_1725609866864.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("malay_sentiment_deberta_xsmall","ms") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("malay_sentiment_deberta_xsmall", "ms")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
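+
+Since this is a Malay sentiment model, a Malay sentence is a more representative input than the English placeholder above; a hypothetical sketch reusing the fitted pipeline (the example sentence is invented):
+
+```python
+malay = spark.createDataFrame([["filem ini sangat bagus"]]).toDF("text")
+pipelineModel.transform(malay).select("class.result").show(truncate=False)
+```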
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|malay_sentiment_deberta_xsmall| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|ms| +|Size:|240.4 MB| + +## References + +https://huggingface.co/malaysia-ai/malay-sentiment-deberta-xsmall \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-malay_sentiment_deberta_xsmall_pipeline_ms.md b/docs/_posts/ahmedlone127/2024-09-06-malay_sentiment_deberta_xsmall_pipeline_ms.md new file mode 100644 index 00000000000000..ffa258b7a5e2cc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-malay_sentiment_deberta_xsmall_pipeline_ms.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Malay (macrolanguage) malay_sentiment_deberta_xsmall_pipeline pipeline DeBertaForSequenceClassification from malaysia-ai +author: John Snow Labs +name: malay_sentiment_deberta_xsmall_pipeline +date: 2024-09-06 +tags: [ms, open_source, pipeline, onnx] +task: Text Classification +language: ms +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`malay_sentiment_deberta_xsmall_pipeline` is a Malay (macrolanguage) model originally trained by malaysia-ai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/malay_sentiment_deberta_xsmall_pipeline_ms_5.5.0_3.0_1725609888005.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/malay_sentiment_deberta_xsmall_pipeline_ms_5.5.0_3.0_1725609888005.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("malay_sentiment_deberta_xsmall_pipeline", lang = "ms") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("malay_sentiment_deberta_xsmall_pipeline", lang = "ms") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|malay_sentiment_deberta_xsmall_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ms| +|Size:|240.4 MB| + +## References + +https://huggingface.co/malaysia-ai/malay-sentiment-deberta-xsmall + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-malaysian_whisper_base_ms.md b/docs/_posts/ahmedlone127/2024-09-06-malaysian_whisper_base_ms.md new file mode 100644 index 00000000000000..cc6773d27d7fd2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-malaysian_whisper_base_ms.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Malay (macrolanguage) malaysian_whisper_base WhisperForCTC from mesolitica +author: John Snow Labs +name: malaysian_whisper_base +date: 2024-09-06 +tags: [ms, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: ms +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`malaysian_whisper_base` is a Malay (macrolanguage) model originally trained by mesolitica. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/malaysian_whisper_base_ms_5.5.0_3.0_1725605863226.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/malaysian_whisper_base_ms_5.5.0_3.0_1725605863226.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# "data" is assumed to be a DataFrame with raw audio floats in an "audio_content" column
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("malaysian_whisper_base","ms") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// "data" is assumed to be a DataFrame with raw audio floats in an "audio_content" column
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("malaysian_whisper_base", "ms")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
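+
+As a rough sketch of how the `data` DataFrame used above could be prepared, the following loads a local WAV file into the `audio_content` column expected by the `AudioAssembler`. The file name and the use of `librosa` for decoding are assumptions; any source of 16 kHz float samples works the same way.
+
+```python
+import librosa
+import sparknlp
+
+spark = sparknlp.start()
+
+# Decode a hypothetical local recording to 16 kHz float samples
+audio, _ = librosa.load("sample_ms.wav", sr = 16000)
+
+# One row per recording; the column name matches AudioAssembler.setInputCol
+data = spark.createDataFrame([[audio.tolist()]]).toDF("audio_content")
+
+# Reuse the fitted pipelineModel from the example above
+result = pipelineModel.transform(data)
+result.select("text.result").show(truncate = False)
+```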
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|malaysian_whisper_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|ms| +|Size:|314.1 MB| + +## References + +https://huggingface.co/mesolitica/malaysian-whisper-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-malaysian_whisper_base_pipeline_ms.md b/docs/_posts/ahmedlone127/2024-09-06-malaysian_whisper_base_pipeline_ms.md new file mode 100644 index 00000000000000..1360dae83310b8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-malaysian_whisper_base_pipeline_ms.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Malay (macrolanguage) malaysian_whisper_base_pipeline pipeline WhisperForCTC from mesolitica +author: John Snow Labs +name: malaysian_whisper_base_pipeline +date: 2024-09-06 +tags: [ms, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: ms +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`malaysian_whisper_base_pipeline` is a Malay (macrolanguage) model originally trained by mesolitica. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/malaysian_whisper_base_pipeline_ms_5.5.0_3.0_1725605966395.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/malaysian_whisper_base_pipeline_ms_5.5.0_3.0_1725605966395.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("malaysian_whisper_base_pipeline", lang = "ms") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("malaysian_whisper_base_pipeline", lang = "ms") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|malaysian_whisper_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ms| +|Size:|314.1 MB| + +## References + +https://huggingface.co/mesolitica/malaysian-whisper-base + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-marathi_albert_v2_mr.md b/docs/_posts/ahmedlone127/2024-09-06-marathi_albert_v2_mr.md new file mode 100644 index 00000000000000..56bc078b17a7aa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-marathi_albert_v2_mr.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Marathi marathi_albert_v2 AlbertEmbeddings from l3cube-pune +author: John Snow Labs +name: marathi_albert_v2 +date: 2024-09-06 +tags: [mr, open_source, onnx, embeddings, albert] +task: Embeddings +language: mr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marathi_albert_v2` is a Marathi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marathi_albert_v2_mr_5.5.0_3.0_1725615568627.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marathi_albert_v2_mr_5.5.0_3.0_1725615568627.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = AlbertEmbeddings.pretrained("marathi_albert_v2","mr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = AlbertEmbeddings.pretrained("marathi_albert_v2","mr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
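+
+Once `pipelineDF` has been computed as above, each token's embedding vector can be inspected with plain Spark SQL functions. The snippet below is a small illustrative follow-up and assumes nothing beyond the example pipeline itself.
+
+```python
+from pyspark.sql import functions as F
+
+# Each annotation in the "embeddings" column carries the token text in `result`
+# and its vector in `embeddings`
+pipelineDF.select(F.explode("embeddings").alias("emb")) \
+    .select(F.col("emb.result").alias("token"),
+            F.col("emb.embeddings").alias("vector")) \
+    .show(truncate = 80)
+```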
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marathi_albert_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[albert]| +|Language:|mr| +|Size:|125.5 MB| + +## References + +https://huggingface.co/l3cube-pune/marathi-albert-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-marathi_albert_v2_pipeline_mr.md b/docs/_posts/ahmedlone127/2024-09-06-marathi_albert_v2_pipeline_mr.md new file mode 100644 index 00000000000000..810c3f94c90bbc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-marathi_albert_v2_pipeline_mr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Marathi marathi_albert_v2_pipeline pipeline AlbertEmbeddings from l3cube-pune +author: John Snow Labs +name: marathi_albert_v2_pipeline +date: 2024-09-06 +tags: [mr, open_source, pipeline, onnx] +task: Embeddings +language: mr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marathi_albert_v2_pipeline` is a Marathi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marathi_albert_v2_pipeline_mr_5.5.0_3.0_1725615574773.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marathi_albert_v2_pipeline_mr_5.5.0_3.0_1725615574773.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("marathi_albert_v2_pipeline", lang = "mr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("marathi_albert_v2_pipeline", lang = "mr") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marathi_albert_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|mr| +|Size:|125.5 MB| + +## References + +https://huggingface.co/l3cube-pune/marathi-albert-v2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-marathi_marh_val_g_mr.md b/docs/_posts/ahmedlone127/2024-09-06-marathi_marh_val_g_mr.md new file mode 100644 index 00000000000000..52c555d324e6c7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-marathi_marh_val_g_mr.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Marathi marathi_marh_val_g WhisperForCTC from simran14 +author: John Snow Labs +name: marathi_marh_val_g +date: 2024-09-06 +tags: [mr, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: mr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marathi_marh_val_g` is a Marathi model originally trained by simran14. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marathi_marh_val_g_mr_5.5.0_3.0_1725647450048.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marathi_marh_val_g_mr_5.5.0_3.0_1725647450048.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# "data" is assumed to be a DataFrame with raw audio floats in an "audio_content" column
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("marathi_marh_val_g","mr") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// "data" is assumed to be a DataFrame with raw audio floats in an "audio_content" column
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("marathi_marh_val_g", "mr")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marathi_marh_val_g| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|mr| +|Size:|1.7 GB| + +## References + +https://huggingface.co/simran14/mr-val-g \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-marian_finetuned_kde4_chinese_tonga_tonga_islands_english_charliealex123_en.md b/docs/_posts/ahmedlone127/2024-09-06-marian_finetuned_kde4_chinese_tonga_tonga_islands_english_charliealex123_en.md new file mode 100644 index 00000000000000..a6afdb9fad5408 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-marian_finetuned_kde4_chinese_tonga_tonga_islands_english_charliealex123_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English marian_finetuned_kde4_chinese_tonga_tonga_islands_english_charliealex123 MarianTransformer from charliealex123 +author: John Snow Labs +name: marian_finetuned_kde4_chinese_tonga_tonga_islands_english_charliealex123 +date: 2024-09-06 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kde4_chinese_tonga_tonga_islands_english_charliealex123` is a English model originally trained by charliealex123. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_chinese_tonga_tonga_islands_english_charliealex123_en_5.5.0_3.0_1725635159018.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_chinese_tonga_tonga_islands_english_charliealex123_en_5.5.0_3.0_1725635159018.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("marian_finetuned_kde4_chinese_tonga_tonga_islands_english_charliealex123","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("marian_finetuned_kde4_chinese_tonga_tonga_islands_english_charliealex123","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
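+
+For quick, ad-hoc translation of single strings it can be convenient to wrap the fitted model in a `LightPipeline` instead of building a DataFrame. The sketch below reuses `pipelineModel` from the example above; the input sentence is only an illustration.
+
+```python
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+
+# Returns a dict keyed by output column names, e.g. "translation"
+annotated = light.annotate("The documentation was translated into several languages.")
+print(annotated["translation"])
+```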
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kde4_chinese_tonga_tonga_islands_english_charliealex123| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|540.0 MB| + +## References + +https://huggingface.co/charliealex123/marian-finetuned-kde4-zh-to-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_longma98_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_longma98_pipeline_en.md new file mode 100644 index 00000000000000..7d763a4a1b641e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_longma98_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_longma98_pipeline pipeline MarianTransformer from longma98 +author: John Snow Labs +name: marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_longma98_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_longma98_pipeline` is a English model originally trained by longma98. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_longma98_pipeline_en_5.5.0_3.0_1725635990983.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_longma98_pipeline_en_5.5.0_3.0_1725635990983.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_longma98_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_longma98_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_longma98_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|508.6 MB| + +## References + +https://huggingface.co/longma98/marian-finetuned-kde4-en-to-fr-accelerate + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-marian_finetuned_kde4_english_tonga_tonga_islands_french_indah1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-marian_finetuned_kde4_english_tonga_tonga_islands_french_indah1_pipeline_en.md new file mode 100644 index 00000000000000..41734381d0f617 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-marian_finetuned_kde4_english_tonga_tonga_islands_french_indah1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English marian_finetuned_kde4_english_tonga_tonga_islands_french_indah1_pipeline pipeline MarianTransformer from Indah1 +author: John Snow Labs +name: marian_finetuned_kde4_english_tonga_tonga_islands_french_indah1_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kde4_english_tonga_tonga_islands_french_indah1_pipeline` is a English model originally trained by Indah1. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_indah1_pipeline_en_5.5.0_3.0_1725634989453.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_indah1_pipeline_en_5.5.0_3.0_1725634989453.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("marian_finetuned_kde4_english_tonga_tonga_islands_french_indah1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("marian_finetuned_kde4_english_tonga_tonga_islands_french_indah1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kde4_english_tonga_tonga_islands_french_indah1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|508.7 MB| + +## References + +https://huggingface.co/Indah1/marian-finetuned-kde4-en-to-fr + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-marian_finetuned_kde4_english_tonga_tonga_islands_french_jhhan_en.md b/docs/_posts/ahmedlone127/2024-09-06-marian_finetuned_kde4_english_tonga_tonga_islands_french_jhhan_en.md new file mode 100644 index 00000000000000..ac56e040527766 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-marian_finetuned_kde4_english_tonga_tonga_islands_french_jhhan_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English marian_finetuned_kde4_english_tonga_tonga_islands_french_jhhan MarianTransformer from JHhan +author: John Snow Labs +name: marian_finetuned_kde4_english_tonga_tonga_islands_french_jhhan +date: 2024-09-06 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kde4_english_tonga_tonga_islands_french_jhhan` is a English model originally trained by JHhan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_jhhan_en_5.5.0_3.0_1725635809897.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_jhhan_en_5.5.0_3.0_1725635809897.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_french_jhhan","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_french_jhhan","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kde4_english_tonga_tonga_islands_french_jhhan| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.1 MB| + +## References + +https://huggingface.co/JHhan/marian-finetuned-kde4-en-to-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-marian_finetuned_kde4_english_tonga_tonga_islands_vietnamese_accelerate_en.md b/docs/_posts/ahmedlone127/2024-09-06-marian_finetuned_kde4_english_tonga_tonga_islands_vietnamese_accelerate_en.md new file mode 100644 index 00000000000000..5924ec67cbc39e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-marian_finetuned_kde4_english_tonga_tonga_islands_vietnamese_accelerate_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English marian_finetuned_kde4_english_tonga_tonga_islands_vietnamese_accelerate MarianTransformer from VanHoan +author: John Snow Labs +name: marian_finetuned_kde4_english_tonga_tonga_islands_vietnamese_accelerate +date: 2024-09-06 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kde4_english_tonga_tonga_islands_vietnamese_accelerate` is a English model originally trained by VanHoan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_vietnamese_accelerate_en_5.5.0_3.0_1725635507472.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_vietnamese_accelerate_en_5.5.0_3.0_1725635507472.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_vietnamese_accelerate","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_vietnamese_accelerate","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kde4_english_tonga_tonga_islands_vietnamese_accelerate| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|474.4 MB| + +## References + +https://huggingface.co/VanHoan/marian-finetuned-kde4-en-to-vi-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-marian_finetuned_kde4_english_tonga_tonga_islands_vietnamese_accelerate_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-marian_finetuned_kde4_english_tonga_tonga_islands_vietnamese_accelerate_pipeline_en.md new file mode 100644 index 00000000000000..4808378c34ee35 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-marian_finetuned_kde4_english_tonga_tonga_islands_vietnamese_accelerate_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English marian_finetuned_kde4_english_tonga_tonga_islands_vietnamese_accelerate_pipeline pipeline MarianTransformer from VanHoan +author: John Snow Labs +name: marian_finetuned_kde4_english_tonga_tonga_islands_vietnamese_accelerate_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kde4_english_tonga_tonga_islands_vietnamese_accelerate_pipeline` is a English model originally trained by VanHoan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_vietnamese_accelerate_pipeline_en_5.5.0_3.0_1725635532125.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_vietnamese_accelerate_pipeline_en_5.5.0_3.0_1725635532125.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("marian_finetuned_kde4_english_tonga_tonga_islands_vietnamese_accelerate_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("marian_finetuned_kde4_english_tonga_tonga_islands_vietnamese_accelerate_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kde4_english_tonga_tonga_islands_vietnamese_accelerate_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|475.0 MB| + +## References + +https://huggingface.co/VanHoan/marian-finetuned-kde4-en-to-vi-accelerate + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-marian_finetuned_maltese_en2vi_en.md b/docs/_posts/ahmedlone127/2024-09-06-marian_finetuned_maltese_en2vi_en.md new file mode 100644 index 00000000000000..a24eb038fb49e1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-marian_finetuned_maltese_en2vi_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English marian_finetuned_maltese_en2vi MarianTransformer from thangvip +author: John Snow Labs +name: marian_finetuned_maltese_en2vi +date: 2024-09-06 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_maltese_en2vi` is a English model originally trained by thangvip. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_maltese_en2vi_en_5.5.0_3.0_1725635142684.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_maltese_en2vi_en_5.5.0_3.0_1725635142684.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("marian_finetuned_maltese_en2vi","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("marian_finetuned_maltese_en2vi","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_maltese_en2vi| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|475.1 MB| + +## References + +https://huggingface.co/thangvip/marian-finetuned-mt-en2vi \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-marian_finetuned_maltese_en2vi_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-marian_finetuned_maltese_en2vi_pipeline_en.md new file mode 100644 index 00000000000000..ba185606acb820 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-marian_finetuned_maltese_en2vi_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English marian_finetuned_maltese_en2vi_pipeline pipeline MarianTransformer from thangvip +author: John Snow Labs +name: marian_finetuned_maltese_en2vi_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_maltese_en2vi_pipeline` is a English model originally trained by thangvip. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_maltese_en2vi_pipeline_en_5.5.0_3.0_1725635166286.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_maltese_en2vi_pipeline_en_5.5.0_3.0_1725635166286.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("marian_finetuned_maltese_en2vi_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("marian_finetuned_maltese_en2vi_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_maltese_en2vi_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|475.7 MB| + +## References + +https://huggingface.co/thangvip/marian-finetuned-mt-en2vi + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-marian_formality_fine_tuned_english_polish_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-marian_formality_fine_tuned_english_polish_pipeline_en.md new file mode 100644 index 00000000000000..75a257a7de2d04 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-marian_formality_fine_tuned_english_polish_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English marian_formality_fine_tuned_english_polish_pipeline pipeline MarianTransformer from laniqo +author: John Snow Labs +name: marian_formality_fine_tuned_english_polish_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_formality_fine_tuned_english_polish_pipeline` is a English model originally trained by laniqo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_formality_fine_tuned_english_polish_pipeline_en_5.5.0_3.0_1725636045607.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_formality_fine_tuned_english_polish_pipeline_en_5.5.0_3.0_1725636045607.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("marian_formality_fine_tuned_english_polish_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("marian_formality_fine_tuned_english_polish_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_formality_fine_tuned_english_polish_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|343.0 MB| + +## References + +https://huggingface.co/laniqo/marian_formality_fine_tuned_en_pl + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-marian_nmt_enid_en.md b/docs/_posts/ahmedlone127/2024-09-06-marian_nmt_enid_en.md new file mode 100644 index 00000000000000..f0154d7553cb2e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-marian_nmt_enid_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English marian_nmt_enid MarianTransformer from Wikidepia +author: John Snow Labs +name: marian_nmt_enid +date: 2024-09-06 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_nmt_enid` is a English model originally trained by Wikidepia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_nmt_enid_en_5.5.0_3.0_1725635547634.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_nmt_enid_en_5.5.0_3.0_1725635547634.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("marian_nmt_enid","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("marian_nmt_enid","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_nmt_enid| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|224.0 MB| + +## References + +https://huggingface.co/Wikidepia/marian-nmt-enid \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-marian_nmt_enid_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-marian_nmt_enid_pipeline_en.md new file mode 100644 index 00000000000000..51a44322aa75d4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-marian_nmt_enid_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English marian_nmt_enid_pipeline pipeline MarianTransformer from Wikidepia +author: John Snow Labs +name: marian_nmt_enid_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_nmt_enid_pipeline` is a English model originally trained by Wikidepia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_nmt_enid_pipeline_en_5.5.0_3.0_1725635613201.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_nmt_enid_pipeline_en_5.5.0_3.0_1725635613201.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("marian_nmt_enid_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("marian_nmt_enid_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_nmt_enid_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|224.5 MB| + +## References + +https://huggingface.co/Wikidepia/marian-nmt-enid + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-marianmix_english_chinese_10_deskdown_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-marianmix_english_chinese_10_deskdown_pipeline_en.md new file mode 100644 index 00000000000000..2ae7670e431cc3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-marianmix_english_chinese_10_deskdown_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English marianmix_english_chinese_10_deskdown_pipeline pipeline MarianTransformer from DeskDown +author: John Snow Labs +name: marianmix_english_chinese_10_deskdown_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marianmix_english_chinese_10_deskdown_pipeline` is a English model originally trained by DeskDown. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marianmix_english_chinese_10_deskdown_pipeline_en_5.5.0_3.0_1725635047016.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marianmix_english_chinese_10_deskdown_pipeline_en_5.5.0_3.0_1725635047016.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("marianmix_english_chinese_10_deskdown_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("marianmix_english_chinese_10_deskdown_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marianmix_english_chinese_10_deskdown_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|751.6 MB| + +## References + +https://huggingface.co/DeskDown/MarianMix_en-zh-10 + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-masked_lm_shakespeare_en.md b/docs/_posts/ahmedlone127/2024-09-06-masked_lm_shakespeare_en.md new file mode 100644 index 00000000000000..5eb68a6d91924e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-masked_lm_shakespeare_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English masked_lm_shakespeare DistilBertEmbeddings from perceptron-743 +author: John Snow Labs +name: masked_lm_shakespeare +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`masked_lm_shakespeare` is a English model originally trained by perceptron-743. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/masked_lm_shakespeare_en_5.5.0_3.0_1725664893391.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/masked_lm_shakespeare_en_5.5.0_3.0_1725664893391.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("masked_lm_shakespeare","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("masked_lm_shakespeare","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|masked_lm_shakespeare| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/perceptron-743/masked-lm-shakespeare \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-mdeberta_base_v3_2_en.md b/docs/_posts/ahmedlone127/2024-09-06-mdeberta_base_v3_2_en.md new file mode 100644 index 00000000000000..6974a8f9c52227 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-mdeberta_base_v3_2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mdeberta_base_v3_2 DeBertaForSequenceClassification from alyazharr +author: John Snow Labs +name: mdeberta_base_v3_2 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_base_v3_2` is a English model originally trained by alyazharr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_base_v3_2_en_5.5.0_3.0_1725611585695.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_base_v3_2_en_5.5.0_3.0_1725611585695.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("mdeberta_base_v3_2","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("mdeberta_base_v3_2", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
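+
+After running the example above, the predicted label and its per-class scores can be read from the `class` column; the scores live in the annotation's metadata map. This is a short illustrative follow-up using only standard Spark SQL functions.
+
+```python
+from pyspark.sql import functions as F
+
+pipelineDF.select(
+    F.col("class.result").alias("label"),      # predicted class label(s)
+    F.col("class.metadata").alias("scores")    # per-class confidence scores
+).show(truncate = False)
+```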
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_base_v3_2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|832.6 MB| + +## References + +https://huggingface.co/alyazharr/mdeberta_base_v3_2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-mdeberta_nli_bilingual_2_0_en.md b/docs/_posts/ahmedlone127/2024-09-06-mdeberta_nli_bilingual_2_0_en.md new file mode 100644 index 00000000000000..312888f1493d90 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-mdeberta_nli_bilingual_2_0_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mdeberta_nli_bilingual_2_0 DeBertaForSequenceClassification from rwillh11 +author: John Snow Labs +name: mdeberta_nli_bilingual_2_0 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_nli_bilingual_2_0` is a English model originally trained by rwillh11. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_nli_bilingual_2_0_en_5.5.0_3.0_1725588880807.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_nli_bilingual_2_0_en_5.5.0_3.0_1725588880807.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("mdeberta_nli_bilingual_2_0","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("mdeberta_nli_bilingual_2_0", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_nli_bilingual_2_0| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|856.3 MB| + +## References + +https://huggingface.co/rwillh11/mdeberta_NLI_bilingual_2.0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-mdeberta_nli_bilingual_2_0_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-mdeberta_nli_bilingual_2_0_pipeline_en.md new file mode 100644 index 00000000000000..843266843237bf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-mdeberta_nli_bilingual_2_0_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mdeberta_nli_bilingual_2_0_pipeline pipeline DeBertaForSequenceClassification from rwillh11 +author: John Snow Labs +name: mdeberta_nli_bilingual_2_0_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_nli_bilingual_2_0_pipeline` is a English model originally trained by rwillh11. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_nli_bilingual_2_0_pipeline_en_5.5.0_3.0_1725588934431.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_nli_bilingual_2_0_pipeline_en_5.5.0_3.0_1725588934431.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mdeberta_nli_bilingual_2_0_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mdeberta_nli_bilingual_2_0_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
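+
+In the snippet above, `df` stands for any Spark DataFrame with a `text` column. A minimal sketch of preparing such a DataFrame, or annotating a single string directly, might look like this (assuming an active SparkSession named `spark`):
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("mdeberta_nli_bilingual_2_0_pipeline", lang = "en")
+
+# option 1: transform a DataFrame that has a "text" column
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+# option 2: annotate a single string and get plain Python lists back
+result = pipeline.annotate("I love spark-nlp")
+```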
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_nli_bilingual_2_0_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|856.3 MB| + +## References + +https://huggingface.co/rwillh11/mdeberta_NLI_bilingual_2.0 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_assin_similarity_pipeline_pt.md b/docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_assin_similarity_pipeline_pt.md new file mode 100644 index 00000000000000..111f8bf993c1a1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_assin_similarity_pipeline_pt.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Portuguese mdeberta_v3_base_assin_similarity_pipeline pipeline DeBertaForSequenceClassification from ruanchaves +author: John Snow Labs +name: mdeberta_v3_base_assin_similarity_pipeline +date: 2024-09-06 +tags: [pt, open_source, pipeline, onnx] +task: Text Classification +language: pt +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_v3_base_assin_similarity_pipeline` is a Portuguese model originally trained by ruanchaves. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_assin_similarity_pipeline_pt_5.5.0_3.0_1725589328198.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_assin_similarity_pipeline_pt_5.5.0_3.0_1725589328198.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mdeberta_v3_base_assin_similarity_pipeline", lang = "pt") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mdeberta_v3_base_assin_similarity_pipeline", lang = "pt") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_v3_base_assin_similarity_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|pt| +|Size:|851.4 MB| + +## References + +https://huggingface.co/ruanchaves/mdeberta-v3-base-assin-similarity + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_assin_similarity_pt.md b/docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_assin_similarity_pt.md new file mode 100644 index 00000000000000..1d004c639a709d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_assin_similarity_pt.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Portuguese mdeberta_v3_base_assin_similarity DeBertaForSequenceClassification from ruanchaves +author: John Snow Labs +name: mdeberta_v3_base_assin_similarity +date: 2024-09-06 +tags: [pt, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: pt +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_v3_base_assin_similarity` is a Portuguese model originally trained by ruanchaves. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_assin_similarity_pt_5.5.0_3.0_1725589272477.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_assin_similarity_pt_5.5.0_3.0_1725589272477.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# the classifier reads the "document" and "token" columns produced above
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("mdeberta_v3_base_assin_similarity","pt") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._ // for toDS/toDF on Seq
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+// the classifier reads the "document" and "token" columns produced above
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("mdeberta_v3_base_assin_similarity", "pt")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_v3_base_assin_similarity| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|pt| +|Size:|851.4 MB| + +## References + +https://huggingface.co/ruanchaves/mdeberta-v3-base-assin-similarity \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_hatebr_pipeline_pt.md b/docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_hatebr_pipeline_pt.md new file mode 100644 index 00000000000000..e0e13e58e7b9c3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_hatebr_pipeline_pt.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Portuguese mdeberta_v3_base_hatebr_pipeline pipeline DeBertaForSequenceClassification from ruanchaves +author: John Snow Labs +name: mdeberta_v3_base_hatebr_pipeline +date: 2024-09-06 +tags: [pt, open_source, pipeline, onnx] +task: Text Classification +language: pt +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_v3_base_hatebr_pipeline` is a Portuguese model originally trained by ruanchaves. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_hatebr_pipeline_pt_5.5.0_3.0_1725590919075.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_hatebr_pipeline_pt_5.5.0_3.0_1725590919075.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mdeberta_v3_base_hatebr_pipeline", lang = "pt") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mdeberta_v3_base_hatebr_pipeline", lang = "pt") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_v3_base_hatebr_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|pt| +|Size:|844.9 MB| + +## References + +https://huggingface.co/ruanchaves/mdeberta-v3-base-hatebr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_qnli_100_en.md b/docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_qnli_100_en.md new file mode 100644 index 00000000000000..affafd6a7ec96a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_qnli_100_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mdeberta_v3_base_qnli_100 DeBertaForSequenceClassification from tmnam20 +author: John Snow Labs +name: mdeberta_v3_base_qnli_100 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_v3_base_qnli_100` is a English model originally trained by tmnam20. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_qnli_100_en_5.5.0_3.0_1725609804404.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_qnli_100_en_5.5.0_3.0_1725609804404.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# the classifier reads the "document" and "token" columns produced above
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("mdeberta_v3_base_qnli_100","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._ // for toDS/toDF on Seq
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+// the classifier reads the "document" and "token" columns produced above
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("mdeberta_v3_base_qnli_100", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_v3_base_qnli_100| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|828.6 MB| + +## References + +https://huggingface.co/tmnam20/mdeberta-v3-base-qnli-100 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_qnli_100_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_qnli_100_pipeline_en.md new file mode 100644 index 00000000000000..40c2284ef7d1b8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_qnli_100_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mdeberta_v3_base_qnli_100_pipeline pipeline DeBertaForSequenceClassification from tmnam20 +author: John Snow Labs +name: mdeberta_v3_base_qnli_100_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_v3_base_qnli_100_pipeline` is a English model originally trained by tmnam20. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_qnli_100_pipeline_en_5.5.0_3.0_1725609940036.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_qnli_100_pipeline_en_5.5.0_3.0_1725609940036.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mdeberta_v3_base_qnli_100_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mdeberta_v3_base_qnli_100_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_v3_base_qnli_100_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|828.6 MB| + +## References + +https://huggingface.co/tmnam20/mdeberta-v3-base-qnli-100 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_qnli_1_en.md b/docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_qnli_1_en.md new file mode 100644 index 00000000000000..6146416d7650de --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_qnli_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mdeberta_v3_base_qnli_1 DeBertaForSequenceClassification from tmnam20 +author: John Snow Labs +name: mdeberta_v3_base_qnli_1 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_v3_base_qnli_1` is a English model originally trained by tmnam20. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_qnli_1_en_5.5.0_3.0_1725589121971.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_qnli_1_en_5.5.0_3.0_1725589121971.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# the classifier reads the "document" and "token" columns produced above
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("mdeberta_v3_base_qnli_1","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._ // for toDS/toDF on Seq
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+// the classifier reads the "document" and "token" columns produced above
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("mdeberta_v3_base_qnli_1", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
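+
+For low-latency, single-document inference it can be more convenient to wrap the fitted model in a `LightPipeline` instead of calling `transform` on a DataFrame. A rough sketch, reusing `pipelineModel` from above:
+
+```python
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+# returns a plain Python dict keyed by the "document", "token" and "class" output columns
+print(light.annotate("I love spark-nlp"))
+```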
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_v3_base_qnli_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|828.6 MB| + +## References + +https://huggingface.co/tmnam20/mdeberta-v3-base-qnli-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_qnli_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_qnli_1_pipeline_en.md new file mode 100644 index 00000000000000..aa969ff80d1219 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_qnli_1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mdeberta_v3_base_qnli_1_pipeline pipeline DeBertaForSequenceClassification from tmnam20 +author: John Snow Labs +name: mdeberta_v3_base_qnli_1_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_v3_base_qnli_1_pipeline` is a English model originally trained by tmnam20. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_qnli_1_pipeline_en_5.5.0_3.0_1725589250333.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_qnli_1_pipeline_en_5.5.0_3.0_1725589250333.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mdeberta_v3_base_qnli_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mdeberta_v3_base_qnli_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_v3_base_qnli_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|828.6 MB| + +## References + +https://huggingface.co/tmnam20/mdeberta-v3-base-qnli-1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_qqp_100_en.md b/docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_qqp_100_en.md new file mode 100644 index 00000000000000..d581dc9afb0e4c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_qqp_100_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mdeberta_v3_base_qqp_100 DeBertaForSequenceClassification from tmnam20 +author: John Snow Labs +name: mdeberta_v3_base_qqp_100 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_v3_base_qqp_100` is a English model originally trained by tmnam20. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_qqp_100_en_5.5.0_3.0_1725611022095.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_qqp_100_en_5.5.0_3.0_1725611022095.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# the classifier reads the "document" and "token" columns produced above
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("mdeberta_v3_base_qqp_100","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._ // for toDS/toDF on Seq
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+// the classifier reads the "document" and "token" columns produced above
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("mdeberta_v3_base_qqp_100", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_v3_base_qqp_100| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|834.7 MB| + +## References + +https://huggingface.co/tmnam20/mdeberta-v3-base-qqp-100 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_qqp_100_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_qqp_100_pipeline_en.md new file mode 100644 index 00000000000000..9edb2899ae33a1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-mdeberta_v3_base_qqp_100_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mdeberta_v3_base_qqp_100_pipeline pipeline DeBertaForSequenceClassification from tmnam20 +author: John Snow Labs +name: mdeberta_v3_base_qqp_100_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mdeberta_v3_base_qqp_100_pipeline` is a English model originally trained by tmnam20. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_qqp_100_pipeline_en_5.5.0_3.0_1725611152937.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdeberta_v3_base_qqp_100_pipeline_en_5.5.0_3.0_1725611152937.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mdeberta_v3_base_qqp_100_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mdeberta_v3_base_qqp_100_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdeberta_v3_base_qqp_100_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|834.8 MB| + +## References + +https://huggingface.co/tmnam20/mdeberta-v3-base-qqp-100 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-med_bert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-med_bert_pipeline_en.md new file mode 100644 index 00000000000000..9dd51c9d50d86b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-med_bert_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English med_bert_pipeline pipeline BertForTokenClassification from praneethvasarla +author: John Snow Labs +name: med_bert_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`med_bert_pipeline` is a English model originally trained by praneethvasarla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/med_bert_pipeline_en_5.5.0_3.0_1725634312835.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/med_bert_pipeline_en_5.5.0_3.0_1725634312835.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("med_bert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("med_bert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|med_bert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/praneethvasarla/med-bert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-medroberta_dutch_experiencer_nl.md b/docs/_posts/ahmedlone127/2024-09-06-medroberta_dutch_experiencer_nl.md new file mode 100644 index 00000000000000..b7893f526250c7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-medroberta_dutch_experiencer_nl.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Dutch, Flemish medroberta_dutch_experiencer RoBertaForTokenClassification from UMCU +author: John Snow Labs +name: medroberta_dutch_experiencer +date: 2024-09-06 +tags: [nl, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: nl +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`medroberta_dutch_experiencer` is a Dutch, Flemish model originally trained by UMCU. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/medroberta_dutch_experiencer_nl_5.5.0_3.0_1725624631892.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/medroberta_dutch_experiencer_nl_5.5.0_3.0_1725624631892.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, RoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# the token classifier reads the "document" and "token" columns produced above
+tokenClassifier = RoBertaForTokenClassification.pretrained("medroberta_dutch_experiencer","nl") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._ // for toDS/toDF on Seq
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+// the token classifier reads the "document" and "token" columns produced above
+val tokenClassifier = RoBertaForTokenClassification.pretrained("medroberta_dutch_experiencer", "nl")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
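+
+To line each token up with its predicted tag, the `token` and `ner` output columns set above can be zipped together. This is only a sketch of one way to inspect the result:
+
+```python
+from pyspark.sql import functions as F
+
+# pair every token string with the NER label predicted for it
+pipelineDF.select(
+    F.explode(F.arrays_zip("token.result", "ner.result")).alias("token_tag")
+).show(truncate=False)
+```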
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|medroberta_dutch_experiencer| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|nl| +|Size:|469.7 MB| + +## References + +https://huggingface.co/UMCU/MedRoBERTa.nl_Experiencer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-medroberta_dutch_experiencer_pipeline_nl.md b/docs/_posts/ahmedlone127/2024-09-06-medroberta_dutch_experiencer_pipeline_nl.md new file mode 100644 index 00000000000000..17f780c054d32c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-medroberta_dutch_experiencer_pipeline_nl.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Dutch, Flemish medroberta_dutch_experiencer_pipeline pipeline RoBertaForTokenClassification from UMCU +author: John Snow Labs +name: medroberta_dutch_experiencer_pipeline +date: 2024-09-06 +tags: [nl, open_source, pipeline, onnx] +task: Named Entity Recognition +language: nl +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`medroberta_dutch_experiencer_pipeline` is a Dutch, Flemish model originally trained by UMCU. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/medroberta_dutch_experiencer_pipeline_nl_5.5.0_3.0_1725624655916.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/medroberta_dutch_experiencer_pipeline_nl_5.5.0_3.0_1725624655916.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("medroberta_dutch_experiencer_pipeline", lang = "nl") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("medroberta_dutch_experiencer_pipeline", lang = "nl") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|medroberta_dutch_experiencer_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|nl| +|Size:|469.8 MB| + +## References + +https://huggingface.co/UMCU/MedRoBERTa.nl_Experiencer + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-megabatchmarginloss_10_en.md b/docs/_posts/ahmedlone127/2024-09-06-megabatchmarginloss_10_en.md new file mode 100644 index 00000000000000..9e01db8b9d91c0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-megabatchmarginloss_10_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English megabatchmarginloss_10 MPNetEmbeddings from marianodo +author: John Snow Labs +name: megabatchmarginloss_10 +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`megabatchmarginloss_10` is a English model originally trained by marianodo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/megabatchmarginloss_10_en_5.5.0_3.0_1725595395418.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/megabatchmarginloss_10_en_5.5.0_3.0_1725595395418.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import MPNetEmbeddings
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+embeddings = MPNetEmbeddings.pretrained("megabatchmarginloss_10","en") \
+    .setInputCols(["document"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([documentAssembler, embeddings])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._ // for toDF on Seq
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val embeddings = MPNetEmbeddings.pretrained("megabatchmarginloss_10","en")
+    .setInputCols(Array("document"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
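+
+Since the model produces one sentence-level vector per document, a common follow-up is to compare two texts by cosine similarity. The snippet below is a rough sketch along those lines, assuming the fitted `pipelineModel` from above and an active SparkSession named `spark`:
+
+```python
+import numpy as np
+
+pairs = spark.createDataFrame([["I love spark-nlp"], ["Spark NLP is great"]]).toDF("text")
+# "embeddings.embeddings" pulls out the float vectors stored in the annotations
+vectors = pipelineModel.transform(pairs).select("embeddings.embeddings").collect()
+
+v1 = np.array(vectors[0][0][0])
+v2 = np.array(vectors[1][0][0])
+cosine = float(v1 @ v2 / (np.linalg.norm(v1) * np.linalg.norm(v2)))
+print(cosine)
+```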
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|megabatchmarginloss_10| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/marianodo/MegaBatchMarginLoss-10 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-megabatchmarginloss_10_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-megabatchmarginloss_10_pipeline_en.md new file mode 100644 index 00000000000000..2c9221e608eb5e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-megabatchmarginloss_10_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English megabatchmarginloss_10_pipeline pipeline MPNetEmbeddings from marianodo +author: John Snow Labs +name: megabatchmarginloss_10_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`megabatchmarginloss_10_pipeline` is a English model originally trained by marianodo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/megabatchmarginloss_10_pipeline_en_5.5.0_3.0_1725595416565.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/megabatchmarginloss_10_pipeline_en_5.5.0_3.0_1725595416565.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("megabatchmarginloss_10_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("megabatchmarginloss_10_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|megabatchmarginloss_10_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/marianodo/MegaBatchMarginLoss-10 + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-memo_bert_sanskrit_saskta_3_en.md b/docs/_posts/ahmedlone127/2024-09-06-memo_bert_sanskrit_saskta_3_en.md new file mode 100644 index 00000000000000..4fddb519ddb0a3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-memo_bert_sanskrit_saskta_3_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English memo_bert_sanskrit_saskta_3 XlmRoBertaForSequenceClassification from yemen2016 +author: John Snow Labs +name: memo_bert_sanskrit_saskta_3 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`memo_bert_sanskrit_saskta_3` is a English model originally trained by yemen2016. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/memo_bert_sanskrit_saskta_3_en_5.5.0_3.0_1725616654819.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/memo_bert_sanskrit_saskta_3_en_5.5.0_3.0_1725616654819.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# the classifier reads the "document" and "token" columns produced above
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("memo_bert_sanskrit_saskta_3","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._ // for toDS/toDF on Seq
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+// the classifier reads the "document" and "token" columns produced above
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("memo_bert_sanskrit_saskta_3", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|memo_bert_sanskrit_saskta_3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|466.6 MB| + +## References + +https://huggingface.co/yemen2016/MeMo_BERT-SA_3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-memo_bert_sanskrit_saskta_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-memo_bert_sanskrit_saskta_3_pipeline_en.md new file mode 100644 index 00000000000000..6d648f66280d6b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-memo_bert_sanskrit_saskta_3_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English memo_bert_sanskrit_saskta_3_pipeline pipeline XlmRoBertaForSequenceClassification from yemen2016 +author: John Snow Labs +name: memo_bert_sanskrit_saskta_3_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`memo_bert_sanskrit_saskta_3_pipeline` is a English model originally trained by yemen2016. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/memo_bert_sanskrit_saskta_3_pipeline_en_5.5.0_3.0_1725616678327.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/memo_bert_sanskrit_saskta_3_pipeline_en_5.5.0_3.0_1725616678327.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("memo_bert_sanskrit_saskta_3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("memo_bert_sanskrit_saskta_3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|memo_bert_sanskrit_saskta_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|466.6 MB| + +## References + +https://huggingface.co/yemen2016/MeMo_BERT-SA_3 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-memo_bert_wsd_en.md b/docs/_posts/ahmedlone127/2024-09-06-memo_bert_wsd_en.md new file mode 100644 index 00000000000000..8286c5f774251c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-memo_bert_wsd_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English memo_bert_wsd XlmRoBertaForSequenceClassification from MiMe-MeMo +author: John Snow Labs +name: memo_bert_wsd +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`memo_bert_wsd` is a English model originally trained by MiMe-MeMo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/memo_bert_wsd_en_5.5.0_3.0_1725615923363.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/memo_bert_wsd_en_5.5.0_3.0_1725615923363.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# the classifier reads the "document" and "token" columns produced above
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("memo_bert_wsd","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._ // for toDS/toDF on Seq
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+// the classifier reads the "document" and "token" columns produced above
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("memo_bert_wsd", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|memo_bert_wsd| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|466.6 MB| + +## References + +https://huggingface.co/MiMe-MeMo/MeMo-BERT-WSD \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-memo_bert_wsd_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-memo_bert_wsd_pipeline_en.md new file mode 100644 index 00000000000000..06939803bed5b0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-memo_bert_wsd_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English memo_bert_wsd_pipeline pipeline XlmRoBertaForSequenceClassification from MiMe-MeMo +author: John Snow Labs +name: memo_bert_wsd_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`memo_bert_wsd_pipeline` is a English model originally trained by MiMe-MeMo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/memo_bert_wsd_pipeline_en_5.5.0_3.0_1725615946286.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/memo_bert_wsd_pipeline_en_5.5.0_3.0_1725615946286.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("memo_bert_wsd_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("memo_bert_wsd_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|memo_bert_wsd_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|466.6 MB| + +## References + +https://huggingface.co/MiMe-MeMo/MeMo-BERT-WSD + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-microsoft_deberta_v3_large_cls_cree_en.md b/docs/_posts/ahmedlone127/2024-09-06-microsoft_deberta_v3_large_cls_cree_en.md new file mode 100644 index 00000000000000..21ea6642747464 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-microsoft_deberta_v3_large_cls_cree_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English microsoft_deberta_v3_large_cls_cree DeBertaForSequenceClassification from ghatgetanuj +author: John Snow Labs +name: microsoft_deberta_v3_large_cls_cree +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`microsoft_deberta_v3_large_cls_cree` is a English model originally trained by ghatgetanuj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/microsoft_deberta_v3_large_cls_cree_en_5.5.0_3.0_1725589654692.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/microsoft_deberta_v3_large_cls_cree_en_5.5.0_3.0_1725589654692.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DeBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# the classifier reads the "document" and "token" columns produced above
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("microsoft_deberta_v3_large_cls_cree","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._ // for toDS/toDF on Seq
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+// the classifier reads the "document" and "token" columns produced above
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("microsoft_deberta_v3_large_cls_cree", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|microsoft_deberta_v3_large_cls_cree| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/ghatgetanuj/microsoft-deberta-v3-large_cls_CR \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-microsoft_deberta_v3_large_cls_cree_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-microsoft_deberta_v3_large_cls_cree_pipeline_en.md new file mode 100644 index 00000000000000..d08b75d890b77b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-microsoft_deberta_v3_large_cls_cree_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English microsoft_deberta_v3_large_cls_cree_pipeline pipeline DeBertaForSequenceClassification from ghatgetanuj +author: John Snow Labs +name: microsoft_deberta_v3_large_cls_cree_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`microsoft_deberta_v3_large_cls_cree_pipeline` is a English model originally trained by ghatgetanuj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/microsoft_deberta_v3_large_cls_cree_pipeline_en_5.5.0_3.0_1725589742540.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/microsoft_deberta_v3_large_cls_cree_pipeline_en_5.5.0_3.0_1725589742540.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("microsoft_deberta_v3_large_cls_cree_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("microsoft_deberta_v3_large_cls_cree_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|microsoft_deberta_v3_large_cls_cree_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/ghatgetanuj/microsoft-deberta-v3-large_cls_CR + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-mixed_distil_bert_en.md b/docs/_posts/ahmedlone127/2024-09-06-mixed_distil_bert_en.md new file mode 100644 index 00000000000000..c3a80d72ea7133 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-mixed_distil_bert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mixed_distil_bert DistilBertEmbeddings from md-nishat-008 +author: John Snow Labs +name: mixed_distil_bert +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mixed_distil_bert` is a English model originally trained by md-nishat-008. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mixed_distil_bert_en_5.5.0_3.0_1725664796003.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mixed_distil_bert_en_5.5.0_3.0_1725664796003.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertEmbeddings
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols("document") \
+    .setOutputCol("token")
+
+embeddings = DistilBertEmbeddings.pretrained("mixed_distil_bert","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._ // for toDF on Seq
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val embeddings = DistilBertEmbeddings.pretrained("mixed_distil_bert","en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
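+
+The token-level vectors in the `embeddings` column are annotation structs; to hand them to downstream Spark ML stages they can be converted into plain vector columns with `EmbeddingsFinisher`. A minimal sketch, reusing `pipelineDF` from above:
+
+```python
+from sparknlp.base import EmbeddingsFinisher
+
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["finished_embeddings"]) \
+    .setOutputAsVector(True)
+
+# one Spark ML vector per token
+finisher.transform(pipelineDF) \
+    .selectExpr("explode(finished_embeddings) AS token_vector") \
+    .show(5, truncate=False)
+```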
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mixed_distil_bert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|246.7 MB| + +## References + +https://huggingface.co/md-nishat-008/Mixed-Distil-BERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-mixed_distil_bert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-mixed_distil_bert_pipeline_en.md new file mode 100644 index 00000000000000..a651709dd69ec9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-mixed_distil_bert_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mixed_distil_bert_pipeline pipeline DistilBertEmbeddings from md-nishat-008 +author: John Snow Labs +name: mixed_distil_bert_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mixed_distil_bert_pipeline` is a English model originally trained by md-nishat-008. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mixed_distil_bert_pipeline_en_5.5.0_3.0_1725664807827.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mixed_distil_bert_pipeline_en_5.5.0_3.0_1725664807827.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mixed_distil_bert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mixed_distil_bert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
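+
+For quick experiments without building a DataFrame, the pretrained pipeline can also be used as a light pipeline. A short sketch follows; the method names are standard `PretrainedPipeline` API, while the exact output keys depend on the stages listed under Included Models.
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("mixed_distil_bert_pipeline", lang="en")
+
+# annotate() runs the pipeline on plain strings and returns a dict per input
+result = pipeline.annotate("I love spark-nlp")
+print(result.keys())      # expected keys such as document, token, embeddings
+print(result["token"])
+```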
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mixed_distil_bert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|246.7 MB| + +## References + +https://huggingface.co/md-nishat-008/Mixed-Distil-BERT + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-mnli_microsoft_deberta_v3_base_seed_2_en.md b/docs/_posts/ahmedlone127/2024-09-06-mnli_microsoft_deberta_v3_base_seed_2_en.md new file mode 100644 index 00000000000000..9473b8cb434ff5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-mnli_microsoft_deberta_v3_base_seed_2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mnli_microsoft_deberta_v3_base_seed_2 DeBertaForSequenceClassification from utahnlp +author: John Snow Labs +name: mnli_microsoft_deberta_v3_base_seed_2 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mnli_microsoft_deberta_v3_base_seed_2` is a English model originally trained by utahnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mnli_microsoft_deberta_v3_base_seed_2_en_5.5.0_3.0_1725589602540.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mnli_microsoft_deberta_v3_base_seed_2_en_5.5.0_3.0_1725589602540.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("mnli_microsoft_deberta_v3_base_seed_2","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("mnli_microsoft_deberta_v3_base_seed_2", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mnli_microsoft_deberta_v3_base_seed_2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|641.2 MB| + +## References + +https://huggingface.co/utahnlp/mnli_microsoft_deberta-v3-base_seed-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-mnli_microsoft_deberta_v3_large_seed_1_en.md b/docs/_posts/ahmedlone127/2024-09-06-mnli_microsoft_deberta_v3_large_seed_1_en.md new file mode 100644 index 00000000000000..951baa9af925bc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-mnli_microsoft_deberta_v3_large_seed_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mnli_microsoft_deberta_v3_large_seed_1 DeBertaForSequenceClassification from utahnlp +author: John Snow Labs +name: mnli_microsoft_deberta_v3_large_seed_1 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mnli_microsoft_deberta_v3_large_seed_1` is a English model originally trained by utahnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mnli_microsoft_deberta_v3_large_seed_1_en_5.5.0_3.0_1725590596733.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mnli_microsoft_deberta_v3_large_seed_1_en_5.5.0_3.0_1725590596733.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("mnli_microsoft_deberta_v3_large_seed_1","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("mnli_microsoft_deberta_v3_large_seed_1", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mnli_microsoft_deberta_v3_large_seed_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/utahnlp/mnli_microsoft_deberta-v3-large_seed-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-mnli_microsoft_deberta_v3_large_seed_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-mnli_microsoft_deberta_v3_large_seed_1_pipeline_en.md new file mode 100644 index 00000000000000..16e112aab2ce6c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-mnli_microsoft_deberta_v3_large_seed_1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mnli_microsoft_deberta_v3_large_seed_1_pipeline pipeline DeBertaForSequenceClassification from utahnlp +author: John Snow Labs +name: mnli_microsoft_deberta_v3_large_seed_1_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mnli_microsoft_deberta_v3_large_seed_1_pipeline` is a English model originally trained by utahnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mnli_microsoft_deberta_v3_large_seed_1_pipeline_en_5.5.0_3.0_1725590702262.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mnli_microsoft_deberta_v3_large_seed_1_pipeline_en_5.5.0_3.0_1725590702262.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mnli_microsoft_deberta_v3_large_seed_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mnli_microsoft_deberta_v3_large_seed_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
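+
+As above, `df` is assumed to be a DataFrame with a single `text` column. The sketch below shows how such input could be prepared for this MNLI pipeline; note that how premise and hypothesis should be packed into the one text field is not documented on this card, and the `class` column name is taken from the standalone example.
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# One string column named "text" (premise/hypothesis packing is an assumption)
+df = spark.createDataFrame([["A man is playing guitar. A person makes music."]]).toDF("text")
+
+pipeline = PretrainedPipeline("mnli_microsoft_deberta_v3_large_seed_1_pipeline", lang="en")
+annotations = pipeline.transform(df)
+annotations.select("class.result").show(truncate=False)
+```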
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mnli_microsoft_deberta_v3_large_seed_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.6 GB| + +## References + +https://huggingface.co/utahnlp/mnli_microsoft_deberta-v3-large_seed-1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-mnli_roberta_large_seed_3_en.md b/docs/_posts/ahmedlone127/2024-09-06-mnli_roberta_large_seed_3_en.md new file mode 100644 index 00000000000000..50994e2e13291f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-mnli_roberta_large_seed_3_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mnli_roberta_large_seed_3 RoBertaForSequenceClassification from utahnlp +author: John Snow Labs +name: mnli_roberta_large_seed_3 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mnli_roberta_large_seed_3` is a English model originally trained by utahnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mnli_roberta_large_seed_3_en_5.5.0_3.0_1725613065921.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mnli_roberta_large_seed_3_en_5.5.0_3.0_1725613065921.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("mnli_roberta_large_seed_3","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("mnli_roberta_large_seed_3", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mnli_roberta_large_seed_3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/utahnlp/mnli_roberta-large_seed-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-mnli_roberta_large_seed_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-mnli_roberta_large_seed_3_pipeline_en.md new file mode 100644 index 00000000000000..e4859a955bdefe --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-mnli_roberta_large_seed_3_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mnli_roberta_large_seed_3_pipeline pipeline RoBertaForSequenceClassification from utahnlp +author: John Snow Labs +name: mnli_roberta_large_seed_3_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mnli_roberta_large_seed_3_pipeline` is a English model originally trained by utahnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mnli_roberta_large_seed_3_pipeline_en_5.5.0_3.0_1725613140300.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mnli_roberta_large_seed_3_pipeline_en_5.5.0_3.0_1725613140300.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mnli_roberta_large_seed_3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mnli_roberta_large_seed_3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
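+
+An alternative to `transform` is `fullAnnotate`, which keeps the full annotation objects (including metadata such as confidence scores) for ad-hoc strings. A sketch, assuming the classifier stage writes to a `class` column as in the standalone example:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("mnli_roberta_large_seed_3_pipeline", lang="en")
+
+# fullAnnotate returns one dict of annotations per input string
+annotation = pipeline.fullAnnotate("I love spark-nlp")[0]
+for ann in annotation["class"]:
+    print(ann.result, ann.metadata)
+```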
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mnli_roberta_large_seed_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/utahnlp/mnli_roberta-large_seed-3 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-model_test_en.md b/docs/_posts/ahmedlone127/2024-09-06-model_test_en.md new file mode 100644 index 00000000000000..42114201af04e4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-model_test_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English model_test DistilBertForTokenClassification from cccmatthew +author: John Snow Labs +name: model_test +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`model_test` is a English model originally trained by cccmatthew. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/model_test_en_5.5.0_3.0_1725653970734.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/model_test_en_5.5.0_3.0_1725653970734.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("model_test","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("model_test", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|model_test| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/cccmatthew/model_test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-model_zip_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-model_zip_pipeline_en.md new file mode 100644 index 00000000000000..d052fc6cac1303 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-model_zip_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English model_zip_pipeline pipeline DistilBertEmbeddings from mal-sh +author: John Snow Labs +name: model_zip_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`model_zip_pipeline` is a English model originally trained by mal-sh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/model_zip_pipeline_en_5.5.0_3.0_1725664930490.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/model_zip_pipeline_en_5.5.0_3.0_1725664930490.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("model_zip_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("model_zip_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
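+
+Since this pipeline produces an embeddings column rather than labels, it can help to inspect the transformed schema before selecting fields. A sketch (column names are assumed from the Included Models list):
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("model_zip_pipeline", lang="en")
+annotations = pipeline.transform(df)
+
+# Each row carries one annotation per token; the vector lives in the
+# nested "embeddings" field of the annotation struct
+annotations.printSchema()
+annotations.selectExpr("explode(embeddings) as token_embedding").show(5, truncate=80)
+```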
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|model_zip_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/mal-sh/model.zip + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-mpnet_80k_en.md b/docs/_posts/ahmedlone127/2024-09-06-mpnet_80k_en.md new file mode 100644 index 00000000000000..26cc20e2e33cf8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-mpnet_80k_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English mpnet_80k MPNetEmbeddings from heka-ai +author: John Snow Labs +name: mpnet_80k +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mpnet_80k` is a English model originally trained by heka-ai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mpnet_80k_en_5.5.0_3.0_1725595564835.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mpnet_80k_en_5.5.0_3.0_1725595564835.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("mpnet_80k","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("mpnet_80k","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
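+
+Sentence embeddings like these are typically compared with cosine similarity. The rough sketch below builds on the Python pipeline above (it reuses `spark` and `pipelineModel`); the nested field access assumes the standard Spark NLP annotation schema, and the sentences are illustrative only.
+
+```python
+import numpy as np
+
+sentences = spark.createDataFrame(
+    [["I love spark-nlp"], ["Spark NLP is great"]]
+).toDF("text")
+
+rows = pipelineModel.transform(sentences).select("embeddings").collect()
+
+# One sentence-level annotation per row; its "embeddings" field holds the vector
+vectors = [np.array(row["embeddings"][0]["embeddings"]) for row in rows]
+cosine = float(vectors[0] @ vectors[1] / (np.linalg.norm(vectors[0]) * np.linalg.norm(vectors[1])))
+print(f"cosine similarity: {cosine:.3f}")
+```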
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mpnet_80k| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/heka-ai/mpnet-80k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-mpnet_base_en.md b/docs/_posts/ahmedlone127/2024-09-06-mpnet_base_en.md new file mode 100644 index 00000000000000..1c4de8f3e4cd9d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-mpnet_base_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mpnet_base MPNetForSequenceClassification from Mahmoud8 +author: John Snow Labs +name: mpnet_base +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, mpnet] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mpnet_base` is a English model originally trained by Mahmoud8. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mpnet_base_en_5.5.0_3.0_1725655360245.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mpnet_base_en_5.5.0_3.0_1725655360245.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = MPNetForSequenceClassification.pretrained("mpnet_base","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = MPNetForSequenceClassification.pretrained("mpnet_base", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mpnet_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|401.9 MB| + +## References + +https://huggingface.co/Mahmoud8/mpnet-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-mpnet_base_gistembedloss_msee_evaluator_salestax_docs_en.md b/docs/_posts/ahmedlone127/2024-09-06-mpnet_base_gistembedloss_msee_evaluator_salestax_docs_en.md new file mode 100644 index 00000000000000..62aecd7ba92a50 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-mpnet_base_gistembedloss_msee_evaluator_salestax_docs_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English mpnet_base_gistembedloss_msee_evaluator_salestax_docs MPNetEmbeddings from Areeb-02 +author: John Snow Labs +name: mpnet_base_gistembedloss_msee_evaluator_salestax_docs +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mpnet_base_gistembedloss_msee_evaluator_salestax_docs` is a English model originally trained by Areeb-02. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mpnet_base_gistembedloss_msee_evaluator_salestax_docs_en_5.5.0_3.0_1725594883180.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mpnet_base_gistembedloss_msee_evaluator_salestax_docs_en_5.5.0_3.0_1725594883180.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("mpnet_base_gistembedloss_msee_evaluator_salestax_docs","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("mpnet_base_gistembedloss_msee_evaluator_salestax_docs","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mpnet_base_gistembedloss_msee_evaluator_salestax_docs| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|379.7 MB| + +## References + +https://huggingface.co/Areeb-02/mpnet-base-GISTEmbedLoss-MSEE_Evaluator-salestax-docs \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-mpnet_base_gistembedloss_msee_evaluator_salestax_docs_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-mpnet_base_gistembedloss_msee_evaluator_salestax_docs_pipeline_en.md new file mode 100644 index 00000000000000..0da93071946144 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-mpnet_base_gistembedloss_msee_evaluator_salestax_docs_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English mpnet_base_gistembedloss_msee_evaluator_salestax_docs_pipeline pipeline MPNetEmbeddings from Areeb-02 +author: John Snow Labs +name: mpnet_base_gistembedloss_msee_evaluator_salestax_docs_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mpnet_base_gistembedloss_msee_evaluator_salestax_docs_pipeline` is a English model originally trained by Areeb-02. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mpnet_base_gistembedloss_msee_evaluator_salestax_docs_pipeline_en_5.5.0_3.0_1725594913821.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mpnet_base_gistembedloss_msee_evaluator_salestax_docs_pipeline_en_5.5.0_3.0_1725594913821.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mpnet_base_gistembedloss_msee_evaluator_salestax_docs_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mpnet_base_gistembedloss_msee_evaluator_salestax_docs_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
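+
+A short sketch of feeding documents into this embedding pipeline; the pipeline name comes from this card, while the `embeddings` column name and the sample sentence are assumptions.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+df = spark.createDataFrame([["How is sales tax calculated?"]]).toDF("text")
+
+pipeline = PretrainedPipeline("mpnet_base_gistembedloss_msee_evaluator_salestax_docs_pipeline", lang="en")
+annotations = pipeline.transform(df)
+annotations.select("embeddings.embeddings").show(1, truncate=80)
+```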
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mpnet_base_gistembedloss_msee_evaluator_salestax_docs_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|379.7 MB| + +## References + +https://huggingface.co/Areeb-02/mpnet-base-GISTEmbedLoss-MSEE_Evaluator-salestax-docs + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-mpnet_base_mimics_query_facet_encoder_en.md b/docs/_posts/ahmedlone127/2024-09-06-mpnet_base_mimics_query_facet_encoder_en.md new file mode 100644 index 00000000000000..95150563ae35d2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-mpnet_base_mimics_query_facet_encoder_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English mpnet_base_mimics_query_facet_encoder MPNetEmbeddings from umass +author: John Snow Labs +name: mpnet_base_mimics_query_facet_encoder +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mpnet_base_mimics_query_facet_encoder` is a English model originally trained by umass. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mpnet_base_mimics_query_facet_encoder_en_5.5.0_3.0_1725595583458.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mpnet_base_mimics_query_facet_encoder_en_5.5.0_3.0_1725595583458.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("mpnet_base_mimics_query_facet_encoder","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("mpnet_base_mimics_query_facet_encoder","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mpnet_base_mimics_query_facet_encoder| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/umass/mpnet-base-mimics-query-facet-encoder \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-mpnet_base_mimics_query_facet_encoder_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-mpnet_base_mimics_query_facet_encoder_pipeline_en.md new file mode 100644 index 00000000000000..f860db57aac897 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-mpnet_base_mimics_query_facet_encoder_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English mpnet_base_mimics_query_facet_encoder_pipeline pipeline MPNetEmbeddings from umass +author: John Snow Labs +name: mpnet_base_mimics_query_facet_encoder_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mpnet_base_mimics_query_facet_encoder_pipeline` is a English model originally trained by umass. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mpnet_base_mimics_query_facet_encoder_pipeline_en_5.5.0_3.0_1725595604860.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mpnet_base_mimics_query_facet_encoder_pipeline_en_5.5.0_3.0_1725595604860.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mpnet_base_mimics_query_facet_encoder_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mpnet_base_mimics_query_facet_encoder_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mpnet_base_mimics_query_facet_encoder_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/umass/mpnet-base-mimics-query-facet-encoder + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-mpnet_multilabel_sector_classifier_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-mpnet_multilabel_sector_classifier_pipeline_en.md new file mode 100644 index 00000000000000..c89b143e8625f3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-mpnet_multilabel_sector_classifier_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mpnet_multilabel_sector_classifier_pipeline pipeline MPNetForSequenceClassification from ppsingh +author: John Snow Labs +name: mpnet_multilabel_sector_classifier_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mpnet_multilabel_sector_classifier_pipeline` is a English model originally trained by ppsingh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mpnet_multilabel_sector_classifier_pipeline_en_5.5.0_3.0_1725655469678.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mpnet_multilabel_sector_classifier_pipeline_en_5.5.0_3.0_1725655469678.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mpnet_multilabel_sector_classifier_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mpnet_multilabel_sector_classifier_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
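+
+Because this is a multilabel sector classifier, a single input may yield several labels. A sketch of running the pipeline and listing all predicted labels per document (the `class` output column is assumed from the Included Models list):
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+df = spark.createDataFrame(
+    [["The project finances renewable energy and improves water infrastructure."]]
+).toDF("text")
+
+pipeline = PretrainedPipeline("mpnet_multilabel_sector_classifier_pipeline", lang="en")
+annotations = pipeline.transform(df)
+
+# class.result is an array, so multiple sector labels can appear per row
+annotations.select("class.result").show(truncate=False)
+```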
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mpnet_multilabel_sector_classifier_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.6 MB| + +## References + +https://huggingface.co/ppsingh/mpnet-multilabel-sector-classifier + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-mrpc_microsoft_deberta_v3_large_seed_2_en.md b/docs/_posts/ahmedlone127/2024-09-06-mrpc_microsoft_deberta_v3_large_seed_2_en.md new file mode 100644 index 00000000000000..56ad5f0d7c306f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-mrpc_microsoft_deberta_v3_large_seed_2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mrpc_microsoft_deberta_v3_large_seed_2 DeBertaForSequenceClassification from utahnlp +author: John Snow Labs +name: mrpc_microsoft_deberta_v3_large_seed_2 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mrpc_microsoft_deberta_v3_large_seed_2` is a English model originally trained by utahnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mrpc_microsoft_deberta_v3_large_seed_2_en_5.5.0_3.0_1725590841759.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mrpc_microsoft_deberta_v3_large_seed_2_en_5.5.0_3.0_1725590841759.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("mrpc_microsoft_deberta_v3_large_seed_2","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("mrpc_microsoft_deberta_v3_large_seed_2", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mrpc_microsoft_deberta_v3_large_seed_2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/utahnlp/mrpc_microsoft_deberta-v3-large_seed-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-mrpc_microsoft_deberta_v3_large_seed_2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-mrpc_microsoft_deberta_v3_large_seed_2_pipeline_en.md new file mode 100644 index 00000000000000..5d9be752a1649b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-mrpc_microsoft_deberta_v3_large_seed_2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mrpc_microsoft_deberta_v3_large_seed_2_pipeline pipeline DeBertaForSequenceClassification from utahnlp +author: John Snow Labs +name: mrpc_microsoft_deberta_v3_large_seed_2_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mrpc_microsoft_deberta_v3_large_seed_2_pipeline` is a English model originally trained by utahnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mrpc_microsoft_deberta_v3_large_seed_2_pipeline_en_5.5.0_3.0_1725590975843.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mrpc_microsoft_deberta_v3_large_seed_2_pipeline_en_5.5.0_3.0_1725590975843.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mrpc_microsoft_deberta_v3_large_seed_2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mrpc_microsoft_deberta_v3_large_seed_2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
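+
+MRPC is a sentence-pair (paraphrase) task, while this pipeline exposes a single text input, so how the two sentences should be combined is not documented here. A minimal usage sketch with the light-pipeline API, using an illustrative sentence pair:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("mrpc_microsoft_deberta_v3_large_seed_2_pipeline", lang="en")
+
+result = pipeline.annotate("The company posted record profits. Profits at the firm hit a record high.")
+print(result.get("class"))
+```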
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mrpc_microsoft_deberta_v3_large_seed_2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/utahnlp/mrpc_microsoft_deberta-v3-large_seed-2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-multilingual_hate_speech_robacofi_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-06-multilingual_hate_speech_robacofi_pipeline_xx.md new file mode 100644 index 00000000000000..8c859959d10f5f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-multilingual_hate_speech_robacofi_pipeline_xx.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Multilingual multilingual_hate_speech_robacofi_pipeline pipeline XlmRoBertaForSequenceClassification from Andrazp +author: John Snow Labs +name: multilingual_hate_speech_robacofi_pipeline +date: 2024-09-06 +tags: [xx, open_source, pipeline, onnx] +task: Text Classification +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`multilingual_hate_speech_robacofi_pipeline` is a Multilingual model originally trained by Andrazp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/multilingual_hate_speech_robacofi_pipeline_xx_5.5.0_3.0_1725616024046.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/multilingual_hate_speech_robacofi_pipeline_xx_5.5.0_3.0_1725616024046.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("multilingual_hate_speech_robacofi_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("multilingual_hate_speech_robacofi_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
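+
+Since the model is multilingual (`xx`), the same pipeline can be applied to texts in different languages. A sketch follows; the `class` column name and the sample sentences are illustrative assumptions.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+df = spark.createDataFrame(
+    [["I love spark-nlp"], ["Me encanta esta comunidad"]]
+).toDF("text")
+
+pipeline = PretrainedPipeline("multilingual_hate_speech_robacofi_pipeline", lang="xx")
+pipeline.transform(df).select("text", "class.result").show(truncate=False)
+```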
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|multilingual_hate_speech_robacofi_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Andrazp/multilingual-hate-speech-robacofi + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-multilingual_xlm_roberta_for_ner_bcokdilli_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-06-multilingual_xlm_roberta_for_ner_bcokdilli_pipeline_xx.md new file mode 100644 index 00000000000000..149a92db585aca --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-multilingual_xlm_roberta_for_ner_bcokdilli_pipeline_xx.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Multilingual multilingual_xlm_roberta_for_ner_bcokdilli_pipeline pipeline XlmRoBertaForTokenClassification from bcokdilli +author: John Snow Labs +name: multilingual_xlm_roberta_for_ner_bcokdilli_pipeline +date: 2024-09-06 +tags: [xx, open_source, pipeline, onnx] +task: Named Entity Recognition +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`multilingual_xlm_roberta_for_ner_bcokdilli_pipeline` is a Multilingual model originally trained by bcokdilli. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/multilingual_xlm_roberta_for_ner_bcokdilli_pipeline_xx_5.5.0_3.0_1725593615683.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/multilingual_xlm_roberta_for_ner_bcokdilli_pipeline_xx_5.5.0_3.0_1725593615683.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("multilingual_xlm_roberta_for_ner_bcokdilli_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("multilingual_xlm_roberta_for_ner_bcokdilli_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
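+
+For this NER pipeline the interesting output is the token-level `ner` column. The sketch below pairs each token with its predicted tag; column names are assumed from the Included Models list, and the entity label set depends on the underlying model.
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+df = spark.createDataFrame([["Angela Merkel visited Lagos in 2019."]]).toDF("text")
+
+pipeline = PretrainedPipeline("multilingual_xlm_roberta_for_ner_bcokdilli_pipeline", lang="xx")
+annotations = pipeline.transform(df)
+
+# Zip token strings with their predicted NER tags
+annotations.selectExpr("explode(arrays_zip(token.result, ner.result)) as tagged_token").show(truncate=False)
+```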
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|multilingual_xlm_roberta_for_ner_bcokdilli_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|840.8 MB| + +## References + +https://huggingface.co/bcokdilli/multilingual-xlm-roberta-for-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-multilingual_xlm_roberta_for_ner_bcokdilli_xx.md b/docs/_posts/ahmedlone127/2024-09-06-multilingual_xlm_roberta_for_ner_bcokdilli_xx.md new file mode 100644 index 00000000000000..38bb3a08a6f9c3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-multilingual_xlm_roberta_for_ner_bcokdilli_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual multilingual_xlm_roberta_for_ner_bcokdilli XlmRoBertaForTokenClassification from bcokdilli +author: John Snow Labs +name: multilingual_xlm_roberta_for_ner_bcokdilli +date: 2024-09-06 +tags: [xx, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`multilingual_xlm_roberta_for_ner_bcokdilli` is a Multilingual model originally trained by bcokdilli. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/multilingual_xlm_roberta_for_ner_bcokdilli_xx_5.5.0_3.0_1725593531874.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/multilingual_xlm_roberta_for_ner_bcokdilli_xx_5.5.0_3.0_1725593531874.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("multilingual_xlm_roberta_for_ner_bcokdilli","xx") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("multilingual_xlm_roberta_for_ner_bcokdilli", "xx")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|multilingual_xlm_roberta_for_ner_bcokdilli| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|xx| +|Size:|840.8 MB| + +## References + +https://huggingface.co/bcokdilli/multilingual-xlm-roberta-for-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-naija_xlm_twitter_base_en.md b/docs/_posts/ahmedlone127/2024-09-06-naija_xlm_twitter_base_en.md new file mode 100644 index 00000000000000..0ac6a858de3a73 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-naija_xlm_twitter_base_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English naija_xlm_twitter_base XlmRoBertaEmbeddings from worldbank +author: John Snow Labs +name: naija_xlm_twitter_base +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`naija_xlm_twitter_base` is a English model originally trained by worldbank. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/naija_xlm_twitter_base_en_5.5.0_3.0_1725626090126.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/naija_xlm_twitter_base_en_5.5.0_3.0_1725626090126.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("naija_xlm_twitter_base","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("naija_xlm_twitter_base","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|naija_xlm_twitter_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[xlm_roberta]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/worldbank/naija-xlm-twitter-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-naija_xlm_twitter_base_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-naija_xlm_twitter_base_pipeline_en.md new file mode 100644 index 00000000000000..c34f942c48982c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-naija_xlm_twitter_base_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English naija_xlm_twitter_base_pipeline pipeline XlmRoBertaEmbeddings from worldbank +author: John Snow Labs +name: naija_xlm_twitter_base_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`naija_xlm_twitter_base_pipeline` is a English model originally trained by worldbank. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/naija_xlm_twitter_base_pipeline_en_5.5.0_3.0_1725626140660.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/naija_xlm_twitter_base_pipeline_en_5.5.0_3.0_1725626140660.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("naija_xlm_twitter_base_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("naija_xlm_twitter_base_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
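+
+The `df` referenced above is any DataFrame with a `text` column. The sketch below shows one way to build it and how to run the pipeline on a single string; the example sentence is an arbitrary placeholder added for illustration, not taken from the original card.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+# Start (or reuse) a Spark NLP session and build a DataFrame with a "text" column.
+spark = sparknlp.start()
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("naija_xlm_twitter_base_pipeline", lang="en")
+annotations = pipeline.transform(df)
+
+# annotate() is convenient for quick, single-string experiments.
+print(pipeline.annotate("I love spark-nlp"))
+```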
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|naija_xlm_twitter_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/worldbank/naija-xlm-twitter-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-nasa_impact_nasa_smd_ibm_southern_sotho_v2_classification_finetuned_en.md b/docs/_posts/ahmedlone127/2024-09-06-nasa_impact_nasa_smd_ibm_southern_sotho_v2_classification_finetuned_en.md new file mode 100644 index 00000000000000..a62474ce194a86 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-nasa_impact_nasa_smd_ibm_southern_sotho_v2_classification_finetuned_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English nasa_impact_nasa_smd_ibm_southern_sotho_v2_classification_finetuned RoBertaForSequenceClassification from arminmehrabian +author: John Snow Labs +name: nasa_impact_nasa_smd_ibm_southern_sotho_v2_classification_finetuned +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nasa_impact_nasa_smd_ibm_southern_sotho_v2_classification_finetuned` is a English model originally trained by arminmehrabian. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nasa_impact_nasa_smd_ibm_southern_sotho_v2_classification_finetuned_en_5.5.0_3.0_1725613646809.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nasa_impact_nasa_smd_ibm_southern_sotho_v2_classification_finetuned_en_5.5.0_3.0_1725613646809.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+  .setInputCol('text') \
+  .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+  .setInputCols(['document']) \
+  .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("nasa_impact_nasa_smd_ibm_southern_sotho_v2_classification_finetuned","en") \
+  .setInputCols(["document","token"]) \
+  .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("nasa_impact_nasa_smd_ibm_southern_sotho_v2_classification_finetuned", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
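+
+After the pipeline runs, the predicted label for each input row sits in the `class` column created above. A minimal follow-up sketch, not part of the original card:
+
+```python
+# One array entry per document; for single-sentence inputs this is the predicted label.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```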
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nasa_impact_nasa_smd_ibm_southern_sotho_v2_classification_finetuned| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|434.5 MB| + +## References + +https://huggingface.co/arminmehrabian/nasa-impact-nasa-smd-ibm-st-v2-classification-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-nasa_impact_nasa_smd_ibm_southern_sotho_v2_classification_finetuned_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-nasa_impact_nasa_smd_ibm_southern_sotho_v2_classification_finetuned_pipeline_en.md new file mode 100644 index 00000000000000..b1a6556aa1c814 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-nasa_impact_nasa_smd_ibm_southern_sotho_v2_classification_finetuned_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English nasa_impact_nasa_smd_ibm_southern_sotho_v2_classification_finetuned_pipeline pipeline RoBertaForSequenceClassification from arminmehrabian +author: John Snow Labs +name: nasa_impact_nasa_smd_ibm_southern_sotho_v2_classification_finetuned_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nasa_impact_nasa_smd_ibm_southern_sotho_v2_classification_finetuned_pipeline` is a English model originally trained by arminmehrabian. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nasa_impact_nasa_smd_ibm_southern_sotho_v2_classification_finetuned_pipeline_en_5.5.0_3.0_1725613683975.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nasa_impact_nasa_smd_ibm_southern_sotho_v2_classification_finetuned_pipeline_en_5.5.0_3.0_1725613683975.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("nasa_impact_nasa_smd_ibm_southern_sotho_v2_classification_finetuned_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("nasa_impact_nasa_smd_ibm_southern_sotho_v2_classification_finetuned_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nasa_impact_nasa_smd_ibm_southern_sotho_v2_classification_finetuned_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|434.5 MB| + +## References + +https://huggingface.co/arminmehrabian/nasa-impact-nasa-smd-ibm-st-v2-classification-finetuned + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-nepal_bhasa_bert_v2_en.md b/docs/_posts/ahmedlone127/2024-09-06-nepal_bhasa_bert_v2_en.md new file mode 100644 index 00000000000000..50cf4b4cc99e5a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-nepal_bhasa_bert_v2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English nepal_bhasa_bert_v2 BertForTokenClassification from edithram23 +author: John Snow Labs +name: nepal_bhasa_bert_v2 +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nepal_bhasa_bert_v2` is a English model originally trained by edithram23. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nepal_bhasa_bert_v2_en_5.5.0_3.0_1725663636298.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nepal_bhasa_bert_v2_en_5.5.0_3.0_1725663636298.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+  .setInputCol('text') \
+  .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+  .setInputCols(['document']) \
+  .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("nepal_bhasa_bert_v2","en") \
+  .setInputCols(["document","token"]) \
+  .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("nepal_bhasa_bert_v2", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
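+
+For low-latency inference on individual strings (for example inside a service), the fitted pipeline can be wrapped in a LightPipeline so no DataFrame has to be built per request. This is a hedged addition for illustration, reusing `pipelineModel` from the example above; it is not part of the original card.
+
+```python
+from sparknlp.base import LightPipeline
+
+# Annotate a single string without going through Spark DataFrames.
+light = LightPipeline(pipelineModel)
+print(light.annotate("I love spark-nlp"))
+```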
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nepal_bhasa_bert_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|407.5 MB| + +## References + +https://huggingface.co/edithram23/new-bert-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-nepal_bhasa_bert_v2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-nepal_bhasa_bert_v2_pipeline_en.md new file mode 100644 index 00000000000000..e31985b8f8086f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-nepal_bhasa_bert_v2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English nepal_bhasa_bert_v2_pipeline pipeline BertForTokenClassification from edithram23 +author: John Snow Labs +name: nepal_bhasa_bert_v2_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nepal_bhasa_bert_v2_pipeline` is a English model originally trained by edithram23. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nepal_bhasa_bert_v2_pipeline_en_5.5.0_3.0_1725663656923.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nepal_bhasa_bert_v2_pipeline_en_5.5.0_3.0_1725663656923.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("nepal_bhasa_bert_v2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("nepal_bhasa_bert_v2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nepal_bhasa_bert_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.5 MB| + +## References + +https://huggingface.co/edithram23/new-bert-v2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-nepal_bhasa_dummy_model_en.md b/docs/_posts/ahmedlone127/2024-09-06-nepal_bhasa_dummy_model_en.md new file mode 100644 index 00000000000000..9b04c0e2bb1d48 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-nepal_bhasa_dummy_model_en.md @@ -0,0 +1,98 @@ +--- +layout: model +title: English nepal_bhasa_dummy_model DistilBertForSequenceClassification from lysandre +author: John Snow Labs +name: nepal_bhasa_dummy_model +date: 2024-09-06 +tags: [bert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nepal_bhasa_dummy_model` is a English model originally trained by lysandre. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nepal_bhasa_dummy_model_en_5.5.0_3.0_1725632975357.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nepal_bhasa_dummy_model_en_5.5.0_3.0_1725632975357.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+document_assembler = DocumentAssembler()\
+  .setInputCol("text")\
+  .setOutputCol("document")
+
+tokenizer = Tokenizer()\
+  .setInputCols("document")\
+  .setOutputCol("token")
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("nepal_bhasa_dummy_model","en")\
+  .setInputCols(["document","token"])\
+  .setOutputCol("class")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier])
+
+data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+val document_assembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("nepal_bhasa_dummy_model","en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier))
+
+val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text")
+
+val result = pipeline.fit(data).transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nepal_bhasa_dummy_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +References + +https://huggingface.co/lysandre/new-dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-nepal_bhasa_phishing_email_detection_ful_en.md b/docs/_posts/ahmedlone127/2024-09-06-nepal_bhasa_phishing_email_detection_ful_en.md new file mode 100644 index 00000000000000..b7216a5b5b410e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-nepal_bhasa_phishing_email_detection_ful_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English nepal_bhasa_phishing_email_detection_ful DistilBertForSequenceClassification from kamikaze20 +author: John Snow Labs +name: nepal_bhasa_phishing_email_detection_ful +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nepal_bhasa_phishing_email_detection_ful` is a English model originally trained by kamikaze20. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nepal_bhasa_phishing_email_detection_ful_en_5.5.0_3.0_1725608316574.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nepal_bhasa_phishing_email_detection_ful_en_5.5.0_3.0_1725608316574.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+  .setInputCol('text') \
+  .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+  .setInputCols(['document']) \
+  .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("nepal_bhasa_phishing_email_detection_ful","en") \
+  .setInputCols(["document","token"]) \
+  .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("nepal_bhasa_phishing_email_detection_ful", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nepal_bhasa_phishing_email_detection_ful| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|246.0 MB| + +## References + +https://huggingface.co/kamikaze20/new_phishing-email-detection_ful \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-ner_ecu_uda_en.md b/docs/_posts/ahmedlone127/2024-09-06-ner_ecu_uda_en.md new file mode 100644 index 00000000000000..e425c4deec8ccb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-ner_ecu_uda_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ner_ecu_uda DistilBertForTokenClassification from dannyLeo16 +author: John Snow Labs +name: ner_ecu_uda +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_ecu_uda` is a English model originally trained by dannyLeo16. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_ecu_uda_en_5.5.0_3.0_1725599094739.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_ecu_uda_en_5.5.0_3.0_1725599094739.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+  .setInputCol('text') \
+  .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+  .setInputCols(['document']) \
+  .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("ner_ecu_uda","en") \
+  .setInputCols(["document","token"]) \
+  .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("ner_ecu_uda", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_ecu_uda| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|250.3 MB| + +## References + +https://huggingface.co/dannyLeo16/NER_ECU_UDA \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-ner_ecu_uda_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-ner_ecu_uda_pipeline_en.md new file mode 100644 index 00000000000000..8ecef4da0f7786 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-ner_ecu_uda_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English ner_ecu_uda_pipeline pipeline DistilBertForTokenClassification from dannyLeo16 +author: John Snow Labs +name: ner_ecu_uda_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_ecu_uda_pipeline` is a English model originally trained by dannyLeo16. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_ecu_uda_pipeline_en_5.5.0_3.0_1725599107662.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_ecu_uda_pipeline_en_5.5.0_3.0_1725599107662.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ner_ecu_uda_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ner_ecu_uda_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_ecu_uda_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|250.3 MB| + +## References + +https://huggingface.co/dannyLeo16/NER_ECU_UDA + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-ner_model_cwchang_en.md b/docs/_posts/ahmedlone127/2024-09-06-ner_model_cwchang_en.md new file mode 100644 index 00000000000000..0dc637f3d8845f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-ner_model_cwchang_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ner_model_cwchang DistilBertForTokenClassification from cwchang +author: John Snow Labs +name: ner_model_cwchang +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_model_cwchang` is a English model originally trained by cwchang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_model_cwchang_en_5.5.0_3.0_1725653971571.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_model_cwchang_en_5.5.0_3.0_1725653971571.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+  .setInputCol('text') \
+  .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+  .setInputCols(['document']) \
+  .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("ner_model_cwchang","en") \
+  .setInputCols(["document","token"]) \
+  .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("ner_model_cwchang", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
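+
+The model emits token-level IOB tags; if you want entity chunks instead, a NerConverter stage can be appended to the same pipeline, as shown below. This is an illustrative, hedged extension reusing the variables defined in the example above; it is not part of the original card.
+
+```python
+from sparknlp.annotator import NerConverter
+
+# Group B-/I- tags from the "ner" column into entity chunks.
+nerConverter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+chunkPipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier, nerConverter])
+chunkDF = chunkPipeline.fit(data).transform(data)
+chunkDF.select("ner_chunk.result").show(truncate=False)
+```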
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_model_cwchang| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|505.4 MB| + +## References + +https://huggingface.co/cwchang/ner_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-ner_model_ep1_en.md b/docs/_posts/ahmedlone127/2024-09-06-ner_model_ep1_en.md new file mode 100644 index 00000000000000..ec4ca2864505a4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-ner_model_ep1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ner_model_ep1 DistilBertForTokenClassification from Polo123 +author: John Snow Labs +name: ner_model_ep1 +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_model_ep1` is a English model originally trained by Polo123. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_model_ep1_en_5.5.0_3.0_1725599627037.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_model_ep1_en_5.5.0_3.0_1725599627037.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+  .setInputCol('text') \
+  .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+  .setInputCols(['document']) \
+  .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("ner_model_ep1","en") \
+  .setInputCols(["document","token"]) \
+  .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("ner_model_ep1", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_model_ep1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Polo123/ner_model_ep1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-ner_model_maccrobat_en.md b/docs/_posts/ahmedlone127/2024-09-06-ner_model_maccrobat_en.md new file mode 100644 index 00000000000000..81ad8da06b2bcf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-ner_model_maccrobat_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ner_model_maccrobat DistilBertForTokenClassification from DepressedSage +author: John Snow Labs +name: ner_model_maccrobat +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_model_maccrobat` is a English model originally trained by DepressedSage. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_model_maccrobat_en_5.5.0_3.0_1725654146582.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_model_maccrobat_en_5.5.0_3.0_1725654146582.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+  .setInputCol('text') \
+  .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+  .setInputCols(['document']) \
+  .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("ner_model_maccrobat","en") \
+  .setInputCols(["document","token"]) \
+  .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("ner_model_maccrobat", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_model_maccrobat| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.5 MB| + +## References + +https://huggingface.co/DepressedSage/ner_model_MACCROBAT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-ner_ner_random0_seed1_bernice_en.md b/docs/_posts/ahmedlone127/2024-09-06-ner_ner_random0_seed1_bernice_en.md new file mode 100644 index 00000000000000..f4f2cf37536dcd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-ner_ner_random0_seed1_bernice_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ner_ner_random0_seed1_bernice XlmRoBertaForTokenClassification from tweettemposhift +author: John Snow Labs +name: ner_ner_random0_seed1_bernice +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_ner_random0_seed1_bernice` is a English model originally trained by tweettemposhift. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_ner_random0_seed1_bernice_en_5.5.0_3.0_1725656566980.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_ner_random0_seed1_bernice_en_5.5.0_3.0_1725656566980.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+  .setInputCol('text') \
+  .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+  .setInputCols(['document']) \
+  .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("ner_ner_random0_seed1_bernice","en") \
+  .setInputCols(["document","token"]) \
+  .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("ner_ner_random0_seed1_bernice", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_ner_random0_seed1_bernice| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|802.5 MB| + +## References + +https://huggingface.co/tweettemposhift/ner-ner_random0_seed1-bernice \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-ner_oee_techme_en.md b/docs/_posts/ahmedlone127/2024-09-06-ner_oee_techme_en.md new file mode 100644 index 00000000000000..b2a581af755110 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-ner_oee_techme_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ner_oee_techme DistilBertForTokenClassification from techme +author: John Snow Labs +name: ner_oee_techme +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_oee_techme` is a English model originally trained by techme. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_oee_techme_en_5.5.0_3.0_1725653393231.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_oee_techme_en_5.5.0_3.0_1725653393231.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+  .setInputCol('text') \
+  .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+  .setInputCols(['document']) \
+  .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("ner_oee_techme","en") \
+  .setInputCols(["document","token"]) \
+  .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("ner_oee_techme", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_oee_techme| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/techme/ner_oee \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-ner_oee_techme_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-ner_oee_techme_pipeline_en.md new file mode 100644 index 00000000000000..ca94cb2409ca45 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-ner_oee_techme_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English ner_oee_techme_pipeline pipeline DistilBertForTokenClassification from techme +author: John Snow Labs +name: ner_oee_techme_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_oee_techme_pipeline` is a English model originally trained by techme. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_oee_techme_pipeline_en_5.5.0_3.0_1725653405749.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_oee_techme_pipeline_en_5.5.0_3.0_1725653405749.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ner_oee_techme_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ner_oee_techme_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_oee_techme_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/techme/ner_oee + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-nerd_nerd_temporal_bernice_en.md b/docs/_posts/ahmedlone127/2024-09-06-nerd_nerd_temporal_bernice_en.md new file mode 100644 index 00000000000000..24265f9a1f2de1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-nerd_nerd_temporal_bernice_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English nerd_nerd_temporal_bernice XlmRoBertaForSequenceClassification from tweettemposhift +author: John Snow Labs +name: nerd_nerd_temporal_bernice +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nerd_nerd_temporal_bernice` is a English model originally trained by tweettemposhift. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nerd_nerd_temporal_bernice_en_5.5.0_3.0_1725620819744.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nerd_nerd_temporal_bernice_en_5.5.0_3.0_1725620819744.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+  .setInputCol('text') \
+  .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+  .setInputCols(['document']) \
+  .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("nerd_nerd_temporal_bernice","en") \
+  .setInputCols(["document","token"]) \
+  .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("nerd_nerd_temporal_bernice", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nerd_nerd_temporal_bernice| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|826.9 MB| + +## References + +https://huggingface.co/tweettemposhift/nerd-nerd_temporal-bernice \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-nerd_nerd_temporal_bernice_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-nerd_nerd_temporal_bernice_pipeline_en.md new file mode 100644 index 00000000000000..f5870b1a55cb6d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-nerd_nerd_temporal_bernice_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English nerd_nerd_temporal_bernice_pipeline pipeline XlmRoBertaForSequenceClassification from tweettemposhift +author: John Snow Labs +name: nerd_nerd_temporal_bernice_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nerd_nerd_temporal_bernice_pipeline` is a English model originally trained by tweettemposhift. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nerd_nerd_temporal_bernice_pipeline_en_5.5.0_3.0_1725620949026.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nerd_nerd_temporal_bernice_pipeline_en_5.5.0_3.0_1725620949026.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("nerd_nerd_temporal_bernice_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("nerd_nerd_temporal_bernice_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nerd_nerd_temporal_bernice_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|826.9 MB| + +## References + +https://huggingface.co/tweettemposhift/nerd-nerd_temporal-bernice + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-news_title_roberta_en.md b/docs/_posts/ahmedlone127/2024-09-06-news_title_roberta_en.md new file mode 100644 index 00000000000000..c809bf5b763d62 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-news_title_roberta_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English news_title_roberta RoBertaForSequenceClassification from Varshitha +author: John Snow Labs +name: news_title_roberta +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`news_title_roberta` is a English model originally trained by Varshitha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/news_title_roberta_en_5.5.0_3.0_1725613178918.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/news_title_roberta_en_5.5.0_3.0_1725613178918.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+  .setInputCol('text') \
+  .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+  .setInputCols(['document']) \
+  .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("news_title_roberta","en") \
+  .setInputCols(["document","token"]) \
+  .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("news_title_roberta", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|news_title_roberta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|446.2 MB| + +## References + +https://huggingface.co/Varshitha/News_Title_RoBERTa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-news_title_roberta_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-news_title_roberta_pipeline_en.md new file mode 100644 index 00000000000000..90f4fad02587f7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-news_title_roberta_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English news_title_roberta_pipeline pipeline RoBertaForSequenceClassification from Varshitha +author: John Snow Labs +name: news_title_roberta_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`news_title_roberta_pipeline` is a English model originally trained by Varshitha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/news_title_roberta_pipeline_en_5.5.0_3.0_1725613202610.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/news_title_roberta_pipeline_en_5.5.0_3.0_1725613202610.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("news_title_roberta_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("news_title_roberta_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|news_title_roberta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|446.2 MB| + +## References + +https://huggingface.co/Varshitha/News_Title_RoBERTa + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-nlp_course_chapter4_test_model_1_en.md b/docs/_posts/ahmedlone127/2024-09-06-nlp_course_chapter4_test_model_1_en.md new file mode 100644 index 00000000000000..9ce400145dcb84 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-nlp_course_chapter4_test_model_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English nlp_course_chapter4_test_model_1 CamemBertEmbeddings from BanUrsus +author: John Snow Labs +name: nlp_course_chapter4_test_model_1 +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nlp_course_chapter4_test_model_1` is a English model originally trained by BanUrsus. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nlp_course_chapter4_test_model_1_en_5.5.0_3.0_1725631858739.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nlp_course_chapter4_test_model_1_en_5.5.0_3.0_1725631858739.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("nlp_course_chapter4_test_model_1","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("nlp_course_chapter4_test_model_1","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
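+
+When the embeddings are meant to feed downstream Spark ML stages, an EmbeddingsFinisher can convert the annotation column into plain vectors. This is a hedged sketch reusing `pipelineDF` from the example above; it is not part of the original card.
+
+```python
+from sparknlp.base import EmbeddingsFinisher
+
+# Convert Spark NLP annotation embeddings into Spark ML vectors.
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["finished_embeddings"]) \
+    .setOutputAsVector(True)
+
+finisher.transform(pipelineDF).select("finished_embeddings").show(1, truncate=80)
+```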
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nlp_course_chapter4_test_model_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/BanUrsus/NLP-course-chapter4-test-model-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-nlp_course_chapter4_test_model_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-nlp_course_chapter4_test_model_1_pipeline_en.md new file mode 100644 index 00000000000000..b8117aa3e6c7f7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-nlp_course_chapter4_test_model_1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English nlp_course_chapter4_test_model_1_pipeline pipeline CamemBertEmbeddings from BanUrsus +author: John Snow Labs +name: nlp_course_chapter4_test_model_1_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nlp_course_chapter4_test_model_1_pipeline` is a English model originally trained by BanUrsus. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nlp_course_chapter4_test_model_1_pipeline_en_5.5.0_3.0_1725631937030.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nlp_course_chapter4_test_model_1_pipeline_en_5.5.0_3.0_1725631937030.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("nlp_course_chapter4_test_model_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("nlp_course_chapter4_test_model_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nlp_course_chapter4_test_model_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/BanUrsus/NLP-course-chapter4-test-model-1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-nlp_task_modified_en.md b/docs/_posts/ahmedlone127/2024-09-06-nlp_task_modified_en.md new file mode 100644 index 00000000000000..7fbdb2a23191e6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-nlp_task_modified_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English nlp_task_modified DistilBertForQuestionAnswering from Stucknight +author: John Snow Labs +name: nlp_task_modified +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nlp_task_modified` is a English model originally trained by Stucknight. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nlp_task_modified_en_5.5.0_3.0_1725622073652.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nlp_task_modified_en_5.5.0_3.0_1725622073652.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("nlp_task_modified","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("nlp_task_modified", "en")
+  .setInputCols(Array("document_question", "document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("document_question", "document_context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
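+
+The extracted span ends up in the `answer` column configured above; each annotation's `result` field holds the answer text. A minimal sketch for reading it back:
+
+```python
+# Show the extracted answer for each question/context pair
+pipelineDF.select("answer.result").show(truncate=False)
+```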
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nlp_task_modified| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Stucknight/nlp_task_modified \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-nlp_task_modified_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-nlp_task_modified_pipeline_en.md new file mode 100644 index 00000000000000..a38c85b4c177d7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-nlp_task_modified_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English nlp_task_modified_pipeline pipeline DistilBertForQuestionAnswering from Stucknight +author: John Snow Labs +name: nlp_task_modified_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nlp_task_modified_pipeline` is a English model originally trained by Stucknight. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nlp_task_modified_pipeline_en_5.5.0_3.0_1725622086257.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nlp_task_modified_pipeline_en_5.5.0_3.0_1725622086257.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("nlp_task_modified_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("nlp_task_modified_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nlp_task_modified_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Stucknight/nlp_task_modified + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-norwegian_bokml_whisper_base_beta_no.md b/docs/_posts/ahmedlone127/2024-09-06-norwegian_bokml_whisper_base_beta_no.md new file mode 100644 index 00000000000000..48cb4817c0fb13 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-norwegian_bokml_whisper_base_beta_no.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Norwegian norwegian_bokml_whisper_base_beta WhisperForCTC from NbAiLab +author: John Snow Labs +name: norwegian_bokml_whisper_base_beta +date: 2024-09-06 +tags: ["no", open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: "no" +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`norwegian_bokml_whisper_base_beta` is a Norwegian model originally trained by NbAiLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/norwegian_bokml_whisper_base_beta_no_5.5.0_3.0_1725581874744.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/norwegian_bokml_whisper_base_beta_no_5.5.0_3.0_1725581874744.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("norwegian_bokml_whisper_base_beta","no") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("norwegian_bokml_whisper_base_beta", "no")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
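+
+The example references a `data` DataFrame that is never built. A minimal sketch, assuming the audio has already been decoded to a 16 kHz float array (the `audio_content` column name matches the AudioAssembler input above; the decoder and file path are illustrative assumptions):
+
+```python
+import librosa  # any decoder that yields raw float samples works
+
+# Decode a local file to raw floats at 16 kHz, the sampling rate Whisper models expect
+waveform, _ = librosa.load("sample.wav", sr=16000)
+
+data = spark.createDataFrame([[waveform.tolist()]]).toDF("audio_content")
+```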
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|norwegian_bokml_whisper_base_beta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|no| +|Size:|641.6 MB| + +## References + +https://huggingface.co/NbAiLab/nb-whisper-base-beta \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-norwegian_bokml_whisper_base_beta_pipeline_no.md b/docs/_posts/ahmedlone127/2024-09-06-norwegian_bokml_whisper_base_beta_pipeline_no.md new file mode 100644 index 00000000000000..4bb3e7c40916fd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-norwegian_bokml_whisper_base_beta_pipeline_no.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Norwegian norwegian_bokml_whisper_base_beta_pipeline pipeline WhisperForCTC from NbAiLab +author: John Snow Labs +name: norwegian_bokml_whisper_base_beta_pipeline +date: 2024-09-06 +tags: ["no", open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: "no" +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`norwegian_bokml_whisper_base_beta_pipeline` is a Norwegian model originally trained by NbAiLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/norwegian_bokml_whisper_base_beta_pipeline_no_5.5.0_3.0_1725581909541.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/norwegian_bokml_whisper_base_beta_pipeline_no_5.5.0_3.0_1725581909541.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("norwegian_bokml_whisper_base_beta_pipeline", lang = "no") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("norwegian_bokml_whisper_base_beta_pipeline", lang = "no") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|norwegian_bokml_whisper_base_beta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|no| +|Size:|641.7 MB| + +## References + +https://huggingface.co/NbAiLab/nb-whisper-base-beta + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-norwegian_bokml_whisper_small_beta_no.md b/docs/_posts/ahmedlone127/2024-09-06-norwegian_bokml_whisper_small_beta_no.md new file mode 100644 index 00000000000000..86edd87cdb19cb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-norwegian_bokml_whisper_small_beta_no.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Norwegian norwegian_bokml_whisper_small_beta WhisperForCTC from NbAiLab +author: John Snow Labs +name: norwegian_bokml_whisper_small_beta +date: 2024-09-06 +tags: ["no", open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: "no" +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`norwegian_bokml_whisper_small_beta` is a Norwegian model originally trained by NbAiLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/norwegian_bokml_whisper_small_beta_no_5.5.0_3.0_1725581456053.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/norwegian_bokml_whisper_small_beta_no_5.5.0_3.0_1725581456053.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("norwegian_bokml_whisper_small_beta","no") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("norwegian_bokml_whisper_small_beta", "no")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|norwegian_bokml_whisper_small_beta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|no| +|Size:|1.7 GB| + +## References + +https://huggingface.co/NbAiLab/nb-whisper-small-beta \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-norwegian_bokml_whisper_small_beta_pipeline_no.md b/docs/_posts/ahmedlone127/2024-09-06-norwegian_bokml_whisper_small_beta_pipeline_no.md new file mode 100644 index 00000000000000..a26ceeb2a9d828 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-norwegian_bokml_whisper_small_beta_pipeline_no.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Norwegian norwegian_bokml_whisper_small_beta_pipeline pipeline WhisperForCTC from NbAiLab +author: John Snow Labs +name: norwegian_bokml_whisper_small_beta_pipeline +date: 2024-09-06 +tags: ["no", open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: "no" +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`norwegian_bokml_whisper_small_beta_pipeline` is a Norwegian model originally trained by NbAiLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/norwegian_bokml_whisper_small_beta_pipeline_no_5.5.0_3.0_1725581550858.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/norwegian_bokml_whisper_small_beta_pipeline_no_5.5.0_3.0_1725581550858.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("norwegian_bokml_whisper_small_beta_pipeline", lang = "no") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("norwegian_bokml_whisper_small_beta_pipeline", lang = "no") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|norwegian_bokml_whisper_small_beta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|no| +|Size:|1.7 GB| + +## References + +https://huggingface.co/NbAiLab/nb-whisper-small-beta + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-norwegian_bokml_whisper_small_verbatim_nbailabbeta_no.md b/docs/_posts/ahmedlone127/2024-09-06-norwegian_bokml_whisper_small_verbatim_nbailabbeta_no.md new file mode 100644 index 00000000000000..758f26e1d9fe75 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-norwegian_bokml_whisper_small_verbatim_nbailabbeta_no.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Norwegian norwegian_bokml_whisper_small_verbatim_nbailabbeta WhisperForCTC from NbAiLabBeta +author: John Snow Labs +name: norwegian_bokml_whisper_small_verbatim_nbailabbeta +date: 2024-09-06 +tags: ["no", open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: "no" +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`norwegian_bokml_whisper_small_verbatim_nbailabbeta` is a Norwegian model originally trained by NbAiLabBeta. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/norwegian_bokml_whisper_small_verbatim_nbailabbeta_no_5.5.0_3.0_1725648217258.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/norwegian_bokml_whisper_small_verbatim_nbailabbeta_no_5.5.0_3.0_1725648217258.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("norwegian_bokml_whisper_small_verbatim_nbailabbeta","no") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("norwegian_bokml_whisper_small_verbatim_nbailabbeta", "no")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|norwegian_bokml_whisper_small_verbatim_nbailabbeta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|no| +|Size:|1.7 GB| + +## References + +https://huggingface.co/NbAiLabBeta/nb-whisper-small-verbatim \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-norwegian_bokml_whisper_small_verbatim_nbailabbeta_pipeline_no.md b/docs/_posts/ahmedlone127/2024-09-06-norwegian_bokml_whisper_small_verbatim_nbailabbeta_pipeline_no.md new file mode 100644 index 00000000000000..3bb6907c9a36dd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-norwegian_bokml_whisper_small_verbatim_nbailabbeta_pipeline_no.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Norwegian norwegian_bokml_whisper_small_verbatim_nbailabbeta_pipeline pipeline WhisperForCTC from NbAiLabBeta +author: John Snow Labs +name: norwegian_bokml_whisper_small_verbatim_nbailabbeta_pipeline +date: 2024-09-06 +tags: ["no", open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: "no" +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`norwegian_bokml_whisper_small_verbatim_nbailabbeta_pipeline` is a Norwegian model originally trained by NbAiLabBeta. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/norwegian_bokml_whisper_small_verbatim_nbailabbeta_pipeline_no_5.5.0_3.0_1725648295501.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/norwegian_bokml_whisper_small_verbatim_nbailabbeta_pipeline_no_5.5.0_3.0_1725648295501.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("norwegian_bokml_whisper_small_verbatim_nbailabbeta_pipeline", lang = "no") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("norwegian_bokml_whisper_small_verbatim_nbailabbeta_pipeline", lang = "no") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|norwegian_bokml_whisper_small_verbatim_nbailabbeta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|no| +|Size:|1.7 GB| + +## References + +https://huggingface.co/NbAiLabBeta/nb-whisper-small-verbatim + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-nps_mpnet_lds_en.md b/docs/_posts/ahmedlone127/2024-09-06-nps_mpnet_lds_en.md new file mode 100644 index 00000000000000..ad79df261d932a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-nps_mpnet_lds_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English nps_mpnet_lds MPNetEmbeddings from nategro +author: John Snow Labs +name: nps_mpnet_lds +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nps_mpnet_lds` is a English model originally trained by nategro. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nps_mpnet_lds_en_5.5.0_3.0_1725595830532.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nps_mpnet_lds_en_5.5.0_3.0_1725595830532.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("nps_mpnet_lds","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("nps_mpnet_lds","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
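+
+The MPNet annotator emits one document-level annotation whose `embeddings` field is the sentence vector. A minimal sketch for pulling it out, using the column name configured above:
+
+```python
+# The single annotation in "embeddings" carries the sentence embedding
+pipelineDF.selectExpr("embeddings[0].embeddings as sentence_embedding").show(truncate=False)
+```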
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nps_mpnet_lds| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/nategro/nps-mpnet-lds \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-nusabert_base_en.md b/docs/_posts/ahmedlone127/2024-09-06-nusabert_base_en.md new file mode 100644 index 00000000000000..d09928b48b11bb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-nusabert_base_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English nusabert_base BertEmbeddings from LazarusNLP +author: John Snow Labs +name: nusabert_base +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nusabert_base` is a English model originally trained by LazarusNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nusabert_base_en_5.5.0_3.0_1725617941005.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nusabert_base_en_5.5.0_3.0_1725617941005.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("nusabert_base","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("nusabert_base","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nusabert_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|en| +|Size:|412.4 MB| + +## References + +https://huggingface.co/LazarusNLP/NusaBERT-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-nusabert_base_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-nusabert_base_pipeline_en.md new file mode 100644 index 00000000000000..89731cccb50e61 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-nusabert_base_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English nusabert_base_pipeline pipeline BertEmbeddings from LazarusNLP +author: John Snow Labs +name: nusabert_base_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nusabert_base_pipeline` is a English model originally trained by LazarusNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nusabert_base_pipeline_en_5.5.0_3.0_1725617961910.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nusabert_base_pipeline_en_5.5.0_3.0_1725617961910.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("nusabert_base_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("nusabert_base_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nusabert_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|412.4 MB| + +## References + +https://huggingface.co/LazarusNLP/NusaBERT-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-ojobert_en.md b/docs/_posts/ahmedlone127/2024-09-06-ojobert_en.md new file mode 100644 index 00000000000000..b2bd3e9786ec13 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-ojobert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ojobert DistilBertEmbeddings from ihk +author: John Snow Labs +name: ojobert +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ojobert` is a English model originally trained by ihk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ojobert_en_5.5.0_3.0_1725639636883.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ojobert_en_5.5.0_3.0_1725639636883.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("ojobert","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("ojobert","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ojobert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ihk/ojobert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-opendispatcher_v4_gpt35turbo_and_gpt4_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-opendispatcher_v4_gpt35turbo_and_gpt4_pipeline_en.md new file mode 100644 index 00000000000000..17c66f531d76e3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-opendispatcher_v4_gpt35turbo_and_gpt4_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opendispatcher_v4_gpt35turbo_and_gpt4_pipeline pipeline DistilBertForSequenceClassification from gaodrew +author: John Snow Labs +name: opendispatcher_v4_gpt35turbo_and_gpt4_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opendispatcher_v4_gpt35turbo_and_gpt4_pipeline` is a English model originally trained by gaodrew. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opendispatcher_v4_gpt35turbo_and_gpt4_pipeline_en_5.5.0_3.0_1725608356376.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opendispatcher_v4_gpt35turbo_and_gpt4_pipeline_en_5.5.0_3.0_1725608356376.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opendispatcher_v4_gpt35turbo_and_gpt4_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opendispatcher_v4_gpt35turbo_and_gpt4_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opendispatcher_v4_gpt35turbo_and_gpt4_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/gaodrew/OpenDispatcher_v4_gpt35turbo_and_gpt4 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_chinese_finetuned_english_tonga_tonga_islands_chinese_galgame_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_chinese_finetuned_english_tonga_tonga_islands_chinese_galgame_pipeline_en.md new file mode 100644 index 00000000000000..4af4eccd0c5a5e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_chinese_finetuned_english_tonga_tonga_islands_chinese_galgame_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_maltese_english_chinese_finetuned_english_tonga_tonga_islands_chinese_galgame_pipeline pipeline MarianTransformer from huhu233 +author: John Snow Labs +name: opus_maltese_english_chinese_finetuned_english_tonga_tonga_islands_chinese_galgame_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_chinese_finetuned_english_tonga_tonga_islands_chinese_galgame_pipeline` is a English model originally trained by huhu233. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_chinese_finetuned_english_tonga_tonga_islands_chinese_galgame_pipeline_en_5.5.0_3.0_1725636005120.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_chinese_finetuned_english_tonga_tonga_islands_chinese_galgame_pipeline_en_5.5.0_3.0_1725636005120.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_english_chinese_finetuned_english_tonga_tonga_islands_chinese_galgame_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_english_chinese_finetuned_english_tonga_tonga_islands_chinese_galgame_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
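+
+As with the other pretrained pipelines, `df` is assumed to be a DataFrame with a `text` column; the bundled sentence detector splits it and the MarianTransformer returns one translation per detected sentence. A minimal sketch (the sample sentence and the `translation` output column are assumptions based on the standalone model cards):
+
+```python
+df = spark.createDataFrame([["How are you today?"]]).toDF("text")
+annotations = pipeline.transform(df)
+annotations.select("translation.result").show(truncate=False)
+```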
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_chinese_finetuned_english_tonga_tonga_islands_chinese_galgame_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|541.2 MB| + +## References + +https://huggingface.co/huhu233/opus-mt-en-zh-finetuned-en-to-zh-galgame + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ac_en.md b/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ac_en.md new file mode 100644 index 00000000000000..446112ca63b844 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ac_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ac MarianTransformer from Achuka +author: John Snow Labs +name: opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ac +date: 2024-09-06 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ac` is a English model originally trained by Achuka. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ac_en_5.5.0_3.0_1725634958943.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ac_en_5.5.0_3.0_1725634958943.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ac","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ac","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
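+
+With the columns wired up as above, the translated text lands in the `translation` output column, one annotation per detected sentence. A minimal sketch for reading it back:
+
+```python
+# `result` holds the translated text for each detected sentence
+pipelineDF.select("translation.result").show(truncate=False)
+```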
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_ganda_finetuned_english_tonga_tonga_islands_ac| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|512.5 MB| + +## References + +https://huggingface.co/Achuka/opus-mt-en-lg-finetuned-en-to-ac \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_pong_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_pong_pipeline_en.md new file mode 100644 index 00000000000000..c96cd15867d89f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_pong_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_pong_pipeline pipeline MarianTransformer from pong +author: John Snow Labs +name: opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_pong_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_pong_pipeline` is a English model originally trained by pong. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_pong_pipeline_en_5.5.0_3.0_1725635731651.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_pong_pipeline_en_5.5.0_3.0_1725635731651.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_pong_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_pong_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_multiple_languages_finetuned_english_tonga_tonga_islands_thai_pong_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|530.8 MB| + +## References + +https://huggingface.co/pong/opus-mt-en-mul-finetuned-en-to-th + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_german_finetuned_english_tonga_tonga_islands_german_en.md b/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_german_finetuned_english_tonga_tonga_islands_german_en.md new file mode 100644 index 00000000000000..3696db681c6792 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_german_finetuned_english_tonga_tonga_islands_german_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_german_finetuned_english_tonga_tonga_islands_german MarianTransformer from MicMer17 +author: John Snow Labs +name: opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_german_finetuned_english_tonga_tonga_islands_german +date: 2024-09-06 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_german_finetuned_english_tonga_tonga_islands_german` is a English model originally trained by MicMer17. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_german_finetuned_english_tonga_tonga_islands_german_en_5.5.0_3.0_1725635658837.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_german_finetuned_english_tonga_tonga_islands_german_en_5.5.0_3.0_1725635658837.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_german_finetuned_english_tonga_tonga_islands_german","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_german_finetuned_english_tonga_tonga_islands_german","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_german_finetuned_english_tonga_tonga_islands_german| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.6 MB| + +## References + +https://huggingface.co/MicMer17/opus-mt-en-ro-finetuned-en-to-de-finetuned-en-to-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_lamkhoi_en.md b/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_lamkhoi_en.md new file mode 100644 index 00000000000000..3ec547b090fc4d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_lamkhoi_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_lamkhoi MarianTransformer from lamkhoi +author: John Snow Labs +name: opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_lamkhoi +date: 2024-09-06 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_lamkhoi` is a English model originally trained by lamkhoi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_lamkhoi_en_5.5.0_3.0_1725634958481.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_lamkhoi_en_5.5.0_3.0_1725634958481.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_lamkhoi","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_lamkhoi","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_lamkhoi| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.6 MB| + +## References + +https://huggingface.co/lamkhoi/opus-mt-en-ro-finetuned-en-to-ro \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_lamkhoi_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_lamkhoi_pipeline_en.md new file mode 100644 index 00000000000000..11dbff9832f61c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_lamkhoi_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_lamkhoi_pipeline pipeline MarianTransformer from lamkhoi +author: John Snow Labs +name: opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_lamkhoi_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_lamkhoi_pipeline` is a English model originally trained by lamkhoi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_lamkhoi_pipeline_en_5.5.0_3.0_1725634993072.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_lamkhoi_pipeline_en_5.5.0_3.0_1725634993072.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_lamkhoi_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_lamkhoi_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_lamkhoi_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|509.2 MB| + +## References + +https://huggingface.co/lamkhoi/opus-mt-en-ro-finetuned-en-to-ro + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_souta0919_en.md b/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_souta0919_en.md new file mode 100644 index 00000000000000..f2e5c346296fff --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_souta0919_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_souta0919 MarianTransformer from Souta0919 +author: John Snow Labs +name: opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_souta0919 +date: 2024-09-06 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_souta0919` is a English model originally trained by Souta0919. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_souta0919_en_5.5.0_3.0_1725635900504.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_souta0919_en_5.5.0_3.0_1725635900504.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_souta0919","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_souta0919","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
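+
+With the pipeline above, the translated sentences land in the `translation` column as annotations; a short sketch for reading them back out of `pipelineDF`:
+
+```python
+# Flatten the translation annotations into plain strings
+pipelineDF.selectExpr("text", "explode(translation.result) as translated_text") \
+    .show(truncate=False)
+```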
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_souta0919| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.6 MB| + +## References + +https://huggingface.co/Souta0919/opus-mt-en-ro-finetuned-en-to-ro \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_souta0919_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_souta0919_pipeline_en.md new file mode 100644 index 00000000000000..219a4f5cdfb7da --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_souta0919_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_souta0919_pipeline pipeline MarianTransformer from Souta0919 +author: John Snow Labs +name: opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_souta0919_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_souta0919_pipeline` is a English model originally trained by Souta0919. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_souta0919_pipeline_en_5.5.0_3.0_1725635925905.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_souta0919_pipeline_en_5.5.0_3.0_1725635925905.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_souta0919_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_souta0919_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_souta0919_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|509.1 MB| + +## References + +https://huggingface.co/Souta0919/opus-mt-en-ro-finetuned-en-to-ro + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_swahili_finetuned_english_tonga_tonga_islands_swahili_chituyi_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_swahili_finetuned_english_tonga_tonga_islands_swahili_chituyi_pipeline_en.md new file mode 100644 index 00000000000000..39043178be58d2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_english_swahili_finetuned_english_tonga_tonga_islands_swahili_chituyi_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_maltese_english_swahili_finetuned_english_tonga_tonga_islands_swahili_chituyi_pipeline pipeline MarianTransformer from Chituyi +author: John Snow Labs +name: opus_maltese_english_swahili_finetuned_english_tonga_tonga_islands_swahili_chituyi_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_swahili_finetuned_english_tonga_tonga_islands_swahili_chituyi_pipeline` is a English model originally trained by Chituyi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_swahili_finetuned_english_tonga_tonga_islands_swahili_chituyi_pipeline_en_5.5.0_3.0_1725635828372.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_swahili_finetuned_english_tonga_tonga_islands_swahili_chituyi_pipeline_en_5.5.0_3.0_1725635828372.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_english_swahili_finetuned_english_tonga_tonga_islands_swahili_chituyi_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_english_swahili_finetuned_english_tonga_tonga_islands_swahili_chituyi_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_swahili_finetuned_english_tonga_tonga_islands_swahili_chituyi_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|507.0 MB| + +## References + +https://huggingface.co/Chituyi/opus-mt-en-sw-finetuned-en-to-sw + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_russian_english_end_tonga_tonga_islands_end_russian_tonga_tonga_islands_english_en.md b/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_russian_english_end_tonga_tonga_islands_end_russian_tonga_tonga_islands_english_en.md new file mode 100644 index 00000000000000..f86e7b1646cbdf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_russian_english_end_tonga_tonga_islands_end_russian_tonga_tonga_islands_english_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English opus_maltese_russian_english_end_tonga_tonga_islands_end_russian_tonga_tonga_islands_english MarianTransformer from Dentikka +author: John Snow Labs +name: opus_maltese_russian_english_end_tonga_tonga_islands_end_russian_tonga_tonga_islands_english +date: 2024-09-06 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_russian_english_end_tonga_tonga_islands_end_russian_tonga_tonga_islands_english` is a English model originally trained by Dentikka. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_russian_english_end_tonga_tonga_islands_end_russian_tonga_tonga_islands_english_en_5.5.0_3.0_1725636308288.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_russian_english_end_tonga_tonga_islands_end_russian_tonga_tonga_islands_english_en_5.5.0_3.0_1725636308288.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_maltese_russian_english_end_tonga_tonga_islands_end_russian_tonga_tonga_islands_english","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_maltese_russian_english_end_tonga_tonga_islands_end_russian_tonga_tonga_islands_english","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_russian_english_end_tonga_tonga_islands_end_russian_tonga_tonga_islands_english| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|525.2 MB| + +## References + +https://huggingface.co/Dentikka/opus-mt-ru-en-end-to-end-ru-to-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_tigrinya_english_finetuned_npomo_english_10_epochs_en.md b/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_tigrinya_english_finetuned_npomo_english_10_epochs_en.md new file mode 100644 index 00000000000000..b44160cb5d09b4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_tigrinya_english_finetuned_npomo_english_10_epochs_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English opus_maltese_tigrinya_english_finetuned_npomo_english_10_epochs MarianTransformer from UnassumingOwl +author: John Snow Labs +name: opus_maltese_tigrinya_english_finetuned_npomo_english_10_epochs +date: 2024-09-06 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_tigrinya_english_finetuned_npomo_english_10_epochs` is a English model originally trained by UnassumingOwl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_tigrinya_english_finetuned_npomo_english_10_epochs_en_5.5.0_3.0_1725635157506.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_tigrinya_english_finetuned_npomo_english_10_epochs_en_5.5.0_3.0_1725635157506.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_maltese_tigrinya_english_finetuned_npomo_english_10_epochs","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_maltese_tigrinya_english_finetuned_npomo_english_10_epochs","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_tigrinya_english_finetuned_npomo_english_10_epochs| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|528.5 MB| + +## References + +https://huggingface.co/UnassumingOwl/opus-mt-ti-en-finetuned-npomo-en-10-epochs \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_tigrinya_english_finetuned_npomo_english_10_epochs_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_tigrinya_english_finetuned_npomo_english_10_epochs_pipeline_en.md new file mode 100644 index 00000000000000..74d1be34514dfa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-opus_maltese_tigrinya_english_finetuned_npomo_english_10_epochs_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_maltese_tigrinya_english_finetuned_npomo_english_10_epochs_pipeline pipeline MarianTransformer from UnassumingOwl +author: John Snow Labs +name: opus_maltese_tigrinya_english_finetuned_npomo_english_10_epochs_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_tigrinya_english_finetuned_npomo_english_10_epochs_pipeline` is a English model originally trained by UnassumingOwl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_tigrinya_english_finetuned_npomo_english_10_epochs_pipeline_en_5.5.0_3.0_1725635186525.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_tigrinya_english_finetuned_npomo_english_10_epochs_pipeline_en_5.5.0_3.0_1725635186525.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_tigrinya_english_finetuned_npomo_english_10_epochs_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_tigrinya_english_finetuned_npomo_english_10_epochs_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_tigrinya_english_finetuned_npomo_english_10_epochs_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|529.1 MB| + +## References + +https://huggingface.co/UnassumingOwl/opus-mt-ti-en-finetuned-npomo-en-10-epochs + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-panx_xlmr_base_en.md b/docs/_posts/ahmedlone127/2024-09-06-panx_xlmr_base_en.md new file mode 100644 index 00000000000000..a25065e2311233 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-panx_xlmr_base_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English panx_xlmr_base XlmRoBertaEmbeddings from oceanpty +author: John Snow Labs +name: panx_xlmr_base +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`panx_xlmr_base` is a English model originally trained by oceanpty. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/panx_xlmr_base_en_5.5.0_3.0_1725596814994.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/panx_xlmr_base_en_5.5.0_3.0_1725596814994.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("panx_xlmr_base","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("panx_xlmr_base","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
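+
+Each token annotation in the `embeddings` column carries its vector in the annotation's `embeddings` field; a small sketch for flattening the output of the example above:
+
+```python
+# One row per token, paired with its embedding vector
+pipelineDF.selectExpr("explode(embeddings) as emb") \
+    .selectExpr("emb.result as token", "emb.embeddings as vector") \
+    .show(truncate=60)
+```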
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|panx_xlmr_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[xlm_roberta]| +|Language:|en| +|Size:|797.5 MB| + +## References + +https://huggingface.co/oceanpty/panx-xlmr-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-panx_xlmr_base_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-panx_xlmr_base_pipeline_en.md new file mode 100644 index 00000000000000..f22e44c2358969 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-panx_xlmr_base_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English panx_xlmr_base_pipeline pipeline XlmRoBertaEmbeddings from oceanpty +author: John Snow Labs +name: panx_xlmr_base_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`panx_xlmr_base_pipeline` is a English model originally trained by oceanpty. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/panx_xlmr_base_pipeline_en_5.5.0_3.0_1725596944703.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/panx_xlmr_base_pipeline_en_5.5.0_3.0_1725596944703.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("panx_xlmr_base_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("panx_xlmr_base_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|panx_xlmr_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|797.5 MB| + +## References + +https://huggingface.co/oceanpty/panx-xlmr-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-pap2patentsberta_en.md b/docs/_posts/ahmedlone127/2024-09-06-pap2patentsberta_en.md new file mode 100644 index 00000000000000..f5aabd2bb05ebe --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-pap2patentsberta_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English pap2patentsberta MPNetEmbeddings from AI-Growth-Lab +author: John Snow Labs +name: pap2patentsberta +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`pap2patentsberta` is a English model originally trained by AI-Growth-Lab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pap2patentsberta_en_5.5.0_3.0_1725595020277.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pap2patentsberta_en_5.5.0_3.0_1725595020277.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("pap2patentsberta","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("pap2patentsberta","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
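+
+MPNet annotators produce one sentence-level vector per input document, which makes quick similarity checks straightforward. A rough sketch under the assumptions of the example above (the sample texts are illustrative):
+
+```python
+import numpy as np
+
+# Embed two short texts with the fitted pipeline and compare them
+pairs = spark.createDataFrame([["a patent about battery chemistry"],
+                               ["a paper on lithium-ion cells"]]).toDF("text")
+rows = pipelineModel.transform(pairs).selectExpr("text", "explode(embeddings) as emb").collect()
+vec = {r.text: np.array(r.emb.embeddings) for r in rows}
+a, b = vec["a patent about battery chemistry"], vec["a paper on lithium-ion cells"]
+print("cosine similarity:", float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b))))
+```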
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pap2patentsberta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/AI-Growth-Lab/Pap2PatentSBERTa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-pap2patentsberta_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-pap2patentsberta_pipeline_en.md new file mode 100644 index 00000000000000..c2ac6a57355d20 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-pap2patentsberta_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English pap2patentsberta_pipeline pipeline MPNetEmbeddings from AI-Growth-Lab +author: John Snow Labs +name: pap2patentsberta_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`pap2patentsberta_pipeline` is a English model originally trained by AI-Growth-Lab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pap2patentsberta_pipeline_en_5.5.0_3.0_1725595041003.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pap2patentsberta_pipeline_en_5.5.0_3.0_1725595041003.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("pap2patentsberta_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("pap2patentsberta_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pap2patentsberta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/AI-Growth-Lab/Pap2PatentSBERTa + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-paws_x_xlm_r_only_german_en.md b/docs/_posts/ahmedlone127/2024-09-06-paws_x_xlm_r_only_german_en.md new file mode 100644 index 00000000000000..b3271e5d2f0259 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-paws_x_xlm_r_only_german_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English paws_x_xlm_r_only_german XlmRoBertaForSequenceClassification from semindan +author: John Snow Labs +name: paws_x_xlm_r_only_german +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`paws_x_xlm_r_only_german` is a English model originally trained by semindan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/paws_x_xlm_r_only_german_en_5.5.0_3.0_1725620204478.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/paws_x_xlm_r_only_german_en_5.5.0_3.0_1725620204478.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("paws_x_xlm_r_only_german","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("paws_x_xlm_r_only_german", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
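+
+The predicted label for each document is written to the `class` column, with the per-label scores kept in the annotation metadata; a brief sketch for reading both:
+
+```python
+# One prediction per input document
+pipelineDF.selectExpr("text", "explode(class) as prediction") \
+    .selectExpr("text", "prediction.result as label", "prediction.metadata as scores") \
+    .show(truncate=False)
+```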
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|paws_x_xlm_r_only_german| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|802.2 MB| + +## References + +https://huggingface.co/semindan/paws_x_xlm_r_only_de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-paws_x_xlm_r_only_german_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-paws_x_xlm_r_only_german_pipeline_en.md new file mode 100644 index 00000000000000..71a3ca1c8249c4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-paws_x_xlm_r_only_german_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English paws_x_xlm_r_only_german_pipeline pipeline XlmRoBertaForSequenceClassification from semindan +author: John Snow Labs +name: paws_x_xlm_r_only_german_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`paws_x_xlm_r_only_german_pipeline` is a English model originally trained by semindan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/paws_x_xlm_r_only_german_pipeline_en_5.5.0_3.0_1725620340035.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/paws_x_xlm_r_only_german_pipeline_en_5.5.0_3.0_1725620340035.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("paws_x_xlm_r_only_german_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("paws_x_xlm_r_only_german_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|paws_x_xlm_r_only_german_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|802.2 MB| + +## References + +https://huggingface.co/semindan/paws_x_xlm_r_only_de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-perspective_conditional_utilitarian_deberta_01_en.md b/docs/_posts/ahmedlone127/2024-09-06-perspective_conditional_utilitarian_deberta_01_en.md new file mode 100644 index 00000000000000..4cbae774be6f84 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-perspective_conditional_utilitarian_deberta_01_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English perspective_conditional_utilitarian_deberta_01 DeBertaForSequenceClassification from edmundmills +author: John Snow Labs +name: perspective_conditional_utilitarian_deberta_01 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`perspective_conditional_utilitarian_deberta_01` is a English model originally trained by edmundmills. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/perspective_conditional_utilitarian_deberta_01_en_5.5.0_3.0_1725611575979.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/perspective_conditional_utilitarian_deberta_01_en_5.5.0_3.0_1725611575979.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("perspective_conditional_utilitarian_deberta_01","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("perspective_conditional_utilitarian_deberta_01", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|perspective_conditional_utilitarian_deberta_01| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/edmundmills/perspective-conditional-utilitarian-deberta-01 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-pharo_classreferences_classifier_en.md b/docs/_posts/ahmedlone127/2024-09-06-pharo_classreferences_classifier_en.md new file mode 100644 index 00000000000000..84e338586a81bb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-pharo_classreferences_classifier_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English pharo_classreferences_classifier MPNetEmbeddings from AISE-TUDelft +author: John Snow Labs +name: pharo_classreferences_classifier +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`pharo_classreferences_classifier` is a English model originally trained by AISE-TUDelft. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pharo_classreferences_classifier_en_5.5.0_3.0_1725595654560.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pharo_classreferences_classifier_en_5.5.0_3.0_1725595654560.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("pharo_classreferences_classifier","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("pharo_classreferences_classifier","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pharo_classreferences_classifier| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/AISE-TUDelft/pharo-classreferences-classifier \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-pharo_classreferences_classifier_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-pharo_classreferences_classifier_pipeline_en.md new file mode 100644 index 00000000000000..a65280edbcd7ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-pharo_classreferences_classifier_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English pharo_classreferences_classifier_pipeline pipeline MPNetEmbeddings from AISE-TUDelft +author: John Snow Labs +name: pharo_classreferences_classifier_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`pharo_classreferences_classifier_pipeline` is a English model originally trained by AISE-TUDelft. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pharo_classreferences_classifier_pipeline_en_5.5.0_3.0_1725595675675.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pharo_classreferences_classifier_pipeline_en_5.5.0_3.0_1725595675675.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("pharo_classreferences_classifier_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("pharo_classreferences_classifier_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pharo_classreferences_classifier_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/AISE-TUDelft/pharo-classreferences-classifier + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-phayathaibert_th.md b/docs/_posts/ahmedlone127/2024-09-06-phayathaibert_th.md new file mode 100644 index 00000000000000..a284baea380836 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-phayathaibert_th.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Thai phayathaibert CamemBertEmbeddings from clicknext +author: John Snow Labs +name: phayathaibert +date: 2024-09-06 +tags: [th, open_source, onnx, embeddings, camembert] +task: Embeddings +language: th +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`phayathaibert` is a Thai model originally trained by clicknext. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/phayathaibert_th_5.5.0_3.0_1725636825036.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/phayathaibert_th_5.5.0_3.0_1725636825036.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("phayathaibert","th") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("phayathaibert","th") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|phayathaibert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|th| +|Size:|1.0 GB| + +## References + +https://huggingface.co/clicknext/phayathaibert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-philberta_en.md b/docs/_posts/ahmedlone127/2024-09-06-philberta_en.md new file mode 100644 index 00000000000000..d050ae85b6c234 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-philberta_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English philberta RoBertaEmbeddings from bowphs +author: John Snow Labs +name: philberta +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`philberta` is a English model originally trained by bowphs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/philberta_en_5.5.0_3.0_1725660922720.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/philberta_en_5.5.0_3.0_1725660922720.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("philberta","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("philberta","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|philberta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|504.5 MB| + +## References + +https://huggingface.co/bowphs/PhilBerta \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-philberta_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-philberta_pipeline_en.md new file mode 100644 index 00000000000000..157c52f3c6defe --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-philberta_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English philberta_pipeline pipeline RoBertaEmbeddings from bowphs +author: John Snow Labs +name: philberta_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`philberta_pipeline` is a English model originally trained by bowphs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/philberta_pipeline_en_5.5.0_3.0_1725660949775.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/philberta_pipeline_en_5.5.0_3.0_1725660949775.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("philberta_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("philberta_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|philberta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|504.5 MB| + +## References + +https://huggingface.co/bowphs/PhilBerta + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-pictalk_distil_en.md b/docs/_posts/ahmedlone127/2024-09-06-pictalk_distil_en.md new file mode 100644 index 00000000000000..f804d7a21a9c28 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-pictalk_distil_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English pictalk_distil DistilBertEmbeddings from LucasMagnana +author: John Snow Labs +name: pictalk_distil +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`pictalk_distil` is a English model originally trained by LucasMagnana. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pictalk_distil_en_5.5.0_3.0_1725639566594.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pictalk_distil_en_5.5.0_3.0_1725639566594.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("pictalk_distil","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("pictalk_distil","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pictalk_distil| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|268.1 MB| + +## References + +https://huggingface.co/LucasMagnana/Pictalk_distil \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-pictalk_distil_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-pictalk_distil_pipeline_en.md new file mode 100644 index 00000000000000..10db28476506aa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-pictalk_distil_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English pictalk_distil_pipeline pipeline DistilBertEmbeddings from LucasMagnana +author: John Snow Labs +name: pictalk_distil_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`pictalk_distil_pipeline` is a English model originally trained by LucasMagnana. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pictalk_distil_pipeline_en_5.5.0_3.0_1725639579679.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pictalk_distil_pipeline_en_5.5.0_3.0_1725639579679.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("pictalk_distil_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("pictalk_distil_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pictalk_distil_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|268.1 MB| + +## References + +https://huggingface.co/LucasMagnana/Pictalk_distil + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-pii_roberta_large_en.md b/docs/_posts/ahmedlone127/2024-09-06-pii_roberta_large_en.md new file mode 100644 index 00000000000000..1ceddf04eddde1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-pii_roberta_large_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English pii_roberta_large RoBertaForTokenClassification from dynamofl-sandbox +author: John Snow Labs +name: pii_roberta_large +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`pii_roberta_large` is a English model originally trained by dynamofl-sandbox. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pii_roberta_large_en_5.5.0_3.0_1725625027013.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pii_roberta_large_en_5.5.0_3.0_1725625027013.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("pii_roberta_large","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("pii_roberta_large", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
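+
+The `ner` column holds one IOB tag per token. If entity chunks are needed instead of raw tags, a `NerConverter` stage can be appended to the same pipeline; a minimal sketch reusing the stage names from the Python example above:
+
+```python
+from sparknlp.annotator import NerConverter
+
+# Group IOB-tagged tokens into entity chunks
+converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("entities")
+
+ner_pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier, converter])
+results = ner_pipeline.fit(data).transform(data)
+results.selectExpr("explode(entities) as ent") \
+    .selectExpr("ent.result as chunk", "ent.metadata['entity'] as entity_type") \
+    .show(truncate=False)
+```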
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pii_roberta_large| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/dynamofl-sandbox/pii-roberta-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-pii_roberta_large_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-pii_roberta_large_pipeline_en.md new file mode 100644 index 00000000000000..f52dfcfadf6723 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-pii_roberta_large_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English pii_roberta_large_pipeline pipeline RoBertaForTokenClassification from dynamofl-sandbox +author: John Snow Labs +name: pii_roberta_large_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`pii_roberta_large_pipeline` is a English model originally trained by dynamofl-sandbox. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pii_roberta_large_pipeline_en_5.5.0_3.0_1725625098501.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pii_roberta_large_pipeline_en_5.5.0_3.0_1725625098501.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("pii_roberta_large_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("pii_roberta_large_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
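
For quick experiments, `PretrainedPipeline` can also be applied to raw strings instead of a DataFrame. A small sketch, assuming a Spark NLP session is already running; the example sentence is arbitrary:

```python
from sparknlp.pretrained import PretrainedPipeline

pipeline = PretrainedPipeline("pii_roberta_large_pipeline", lang="en")

# `annotate` returns a dict mapping each output column to its list of results.
result = pipeline.annotate("John lives at 221B Baker Street, London.")
print(result.keys())
```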
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pii_roberta_large_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/dynamofl-sandbox/pii-roberta-large + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-polyfaq_cross_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-polyfaq_cross_pipeline_en.md new file mode 100644 index 00000000000000..89fdf956ad3f6f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-polyfaq_cross_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English polyfaq_cross_pipeline pipeline XlmRoBertaForSequenceClassification from maximedb +author: John Snow Labs +name: polyfaq_cross_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`polyfaq_cross_pipeline` is a English model originally trained by maximedb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/polyfaq_cross_pipeline_en_5.5.0_3.0_1725618886315.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/polyfaq_cross_pipeline_en_5.5.0_3.0_1725618886315.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("polyfaq_cross_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("polyfaq_cross_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|polyfaq_cross_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|997.5 MB| + +## References + +https://huggingface.co/maximedb/polyfaq_cross + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-portuguese_xlm_r_falsetrue_0_2_best_en.md b/docs/_posts/ahmedlone127/2024-09-06-portuguese_xlm_r_falsetrue_0_2_best_en.md new file mode 100644 index 00000000000000..14f40de2ca2db2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-portuguese_xlm_r_falsetrue_0_2_best_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English portuguese_xlm_r_falsetrue_0_2_best XlmRoBertaForSequenceClassification from harish +author: John Snow Labs +name: portuguese_xlm_r_falsetrue_0_2_best +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`portuguese_xlm_r_falsetrue_0_2_best` is a English model originally trained by harish. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/portuguese_xlm_r_falsetrue_0_2_best_en_5.5.0_3.0_1725618864876.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/portuguese_xlm_r_falsetrue_0_2_best_en_5.5.0_3.0_1725618864876.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("portuguese_xlm_r_falsetrue_0_2_best","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("class")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols(Array("document"))
    .setOutputCol("token")

val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("portuguese_xlm_r_falsetrue_0_2_best", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("class")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
</div>
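
The predicted label for each input row lands in the `class` output column. A brief sketch, reusing `pipelineDF` from the example above (label names come from the fine-tuned model's configuration):

```python
# One predicted label per input row.
pipelineDF.select("text", "class.result").show(truncate=False)
```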
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|portuguese_xlm_r_falsetrue_0_2_best| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|780.6 MB| + +## References + +https://huggingface.co/harish/PT-XLM_R-FalseTrue-0_2_BEST \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-portuguese_xlm_r_falsetrue_0_2_best_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-portuguese_xlm_r_falsetrue_0_2_best_pipeline_en.md new file mode 100644 index 00000000000000..c67b36b1e4da88 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-portuguese_xlm_r_falsetrue_0_2_best_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English portuguese_xlm_r_falsetrue_0_2_best_pipeline pipeline XlmRoBertaForSequenceClassification from harish +author: John Snow Labs +name: portuguese_xlm_r_falsetrue_0_2_best_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`portuguese_xlm_r_falsetrue_0_2_best_pipeline` is a English model originally trained by harish. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/portuguese_xlm_r_falsetrue_0_2_best_pipeline_en_5.5.0_3.0_1725619009478.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/portuguese_xlm_r_falsetrue_0_2_best_pipeline_en_5.5.0_3.0_1725619009478.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("portuguese_xlm_r_falsetrue_0_2_best_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("portuguese_xlm_r_falsetrue_0_2_best_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|portuguese_xlm_r_falsetrue_0_2_best_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|780.6 MB| + +## References + +https://huggingface.co/harish/PT-XLM_R-FalseTrue-0_2_BEST + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-predict_perception_xlmr_blame_victim_en.md b/docs/_posts/ahmedlone127/2024-09-06-predict_perception_xlmr_blame_victim_en.md new file mode 100644 index 00000000000000..de9b91c22d69f0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-predict_perception_xlmr_blame_victim_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English predict_perception_xlmr_blame_victim XlmRoBertaForSequenceClassification from responsibility-framing +author: John Snow Labs +name: predict_perception_xlmr_blame_victim +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`predict_perception_xlmr_blame_victim` is a English model originally trained by responsibility-framing. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/predict_perception_xlmr_blame_victim_en_5.5.0_3.0_1725620095419.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/predict_perception_xlmr_blame_victim_en_5.5.0_3.0_1725620095419.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("predict_perception_xlmr_blame_victim","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("class")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols(Array("document"))
    .setOutputCol("token")

val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("predict_perception_xlmr_blame_victim", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("class")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|predict_perception_xlmr_blame_victim| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|837.6 MB| + +## References + +https://huggingface.co/responsibility-framing/predict-perception-xlmr-blame-victim \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-predict_perception_xlmr_blame_victim_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-predict_perception_xlmr_blame_victim_pipeline_en.md new file mode 100644 index 00000000000000..884f973b95b1cc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-predict_perception_xlmr_blame_victim_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English predict_perception_xlmr_blame_victim_pipeline pipeline XlmRoBertaForSequenceClassification from responsibility-framing +author: John Snow Labs +name: predict_perception_xlmr_blame_victim_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`predict_perception_xlmr_blame_victim_pipeline` is a English model originally trained by responsibility-framing. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/predict_perception_xlmr_blame_victim_pipeline_en_5.5.0_3.0_1725620166602.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/predict_perception_xlmr_blame_victim_pipeline_en_5.5.0_3.0_1725620166602.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("predict_perception_xlmr_blame_victim_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("predict_perception_xlmr_blame_victim_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|predict_perception_xlmr_blame_victim_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|837.6 MB| + +## References + +https://huggingface.co/responsibility-framing/predict-perception-xlmr-blame-victim + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-punct_model_v2_en.md b/docs/_posts/ahmedlone127/2024-09-06-punct_model_v2_en.md new file mode 100644 index 00000000000000..10a7517014cc76 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-punct_model_v2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English punct_model_v2 DistilBertForTokenClassification from TokenWhisperer +author: John Snow Labs +name: punct_model_v2 +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`punct_model_v2` is a English model originally trained by TokenWhisperer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/punct_model_v2_en_5.5.0_3.0_1725599269619.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/punct_model_v2_en_5.5.0_3.0_1725599269619.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

tokenClassifier = DistilBertForTokenClassification.pretrained("punct_model_v2","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("ner")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols("document")
    .setOutputCol("token")

val tokenClassifier = DistilBertForTokenClassification.pretrained("punct_model_v2", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("ner")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
</div>
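
If this model's tags follow an IOB/IOB2 scheme, a `NerConverter` stage can be appended to merge token-level tags into contiguous chunks. A hedged sketch, reusing the variables from the example above; whether the conversion is meaningful depends on this model's actual label set:

```python
from sparknlp.annotator import NerConverter

# Optional post-processing: merge IOB-style token tags into chunks.
converter = NerConverter() \
    .setInputCols(["document", "token", "ner"]) \
    .setOutputCol("ner_chunk")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier, converter])
pipeline.fit(data).transform(data).select("ner_chunk.result").show(truncate=False)
```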
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|punct_model_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/TokenWhisperer/punct_model_v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-punct_model_v2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-punct_model_v2_pipeline_en.md new file mode 100644 index 00000000000000..3e1872914b1665 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-punct_model_v2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English punct_model_v2_pipeline pipeline DistilBertForTokenClassification from TokenWhisperer +author: John Snow Labs +name: punct_model_v2_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`punct_model_v2_pipeline` is a English model originally trained by TokenWhisperer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/punct_model_v2_pipeline_en_5.5.0_3.0_1725599282726.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/punct_model_v2_pipeline_en_5.5.0_3.0_1725599282726.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("punct_model_v2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("punct_model_v2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|punct_model_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/TokenWhisperer/punct_model_v2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-punctuate_en.md b/docs/_posts/ahmedlone127/2024-09-06-punctuate_en.md new file mode 100644 index 00000000000000..82d17ef1e59ea2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-punctuate_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English punctuate DistilBertForTokenClassification from venkatchoudharyala +author: John Snow Labs +name: punctuate +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`punctuate` is a English model originally trained by venkatchoudharyala. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/punctuate_en_5.5.0_3.0_1725653621875.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/punctuate_en_5.5.0_3.0_1725653621875.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

tokenClassifier = DistilBertForTokenClassification.pretrained("punctuate","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("ner")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols("document")
    .setOutputCol("token")

val tokenClassifier = DistilBertForTokenClassification.pretrained("punctuate", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("ner")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|punctuate| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/venkatchoudharyala/Punctuate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-punctuate_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-punctuate_pipeline_en.md new file mode 100644 index 00000000000000..1583c441db4b96 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-punctuate_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English punctuate_pipeline pipeline DistilBertForTokenClassification from venkatchoudharyala +author: John Snow Labs +name: punctuate_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`punctuate_pipeline` is a English model originally trained by venkatchoudharyala. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/punctuate_pipeline_en_5.5.0_3.0_1725653633669.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/punctuate_pipeline_en_5.5.0_3.0_1725653633669.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("punctuate_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("punctuate_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|punctuate_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/venkatchoudharyala/Punctuate + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-q2d_128b_en.md b/docs/_posts/ahmedlone127/2024-09-06-q2d_128b_en.md new file mode 100644 index 00000000000000..17f9e8a08ebb4e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-q2d_128b_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English q2d_128b MPNetEmbeddings from ingeol +author: John Snow Labs +name: q2d_128b +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`q2d_128b` is a English model originally trained by ingeol. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/q2d_128b_en_5.5.0_3.0_1725595556708.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/q2d_128b_en_5.5.0_3.0_1725595556708.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("q2d_128b","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("q2d_128b","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
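
The sentence vectors themselves sit in the `embeddings` field of each annotation (for an MPNet base model this is typically a 768-dimensional array). A short sketch, reusing `pipelineDF` from the example above:

```python
# One embedding vector per input document.
vectors = pipelineDF.selectExpr("explode(embeddings.embeddings) as vector")
vectors.show(1, truncate=80)
```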
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|q2d_128b| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/ingeol/q2d_128b \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-q2d_gpt_22_en.md b/docs/_posts/ahmedlone127/2024-09-06-q2d_gpt_22_en.md new file mode 100644 index 00000000000000..cf318a45dd0798 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-q2d_gpt_22_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English q2d_gpt_22 MPNetEmbeddings from ingeol +author: John Snow Labs +name: q2d_gpt_22 +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`q2d_gpt_22` is a English model originally trained by ingeol. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/q2d_gpt_22_en_5.5.0_3.0_1725595530628.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/q2d_gpt_22_en_5.5.0_3.0_1725595530628.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("q2d_gpt_22","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("q2d_gpt_22","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|q2d_gpt_22| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/ingeol/q2d_gpt_22 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-q2d_gpt_22_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-q2d_gpt_22_pipeline_en.md new file mode 100644 index 00000000000000..905b175d23d6e5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-q2d_gpt_22_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English q2d_gpt_22_pipeline pipeline MPNetEmbeddings from ingeol +author: John Snow Labs +name: q2d_gpt_22_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`q2d_gpt_22_pipeline` is a English model originally trained by ingeol. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/q2d_gpt_22_pipeline_en_5.5.0_3.0_1725595555743.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/q2d_gpt_22_pipeline_en_5.5.0_3.0_1725595555743.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("q2d_gpt_22_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("q2d_gpt_22_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|q2d_gpt_22_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/ingeol/q2d_gpt_22 + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-q2e_ep3_42_en.md b/docs/_posts/ahmedlone127/2024-09-06-q2e_ep3_42_en.md new file mode 100644 index 00000000000000..d6f15910bab8fc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-q2e_ep3_42_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English q2e_ep3_42 MPNetEmbeddings from ingeol +author: John Snow Labs +name: q2e_ep3_42 +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`q2e_ep3_42` is a English model originally trained by ingeol. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/q2e_ep3_42_en_5.5.0_3.0_1725595300669.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/q2e_ep3_42_en_5.5.0_3.0_1725595300669.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("q2e_ep3_42","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("q2e_ep3_42","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|q2e_ep3_42| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/ingeol/q2e_ep3_42 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-q2e_ep3_42_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-q2e_ep3_42_pipeline_en.md new file mode 100644 index 00000000000000..645e4a6e829f34 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-q2e_ep3_42_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English q2e_ep3_42_pipeline pipeline MPNetEmbeddings from ingeol +author: John Snow Labs +name: q2e_ep3_42_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`q2e_ep3_42_pipeline` is a English model originally trained by ingeol. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/q2e_ep3_42_pipeline_en_5.5.0_3.0_1725595321179.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/q2e_ep3_42_pipeline_en_5.5.0_3.0_1725595321179.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("q2e_ep3_42_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("q2e_ep3_42_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|q2e_ep3_42_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/ingeol/q2e_ep3_42 + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-q_only_ep3_22_en.md b/docs/_posts/ahmedlone127/2024-09-06-q_only_ep3_22_en.md new file mode 100644 index 00000000000000..a9912a980592a4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-q_only_ep3_22_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English q_only_ep3_22 MPNetEmbeddings from ingeol +author: John Snow Labs +name: q_only_ep3_22 +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`q_only_ep3_22` is a English model originally trained by ingeol. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/q_only_ep3_22_en_5.5.0_3.0_1725595026444.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/q_only_ep3_22_en_5.5.0_3.0_1725595026444.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("q_only_ep3_22","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("q_only_ep3_22","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|q_only_ep3_22| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/ingeol/q_only_ep3_22 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-q_only_ep3_22_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-q_only_ep3_22_pipeline_en.md new file mode 100644 index 00000000000000..f0b3f00f8c2e5f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-q_only_ep3_22_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English q_only_ep3_22_pipeline pipeline MPNetEmbeddings from ingeol +author: John Snow Labs +name: q_only_ep3_22_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`q_only_ep3_22_pipeline` is a English model originally trained by ingeol. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/q_only_ep3_22_pipeline_en_5.5.0_3.0_1725595048927.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/q_only_ep3_22_pipeline_en_5.5.0_3.0_1725595048927.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("q_only_ep3_22_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("q_only_ep3_22_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|q_only_ep3_22_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/ingeol/q_only_ep3_22 + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-qa_distell0_en.md b/docs/_posts/ahmedlone127/2024-09-06-qa_distell0_en.md new file mode 100644 index 00000000000000..3a327ca7952001 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-qa_distell0_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English qa_distell0 DistilBertForQuestionAnswering from StaAhmed +author: John Snow Labs +name: qa_distell0 +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_distell0` is a English model originally trained by StaAhmed. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_distell0_en_5.5.0_3.0_1725652441057.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_distell0_en_5.5.0_3.0_1725652441057.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = MultiDocumentAssembler() \
    .setInputCols(["question", "context"]) \
    .setOutputCols(["document_question", "document_context"])

spanClassifier = DistilBertForQuestionAnswering.pretrained("qa_distell0","en") \
    .setInputCols(["document_question","document_context"]) \
    .setOutputCol("answer")

pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new MultiDocumentAssembler()
    .setInputCols(Array("question", "context"))
    .setOutputCols(Array("document_question", "document_context"))

val spanClassifier = DistilBertForQuestionAnswering.pretrained("qa_distell0", "en")
    .setInputCols(Array("document_question","document_context"))
    .setOutputCol("answer")

val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
</div>
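
The extracted answer span for each question/context pair is written to the `answer` output column. A brief sketch, reusing `pipelineDF` from the example above:

```python
# Predicted answer text per question/context pair.
pipelineDF.select("answer.result").show(truncate=False)
```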
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_distell0| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/StaAhmed/QA_distell0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-qa_model2_sumittagadiya_en.md b/docs/_posts/ahmedlone127/2024-09-06-qa_model2_sumittagadiya_en.md new file mode 100644 index 00000000000000..6d11752728a984 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-qa_model2_sumittagadiya_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English qa_model2_sumittagadiya DistilBertForQuestionAnswering from sumittagadiya +author: John Snow Labs +name: qa_model2_sumittagadiya +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_model2_sumittagadiya` is a English model originally trained by sumittagadiya. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_model2_sumittagadiya_en_5.5.0_3.0_1725654355245.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_model2_sumittagadiya_en_5.5.0_3.0_1725654355245.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = MultiDocumentAssembler() \
    .setInputCols(["question", "context"]) \
    .setOutputCols(["document_question", "document_context"])

spanClassifier = DistilBertForQuestionAnswering.pretrained("qa_model2_sumittagadiya","en") \
    .setInputCols(["document_question","document_context"]) \
    .setOutputCol("answer")

pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new MultiDocumentAssembler()
    .setInputCols(Array("question", "context"))
    .setOutputCols(Array("document_question", "document_context"))

val spanClassifier = DistilBertForQuestionAnswering.pretrained("qa_model2_sumittagadiya", "en")
    .setInputCols(Array("document_question","document_context"))
    .setOutputCol("answer")

val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_model2_sumittagadiya| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/sumittagadiya/qa_model2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-qa_model2_sumittagadiya_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-qa_model2_sumittagadiya_pipeline_en.md new file mode 100644 index 00000000000000..fa4d370db6dd6c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-qa_model2_sumittagadiya_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English qa_model2_sumittagadiya_pipeline pipeline DistilBertForQuestionAnswering from sumittagadiya +author: John Snow Labs +name: qa_model2_sumittagadiya_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_model2_sumittagadiya_pipeline` is a English model originally trained by sumittagadiya. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_model2_sumittagadiya_pipeline_en_5.5.0_3.0_1725654371933.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_model2_sumittagadiya_pipeline_en_5.5.0_3.0_1725654371933.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("qa_model2_sumittagadiya_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("qa_model2_sumittagadiya_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_model2_sumittagadiya_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/sumittagadiya/qa_model2 + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-qa_model3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-qa_model3_pipeline_en.md new file mode 100644 index 00000000000000..f9ed4f9dd09d4d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-qa_model3_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English qa_model3_pipeline pipeline DistilBertForQuestionAnswering from sumittagadiya +author: John Snow Labs +name: qa_model3_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_model3_pipeline` is a English model originally trained by sumittagadiya. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_model3_pipeline_en_5.5.0_3.0_1725654553529.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_model3_pipeline_en_5.5.0_3.0_1725654553529.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("qa_model3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("qa_model3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_model3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/sumittagadiya/qa_model3 + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-qa_model_hansollll_en.md b/docs/_posts/ahmedlone127/2024-09-06-qa_model_hansollll_en.md new file mode 100644 index 00000000000000..0708982cc7eb14 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-qa_model_hansollll_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English qa_model_hansollll DistilBertForQuestionAnswering from Hansollll +author: John Snow Labs +name: qa_model_hansollll +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_model_hansollll` is a English model originally trained by Hansollll. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_model_hansollll_en_5.5.0_3.0_1725652231464.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_model_hansollll_en_5.5.0_3.0_1725652231464.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = MultiDocumentAssembler() \
    .setInputCols(["question", "context"]) \
    .setOutputCols(["document_question", "document_context"])

spanClassifier = DistilBertForQuestionAnswering.pretrained("qa_model_hansollll","en") \
    .setInputCols(["document_question","document_context"]) \
    .setOutputCol("answer")

pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new MultiDocumentAssembler()
    .setInputCols(Array("question", "context"))
    .setOutputCols(Array("document_question", "document_context"))

val spanClassifier = DistilBertForQuestionAnswering.pretrained("qa_model_hansollll", "en")
    .setInputCols(Array("document_question","document_context"))
    .setOutputCol("answer")

val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_model_hansollll| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Hansollll/qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-qa_model_hansollll_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-qa_model_hansollll_pipeline_en.md new file mode 100644 index 00000000000000..9049602c0ff442 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-qa_model_hansollll_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English qa_model_hansollll_pipeline pipeline DistilBertForQuestionAnswering from Hansollll +author: John Snow Labs +name: qa_model_hansollll_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_model_hansollll_pipeline` is a English model originally trained by Hansollll. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_model_hansollll_pipeline_en_5.5.0_3.0_1725652243655.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_model_hansollll_pipeline_en_5.5.0_3.0_1725652243655.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("qa_model_hansollll_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("qa_model_hansollll_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
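+The snippet above assumes a running Spark session and an existing DataFrame `df`. A minimal, self-contained sketch is shown below; the `question`/`context` input columns and the `answer` output column are assumptions inferred from the included MultiDocumentAssembler and DistilBertForQuestionAnswering stages, not confirmed settings of this pipeline.
+
+```python
+# Hypothetical end-to-end usage; column names are assumptions (see note above).
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+df = spark.createDataFrame(
+    [["What framework do I use?", "I use spark-nlp."]]
+).toDF("question", "context")
+
+pipeline = PretrainedPipeline("qa_model_hansollll_pipeline", lang="en")
+result = pipeline.transform(df)
+result.select("answer.result").show(truncate=False)
+```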
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_model_hansollll_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Hansollll/qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-qa_model_sif10_en.md b/docs/_posts/ahmedlone127/2024-09-06-qa_model_sif10_en.md new file mode 100644 index 00000000000000..5e3bad3f411a45 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-qa_model_sif10_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English qa_model_sif10 DistilBertForQuestionAnswering from Sif10 +author: John Snow Labs +name: qa_model_sif10 +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_model_sif10` is a English model originally trained by Sif10. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_model_sif10_en_5.5.0_3.0_1725621813923.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_model_sif10_en_5.5.0_3.0_1725621813923.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("qa_model_sif10","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols("question", "context")
+    .setOutputCols("document_question", "document_context")
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("qa_model_sif10", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_model_sif10| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Sif10/QA_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-qa_real_data_test_xlm_roberta_base_en.md b/docs/_posts/ahmedlone127/2024-09-06-qa_real_data_test_xlm_roberta_base_en.md new file mode 100644 index 00000000000000..c65a3f9085195c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-qa_real_data_test_xlm_roberta_base_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English qa_real_data_test_xlm_roberta_base XlmRoBertaForQuestionAnswering from prajwalJumde +author: John Snow Labs +name: qa_real_data_test_xlm_roberta_base +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_real_data_test_xlm_roberta_base` is a English model originally trained by prajwalJumde. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_real_data_test_xlm_roberta_base_en_5.5.0_3.0_1725598476737.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_real_data_test_xlm_roberta_base_en_5.5.0_3.0_1725598476737.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_real_data_test_xlm_roberta_base","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols("question", "context")
+    .setOutputCols("document_question", "document_context")
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_real_data_test_xlm_roberta_base", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_real_data_test_xlm_roberta_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|813.4 MB| + +## References + +https://huggingface.co/prajwalJumde/QA_REAL_DATA_TEST_xlm_roberta-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-qa_redaction_nov1_18_en.md b/docs/_posts/ahmedlone127/2024-09-06-qa_redaction_nov1_18_en.md new file mode 100644 index 00000000000000..4a383fb518d663 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-qa_redaction_nov1_18_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English qa_redaction_nov1_18 XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: qa_redaction_nov1_18 +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_redaction_nov1_18` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_redaction_nov1_18_en_5.5.0_3.0_1725631043502.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_redaction_nov1_18_en_5.5.0_3.0_1725631043502.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_redaction_nov1_18","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols("question", "context")
+    .setOutputCols("document_question", "document_context")
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_redaction_nov1_18", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_redaction_nov1_18| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|795.3 MB| + +## References + +https://huggingface.co/am-infoweb/QA_REDACTION_NOV1_18 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-qa_redaction_nov1_19_a1_en.md b/docs/_posts/ahmedlone127/2024-09-06-qa_redaction_nov1_19_a1_en.md new file mode 100644 index 00000000000000..5f11f1210bb05c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-qa_redaction_nov1_19_a1_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English qa_redaction_nov1_19_a1 XlmRoBertaForQuestionAnswering from prajwalJumde +author: John Snow Labs +name: qa_redaction_nov1_19_a1 +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_redaction_nov1_19_a1` is a English model originally trained by prajwalJumde. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_redaction_nov1_19_a1_en_5.5.0_3.0_1725630585135.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_redaction_nov1_19_a1_en_5.5.0_3.0_1725630585135.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_redaction_nov1_19_a1","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols("question", "context")
+    .setOutputCols("document_question", "document_context")
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_redaction_nov1_19_a1", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_redaction_nov1_19_a1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|796.9 MB| + +## References + +https://huggingface.co/prajwalJumde/QA_REDACTION_NOV1_19-a1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-qa_redaction_nov1_19_a1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-qa_redaction_nov1_19_a1_pipeline_en.md new file mode 100644 index 00000000000000..5bcb08b258dcda --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-qa_redaction_nov1_19_a1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English qa_redaction_nov1_19_a1_pipeline pipeline XlmRoBertaForQuestionAnswering from prajwalJumde +author: John Snow Labs +name: qa_redaction_nov1_19_a1_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_redaction_nov1_19_a1_pipeline` is a English model originally trained by prajwalJumde. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_redaction_nov1_19_a1_pipeline_en_5.5.0_3.0_1725630708070.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_redaction_nov1_19_a1_pipeline_en_5.5.0_3.0_1725630708070.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("qa_redaction_nov1_19_a1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("qa_redaction_nov1_19_a1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
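+The snippet above assumes a running Spark session and an existing DataFrame `df`. A minimal, self-contained sketch is shown below; the `question`/`context` input columns and the `answer` output column are assumptions inferred from the included MultiDocumentAssembler and XlmRoBertaForQuestionAnswering stages, not confirmed settings of this pipeline.
+
+```python
+# Hypothetical end-to-end usage; column names are assumptions (see note above).
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+df = spark.createDataFrame(
+    [["What framework do I use?", "I use spark-nlp."]]
+).toDF("question", "context")
+
+pipeline = PretrainedPipeline("qa_redaction_nov1_19_a1_pipeline", lang="en")
+result = pipeline.transform(df)
+result.select("answer.result").show(truncate=False)
+```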
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_redaction_nov1_19_a1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|796.9 MB| + +## References + +https://huggingface.co/prajwalJumde/QA_REDACTION_NOV1_19-a1 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-qa_synth_21_sept_with_finetune_1_0_en.md b/docs/_posts/ahmedlone127/2024-09-06-qa_synth_21_sept_with_finetune_1_0_en.md new file mode 100644 index 00000000000000..d3ea10ff2c120c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-qa_synth_21_sept_with_finetune_1_0_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English qa_synth_21_sept_with_finetune_1_0 XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: qa_synth_21_sept_with_finetune_1_0 +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_synth_21_sept_with_finetune_1_0` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_synth_21_sept_with_finetune_1_0_en_5.5.0_3.0_1725597420448.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_synth_21_sept_with_finetune_1_0_en_5.5.0_3.0_1725597420448.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_synth_21_sept_with_finetune_1_0","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols("question", "context")
+    .setOutputCols("document_question", "document_context")
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_synth_21_sept_with_finetune_1_0", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_synth_21_sept_with_finetune_1_0| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|813.0 MB| + +## References + +https://huggingface.co/am-infoweb/QA_SYNTH_21_SEPT_WITH_FINETUNE_1.0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-qa_synth_21_sept_with_finetune_1_0_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-qa_synth_21_sept_with_finetune_1_0_pipeline_en.md new file mode 100644 index 00000000000000..416ea1a7170277 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-qa_synth_21_sept_with_finetune_1_0_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English qa_synth_21_sept_with_finetune_1_0_pipeline pipeline XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: qa_synth_21_sept_with_finetune_1_0_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_synth_21_sept_with_finetune_1_0_pipeline` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_synth_21_sept_with_finetune_1_0_pipeline_en_5.5.0_3.0_1725597524803.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_synth_21_sept_with_finetune_1_0_pipeline_en_5.5.0_3.0_1725597524803.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("qa_synth_21_sept_with_finetune_1_0_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("qa_synth_21_sept_with_finetune_1_0_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
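+The snippet above assumes a running Spark session and an existing DataFrame `df`. A minimal, self-contained sketch is shown below; the `question`/`context` input columns and the `answer` output column are assumptions inferred from the included MultiDocumentAssembler and XlmRoBertaForQuestionAnswering stages, not confirmed settings of this pipeline.
+
+```python
+# Hypothetical end-to-end usage; column names are assumptions (see note above).
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+df = spark.createDataFrame(
+    [["What framework do I use?", "I use spark-nlp."]]
+).toDF("question", "context")
+
+pipeline = PretrainedPipeline("qa_synth_21_sept_with_finetune_1_0_pipeline", lang="en")
+result = pipeline.transform(df)
+result.select("answer.result").show(truncate=False)
+```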
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_synth_21_sept_with_finetune_1_0_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|813.0 MB| + +## References + +https://huggingface.co/am-infoweb/QA_SYNTH_21_SEPT_WITH_FINETUNE_1.0 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-qa_synth_25_sept_with_finetune_1_1_on19sept_en.md b/docs/_posts/ahmedlone127/2024-09-06-qa_synth_25_sept_with_finetune_1_1_on19sept_en.md new file mode 100644 index 00000000000000..d1c76e851d6fad --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-qa_synth_25_sept_with_finetune_1_1_on19sept_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English qa_synth_25_sept_with_finetune_1_1_on19sept XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: qa_synth_25_sept_with_finetune_1_1_on19sept +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_synth_25_sept_with_finetune_1_1_on19sept` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_synth_25_sept_with_finetune_1_1_on19sept_en_5.5.0_3.0_1725597638321.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_synth_25_sept_with_finetune_1_1_on19sept_en_5.5.0_3.0_1725597638321.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_synth_25_sept_with_finetune_1_1_on19sept","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols("question", "context")
+    .setOutputCols("document_question", "document_context")
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_synth_25_sept_with_finetune_1_1_on19sept", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_synth_25_sept_with_finetune_1_1_on19sept| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|819.6 MB| + +## References + +https://huggingface.co/am-infoweb/QA_SYNTH_25_SEPT_WITH_FINETUNE_1.1_on19sept \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-qa_synth_25_sept_with_finetune_1_1_on19sept_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-qa_synth_25_sept_with_finetune_1_1_on19sept_pipeline_en.md new file mode 100644 index 00000000000000..84943ceb6452cf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-qa_synth_25_sept_with_finetune_1_1_on19sept_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English qa_synth_25_sept_with_finetune_1_1_on19sept_pipeline pipeline XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: qa_synth_25_sept_with_finetune_1_1_on19sept_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_synth_25_sept_with_finetune_1_1_on19sept_pipeline` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_synth_25_sept_with_finetune_1_1_on19sept_pipeline_en_5.5.0_3.0_1725597733433.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_synth_25_sept_with_finetune_1_1_on19sept_pipeline_en_5.5.0_3.0_1725597733433.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("qa_synth_25_sept_with_finetune_1_1_on19sept_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("qa_synth_25_sept_with_finetune_1_1_on19sept_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
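+The snippet above assumes a running Spark session and an existing DataFrame `df`. A minimal, self-contained sketch is shown below; the `question`/`context` input columns and the `answer` output column are assumptions inferred from the included MultiDocumentAssembler and XlmRoBertaForQuestionAnswering stages, not confirmed settings of this pipeline.
+
+```python
+# Hypothetical end-to-end usage; column names are assumptions (see note above).
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+df = spark.createDataFrame(
+    [["What framework do I use?", "I use spark-nlp."]]
+).toDF("question", "context")
+
+pipeline = PretrainedPipeline("qa_synth_25_sept_with_finetune_1_1_on19sept_pipeline", lang="en")
+result = pipeline.transform(df)
+result.select("answer.result").show(truncate=False)
+```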
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_synth_25_sept_with_finetune_1_1_on19sept_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|819.6 MB| + +## References + +https://huggingface.co/am-infoweb/QA_SYNTH_25_SEPT_WITH_FINETUNE_1.1_on19sept + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-qa_synth_data_with_unanswerable_23_aug_xlm_roberta_base_en.md b/docs/_posts/ahmedlone127/2024-09-06-qa_synth_data_with_unanswerable_23_aug_xlm_roberta_base_en.md new file mode 100644 index 00000000000000..a56e50c08fb995 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-qa_synth_data_with_unanswerable_23_aug_xlm_roberta_base_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English qa_synth_data_with_unanswerable_23_aug_xlm_roberta_base XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: qa_synth_data_with_unanswerable_23_aug_xlm_roberta_base +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_synth_data_with_unanswerable_23_aug_xlm_roberta_base` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_synth_data_with_unanswerable_23_aug_xlm_roberta_base_en_5.5.0_3.0_1725640272948.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_synth_data_with_unanswerable_23_aug_xlm_roberta_base_en_5.5.0_3.0_1725640272948.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_synth_data_with_unanswerable_23_aug_xlm_roberta_base","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols("question", "context")
+    .setOutputCols("document_question", "document_context")
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_synth_data_with_unanswerable_23_aug_xlm_roberta_base", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_synth_data_with_unanswerable_23_aug_xlm_roberta_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|801.1 MB| + +## References + +https://huggingface.co/am-infoweb/QA_SYNTH_DATA_WITH_UNANSWERABLE_23_AUG_xlm_roberta-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-qa_synthetic_data_train_real_data_test_xlm_roberta_base_en.md b/docs/_posts/ahmedlone127/2024-09-06-qa_synthetic_data_train_real_data_test_xlm_roberta_base_en.md new file mode 100644 index 00000000000000..c64f8e5d9f132a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-qa_synthetic_data_train_real_data_test_xlm_roberta_base_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English qa_synthetic_data_train_real_data_test_xlm_roberta_base XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: qa_synthetic_data_train_real_data_test_xlm_roberta_base +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_synthetic_data_train_real_data_test_xlm_roberta_base` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_synthetic_data_train_real_data_test_xlm_roberta_base_en_5.5.0_3.0_1725597951240.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_synthetic_data_train_real_data_test_xlm_roberta_base_en_5.5.0_3.0_1725597951240.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_synthetic_data_train_real_data_test_xlm_roberta_base","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols("question", "context")
+    .setOutputCols("document_question", "document_context")
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_synthetic_data_train_real_data_test_xlm_roberta_base", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_synthetic_data_train_real_data_test_xlm_roberta_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|805.8 MB| + +## References + +https://huggingface.co/am-infoweb/QA_SYNTHETIC_DATA_TRAIN_REAL_DATA_TEST_xlm_roberta-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-qa_synthetic_data_train_real_data_test_xlm_roberta_base_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-qa_synthetic_data_train_real_data_test_xlm_roberta_base_pipeline_en.md new file mode 100644 index 00000000000000..297252706493e7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-qa_synthetic_data_train_real_data_test_xlm_roberta_base_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English qa_synthetic_data_train_real_data_test_xlm_roberta_base_pipeline pipeline XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: qa_synthetic_data_train_real_data_test_xlm_roberta_base_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_synthetic_data_train_real_data_test_xlm_roberta_base_pipeline` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_synthetic_data_train_real_data_test_xlm_roberta_base_pipeline_en_5.5.0_3.0_1725598065971.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_synthetic_data_train_real_data_test_xlm_roberta_base_pipeline_en_5.5.0_3.0_1725598065971.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("qa_synthetic_data_train_real_data_test_xlm_roberta_base_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("qa_synthetic_data_train_real_data_test_xlm_roberta_base_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
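+The snippet above assumes a running Spark session and an existing DataFrame `df`. A minimal, self-contained sketch is shown below; the `question`/`context` input columns and the `answer` output column are assumptions inferred from the included MultiDocumentAssembler and XlmRoBertaForQuestionAnswering stages, not confirmed settings of this pipeline.
+
+```python
+# Hypothetical end-to-end usage; column names are assumptions (see note above).
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+df = spark.createDataFrame(
+    [["What framework do I use?", "I use spark-nlp."]]
+).toDF("question", "context")
+
+pipeline = PretrainedPipeline("qa_synthetic_data_train_real_data_test_xlm_roberta_base_pipeline", lang="en")
+result = pipeline.transform(df)
+result.select("answer.result").show(truncate=False)
+```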
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_synthetic_data_train_real_data_test_xlm_roberta_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|805.8 MB| + +## References + +https://huggingface.co/am-infoweb/QA_SYNTHETIC_DATA_TRAIN_REAL_DATA_TEST_xlm_roberta-base + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-question_answer_thirdeyedata_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-question_answer_thirdeyedata_pipeline_en.md new file mode 100644 index 00000000000000..4b09d93714c6f2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-question_answer_thirdeyedata_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English question_answer_thirdeyedata_pipeline pipeline DistilBertForQuestionAnswering from ThirdEyeData +author: John Snow Labs +name: question_answer_thirdeyedata_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`question_answer_thirdeyedata_pipeline` is a English model originally trained by ThirdEyeData. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/question_answer_thirdeyedata_pipeline_en_5.5.0_3.0_1725621844411.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/question_answer_thirdeyedata_pipeline_en_5.5.0_3.0_1725621844411.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("question_answer_thirdeyedata_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("question_answer_thirdeyedata_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
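+The snippet above assumes a running Spark session and an existing DataFrame `df`. A minimal, self-contained sketch is shown below; the `question`/`context` input columns and the `answer` output column are assumptions inferred from the included MultiDocumentAssembler and DistilBertForQuestionAnswering stages, not confirmed settings of this pipeline.
+
+```python
+# Hypothetical end-to-end usage; column names are assumptions (see note above).
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+df = spark.createDataFrame(
+    [["What framework do I use?", "I use spark-nlp."]]
+).toDF("question", "context")
+
+pipeline = PretrainedPipeline("question_answer_thirdeyedata_pipeline", lang="en")
+result = pipeline.transform(df)
+result.select("answer.result").show(truncate=False)
+```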
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|question_answer_thirdeyedata_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/ThirdEyeData/Question_Answer + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-question_answering_tutorial_practice_en.md b/docs/_posts/ahmedlone127/2024-09-06-question_answering_tutorial_practice_en.md new file mode 100644 index 00000000000000..7b6b8a819b31b9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-question_answering_tutorial_practice_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English question_answering_tutorial_practice DistilBertForQuestionAnswering from Erantr1 +author: John Snow Labs +name: question_answering_tutorial_practice +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`question_answering_tutorial_practice` is a English model originally trained by Erantr1. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/question_answering_tutorial_practice_en_5.5.0_3.0_1725654472476.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/question_answering_tutorial_practice_en_5.5.0_3.0_1725654472476.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("question_answering_tutorial_practice","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols("question", "context")
+    .setOutputCols("document_question", "document_context")
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("question_answering_tutorial_practice", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDS.toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|question_answering_tutorial_practice| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Erantr1/question_answering_tutorial_practice \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-question_answering_tutorial_practice_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-question_answering_tutorial_practice_pipeline_en.md new file mode 100644 index 00000000000000..1789a17d2c05ec --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-question_answering_tutorial_practice_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English question_answering_tutorial_practice_pipeline pipeline DistilBertForQuestionAnswering from Erantr1 +author: John Snow Labs +name: question_answering_tutorial_practice_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`question_answering_tutorial_practice_pipeline` is a English model originally trained by Erantr1. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/question_answering_tutorial_practice_pipeline_en_5.5.0_3.0_1725654485564.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/question_answering_tutorial_practice_pipeline_en_5.5.0_3.0_1725654485564.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("question_answering_tutorial_practice_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("question_answering_tutorial_practice_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
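+The snippet above assumes a running Spark session and an existing DataFrame `df`. A minimal, self-contained sketch is shown below; the `question`/`context` input columns and the `answer` output column are assumptions inferred from the included MultiDocumentAssembler and DistilBertForQuestionAnswering stages, not confirmed settings of this pipeline.
+
+```python
+# Hypothetical end-to-end usage; column names are assumptions (see note above).
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+df = spark.createDataFrame(
+    [["What framework do I use?", "I use spark-nlp."]]
+).toDF("question", "context")
+
+pipeline = PretrainedPipeline("question_answering_tutorial_practice_pipeline", lang="en")
+result = pipeline.transform(df)
+result.select("answer.result").show(truncate=False)
+```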
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|question_answering_tutorial_practice_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Erantr1/question_answering_tutorial_practice + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-radbert_roberta_4m_zzxslp_en.md b/docs/_posts/ahmedlone127/2024-09-06-radbert_roberta_4m_zzxslp_en.md new file mode 100644 index 00000000000000..3b14a7213c4962 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-radbert_roberta_4m_zzxslp_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English radbert_roberta_4m_zzxslp RoBertaEmbeddings from zzxslp +author: John Snow Labs +name: radbert_roberta_4m_zzxslp +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`radbert_roberta_4m_zzxslp` is a English model originally trained by zzxslp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/radbert_roberta_4m_zzxslp_en_5.5.0_3.0_1725660396936.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/radbert_roberta_4m_zzxslp_en_5.5.0_3.0_1725660396936.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("radbert_roberta_4m_zzxslp","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("radbert_roberta_4m_zzxslp","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
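+Continuing the example above, each token annotation in the `embeddings` output column carries the token text in its `result` field and the vector in its `embeddings` field:
+
+```python
+# One row per token: the token text and its embedding vector.
+pipelineDF.selectExpr("explode(embeddings) as emb") \
+    .selectExpr("emb.result as token", "emb.embeddings as vector") \
+    .show(5, truncate=80)
+```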
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|radbert_roberta_4m_zzxslp| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|465.7 MB| + +## References + +https://huggingface.co/zzxslp/RadBERT-RoBERTa-4m \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-radbert_roberta_4m_zzxslp_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-radbert_roberta_4m_zzxslp_pipeline_en.md new file mode 100644 index 00000000000000..48fe175f0a7004 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-radbert_roberta_4m_zzxslp_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English radbert_roberta_4m_zzxslp_pipeline pipeline RoBertaEmbeddings from zzxslp +author: John Snow Labs +name: radbert_roberta_4m_zzxslp_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`radbert_roberta_4m_zzxslp_pipeline` is a English model originally trained by zzxslp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/radbert_roberta_4m_zzxslp_pipeline_en_5.5.0_3.0_1725660419141.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/radbert_roberta_4m_zzxslp_pipeline_en_5.5.0_3.0_1725660419141.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("radbert_roberta_4m_zzxslp_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("radbert_roberta_4m_zzxslp_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
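+The snippet above assumes a running Spark session and an existing DataFrame `df`. A minimal, self-contained sketch is shown below; the `text` input column and the `embeddings` output column are assumptions inferred from the included DocumentAssembler and RoBertaEmbeddings stages, not confirmed settings of this pipeline.
+
+```python
+# Hypothetical end-to-end usage; column names are assumptions (see note above).
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("radbert_roberta_4m_zzxslp_pipeline", lang="en")
+result = pipeline.transform(df)
+result.select("embeddings.embeddings").show(truncate=False)
+```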
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|radbert_roberta_4m_zzxslp_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|465.7 MB| + +## References + +https://huggingface.co/zzxslp/RadBERT-RoBERTa-4m + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-randomly_pruned_30_model_en.md b/docs/_posts/ahmedlone127/2024-09-06-randomly_pruned_30_model_en.md new file mode 100644 index 00000000000000..7d4cc12fcaeda6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-randomly_pruned_30_model_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English randomly_pruned_30_model DistilBertForSequenceClassification from andygoh5 +author: John Snow Labs +name: randomly_pruned_30_model +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`randomly_pruned_30_model` is a English model originally trained by andygoh5. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/randomly_pruned_30_model_en_5.5.0_3.0_1725608666198.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/randomly_pruned_30_model_en_5.5.0_3.0_1725608666198.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("randomly_pruned_30_model","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("randomly_pruned_30_model", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
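+Continuing the example above, the predicted label for each input text is available in the `class` output column:
+
+```python
+# The predicted class label for each input row.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```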
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|randomly_pruned_30_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/andygoh5/randomly-pruned-30-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-randomly_pruned_30_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-randomly_pruned_30_model_pipeline_en.md new file mode 100644 index 00000000000000..51c8194e9bc158 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-randomly_pruned_30_model_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English randomly_pruned_30_model_pipeline pipeline DistilBertForSequenceClassification from andygoh5 +author: John Snow Labs +name: randomly_pruned_30_model_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`randomly_pruned_30_model_pipeline` is a English model originally trained by andygoh5. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/randomly_pruned_30_model_pipeline_en_5.5.0_3.0_1725608677674.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/randomly_pruned_30_model_pipeline_en_5.5.0_3.0_1725608677674.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("randomly_pruned_30_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("randomly_pruned_30_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
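+
+The `df` referenced above is any Spark DataFrame with a `text` column. As a minimal sketch, assuming a running Spark session named `spark`, the input can be built inline, and `annotate()` can be used for quick single-string checks instead of `transform()`:
+
+```python
+# Hypothetical one-row input frame for the snippet above
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+# annotate() returns plain Python dicts keyed by output column name
+print(pipeline.annotate("I love spark-nlp"))
+```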
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|randomly_pruned_30_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/andygoh5/randomly-pruned-30-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-refpydst_1p_icdst_split_v1_en.md b/docs/_posts/ahmedlone127/2024-09-06-refpydst_1p_icdst_split_v1_en.md new file mode 100644 index 00000000000000..5a68dbdc801f0a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-refpydst_1p_icdst_split_v1_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English refpydst_1p_icdst_split_v1 MPNetEmbeddings from Brendan +author: John Snow Labs +name: refpydst_1p_icdst_split_v1 +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`refpydst_1p_icdst_split_v1` is a English model originally trained by Brendan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/refpydst_1p_icdst_split_v1_en_5.5.0_3.0_1725595511299.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/refpydst_1p_icdst_split_v1_en_5.5.0_3.0_1725595511299.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("refpydst_1p_icdst_split_v1","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("refpydst_1p_icdst_split_v1","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
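+
+A minimal sketch, assuming the Python example above has been run, for reading the sentence vectors out of `pipelineDF`: the `embeddings` column holds one annotation per input document, and its `embeddings` field contains the MPNet vector.
+
+```python
+# Explode the annotation array and keep only the float vector
+pipelineDF.selectExpr("explode(embeddings) as emb") \
+    .selectExpr("emb.embeddings as vector") \
+    .show(truncate=False)
+```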
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|refpydst_1p_icdst_split_v1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/Brendan/refpydst-1p-icdst-split-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-refpydst_1p_icdst_split_v1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-refpydst_1p_icdst_split_v1_pipeline_en.md new file mode 100644 index 00000000000000..af73b2fdb9943f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-refpydst_1p_icdst_split_v1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English refpydst_1p_icdst_split_v1_pipeline pipeline MPNetEmbeddings from Brendan +author: John Snow Labs +name: refpydst_1p_icdst_split_v1_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`refpydst_1p_icdst_split_v1_pipeline` is a English model originally trained by Brendan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/refpydst_1p_icdst_split_v1_pipeline_en_5.5.0_3.0_1725595531820.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/refpydst_1p_icdst_split_v1_pipeline_en_5.5.0_3.0_1725595531820.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("refpydst_1p_icdst_split_v1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("refpydst_1p_icdst_split_v1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|refpydst_1p_icdst_split_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/Brendan/refpydst-1p-icdst-split-v1 + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-results_soniquentin_en.md b/docs/_posts/ahmedlone127/2024-09-06-results_soniquentin_en.md new file mode 100644 index 00000000000000..e7458fe92f8a58 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-results_soniquentin_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English results_soniquentin DistilBertForTokenClassification from soniquentin +author: John Snow Labs +name: results_soniquentin +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`results_soniquentin` is a English model originally trained by soniquentin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/results_soniquentin_en_5.5.0_3.0_1725653613809.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/results_soniquentin_en_5.5.0_3.0_1725653613809.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("results_soniquentin","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("results_soniquentin", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
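+
+A minimal sketch, assuming the Python example above has been run, for inspecting the token-level predictions: the `ner` output column carries one IOB-style label per token in its `result` field.
+
+```python
+# Tokens and their predicted entity labels, one array per input row
+pipelineDF.select("token.result", "ner.result").show(truncate=False)
+```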
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|results_soniquentin| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|377.1 MB| + +## References + +https://huggingface.co/soniquentin/results \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-results_soniquentin_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-results_soniquentin_pipeline_en.md new file mode 100644 index 00000000000000..4ce9b4a7e1ae7d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-results_soniquentin_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English results_soniquentin_pipeline pipeline DistilBertForTokenClassification from soniquentin +author: John Snow Labs +name: results_soniquentin_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`results_soniquentin_pipeline` is a English model originally trained by soniquentin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/results_soniquentin_pipeline_en_5.5.0_3.0_1725653631317.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/results_soniquentin_pipeline_en_5.5.0_3.0_1725653631317.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("results_soniquentin_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("results_soniquentin_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|results_soniquentin_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|377.1 MB| + +## References + +https://huggingface.co/soniquentin/results + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-reward_deberta_v3_en.md b/docs/_posts/ahmedlone127/2024-09-06-reward_deberta_v3_en.md new file mode 100644 index 00000000000000..18f472a9ac3528 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-reward_deberta_v3_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English reward_deberta_v3 DeBertaForSequenceClassification from Chat-Error +author: John Snow Labs +name: reward_deberta_v3 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`reward_deberta_v3` is a English model originally trained by Chat-Error. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/reward_deberta_v3_en_5.5.0_3.0_1725610037066.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/reward_deberta_v3_en_5.5.0_3.0_1725610037066.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("reward_deberta_v3","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("reward_deberta_v3", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|reward_deberta_v3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|642.8 MB| + +## References + +https://huggingface.co/Chat-Error/reward-deberta-v3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-reward_deberta_v3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-reward_deberta_v3_pipeline_en.md new file mode 100644 index 00000000000000..627bc99aaa7320 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-reward_deberta_v3_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English reward_deberta_v3_pipeline pipeline DeBertaForSequenceClassification from Chat-Error +author: John Snow Labs +name: reward_deberta_v3_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`reward_deberta_v3_pipeline` is a English model originally trained by Chat-Error. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/reward_deberta_v3_pipeline_en_5.5.0_3.0_1725610087868.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/reward_deberta_v3_pipeline_en_5.5.0_3.0_1725610087868.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("reward_deberta_v3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("reward_deberta_v3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|reward_deberta_v3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|642.8 MB| + +## References + +https://huggingface.co/Chat-Error/reward-deberta-v3 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-rg_fake_signatures_southern_sotho_en.md b/docs/_posts/ahmedlone127/2024-09-06-rg_fake_signatures_southern_sotho_en.md new file mode 100644 index 00000000000000..778391d0501e1c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-rg_fake_signatures_southern_sotho_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English rg_fake_signatures_southern_sotho DistilBertForTokenClassification from chilliadgl +author: John Snow Labs +name: rg_fake_signatures_southern_sotho +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rg_fake_signatures_southern_sotho` is a English model originally trained by chilliadgl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rg_fake_signatures_southern_sotho_en_5.5.0_3.0_1725599383829.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rg_fake_signatures_southern_sotho_en_5.5.0_3.0_1725599383829.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("rg_fake_signatures_southern_sotho","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("rg_fake_signatures_southern_sotho", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rg_fake_signatures_southern_sotho| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/chilliadgl/RG_fake_signatures_ST \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-rg_fake_signatures_southern_sotho_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-rg_fake_signatures_southern_sotho_pipeline_en.md new file mode 100644 index 00000000000000..e8ea71784ef1b7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-rg_fake_signatures_southern_sotho_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English rg_fake_signatures_southern_sotho_pipeline pipeline DistilBertForTokenClassification from chilliadgl +author: John Snow Labs +name: rg_fake_signatures_southern_sotho_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rg_fake_signatures_southern_sotho_pipeline` is a English model originally trained by chilliadgl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rg_fake_signatures_southern_sotho_pipeline_en_5.5.0_3.0_1725599399354.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rg_fake_signatures_southern_sotho_pipeline_en_5.5.0_3.0_1725599399354.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("rg_fake_signatures_southern_sotho_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("rg_fake_signatures_southern_sotho_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rg_fake_signatures_southern_sotho_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/chilliadgl/RG_fake_signatures_ST + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-roberta_babe_ft_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-roberta_babe_ft_pipeline_en.md new file mode 100644 index 00000000000000..349ce3a5d06c3c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-roberta_babe_ft_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_babe_ft_pipeline pipeline RoBertaForSequenceClassification from mediabiasgroup +author: John Snow Labs +name: roberta_babe_ft_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_babe_ft_pipeline` is a English model originally trained by mediabiasgroup. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_babe_ft_pipeline_en_5.5.0_3.0_1725613475688.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_babe_ft_pipeline_en_5.5.0_3.0_1725613475688.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_babe_ft_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_babe_ft_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_babe_ft_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|446.1 MB| + +## References + +https://huggingface.co/mediabiasgroup/roberta-babe-ft + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-roberta_base_french_fr.md b/docs/_posts/ahmedlone127/2024-09-06-roberta_base_french_fr.md new file mode 100644 index 00000000000000..28160cf5ee3d19 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-roberta_base_french_fr.md @@ -0,0 +1,94 @@ +--- +layout: model +title: French roberta_base_french RoBertaEmbeddings from ClassCat +author: John Snow Labs +name: roberta_base_french +date: 2024-09-06 +tags: [fr, open_source, onnx, embeddings, roberta] +task: Embeddings +language: fr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_french` is a French model originally trained by ClassCat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_french_fr_5.5.0_3.0_1725660848605.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_french_fr_5.5.0_3.0_1725660848605.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("roberta_base_french","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("roberta_base_french","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
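+
+Since this is a French model, French input text is the intended use case. A minimal sketch (the example sentence below is hypothetical) reusing the fitted Python pipeline from above:
+
+```python
+# Hypothetical French input; reuse the pipelineModel fitted above
+data_fr = spark.createDataFrame([["J'adore Spark NLP"]]).toDF("text")
+pipelineModel.transform(data_fr) \
+    .selectExpr("explode(embeddings) as emb") \
+    .select("emb.result", "emb.embeddings") \
+    .show(truncate=False)
+```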
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_french| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|fr| +|Size:|464.8 MB| + +## References + +https://huggingface.co/ClassCat/roberta-base-french \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-roberta_base_french_pipeline_fr.md b/docs/_posts/ahmedlone127/2024-09-06-roberta_base_french_pipeline_fr.md new file mode 100644 index 00000000000000..1cd0b041360512 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-roberta_base_french_pipeline_fr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: French roberta_base_french_pipeline pipeline RoBertaEmbeddings from ClassCat +author: John Snow Labs +name: roberta_base_french_pipeline +date: 2024-09-06 +tags: [fr, open_source, pipeline, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_french_pipeline` is a French model originally trained by ClassCat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_french_pipeline_fr_5.5.0_3.0_1725660871580.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_french_pipeline_fr_5.5.0_3.0_1725660871580.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_french_pipeline", lang = "fr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_french_pipeline", lang = "fr") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_french_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fr| +|Size:|464.8 MB| + +## References + +https://huggingface.co/ClassCat/roberta-base-french + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-roberta_base_multinerd_jayant_yadav_en.md b/docs/_posts/ahmedlone127/2024-09-06-roberta_base_multinerd_jayant_yadav_en.md new file mode 100644 index 00000000000000..be265a760b9422 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-roberta_base_multinerd_jayant_yadav_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_base_multinerd_jayant_yadav RoBertaForTokenClassification from jayant-yadav +author: John Snow Labs +name: roberta_base_multinerd_jayant_yadav +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_multinerd_jayant_yadav` is a English model originally trained by jayant-yadav. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_multinerd_jayant_yadav_en_5.5.0_3.0_1725638532055.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_multinerd_jayant_yadav_en_5.5.0_3.0_1725638532055.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_base_multinerd_jayant_yadav","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_base_multinerd_jayant_yadav", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_multinerd_jayant_yadav| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|461.1 MB| + +## References + +https://huggingface.co/jayant-yadav/roberta-base-multinerd \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-roberta_base_multinerd_jayant_yadav_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-roberta_base_multinerd_jayant_yadav_pipeline_en.md new file mode 100644 index 00000000000000..b007a7a1627d39 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-roberta_base_multinerd_jayant_yadav_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_base_multinerd_jayant_yadav_pipeline pipeline RoBertaForTokenClassification from jayant-yadav +author: John Snow Labs +name: roberta_base_multinerd_jayant_yadav_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_multinerd_jayant_yadav_pipeline` is a English model originally trained by jayant-yadav. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_multinerd_jayant_yadav_pipeline_en_5.5.0_3.0_1725638554770.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_multinerd_jayant_yadav_pipeline_en_5.5.0_3.0_1725638554770.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_multinerd_jayant_yadav_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_multinerd_jayant_yadav_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_multinerd_jayant_yadav_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|461.2 MB| + +## References + +https://huggingface.co/jayant-yadav/roberta-base-multinerd + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-roberta_classifier_large_finetuned_clinc_1_en.md b/docs/_posts/ahmedlone127/2024-09-06-roberta_classifier_large_finetuned_clinc_1_en.md new file mode 100644 index 00000000000000..9458cb6a378017 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-roberta_classifier_large_finetuned_clinc_1_en.md @@ -0,0 +1,104 @@ +--- +layout: model +title: English RoBertaForSequenceClassification Large Cased model (from lewtun) +author: John Snow Labs +name: roberta_classifier_large_finetuned_clinc_1 +date: 2024-09-06 +tags: [en, open_source, roberta, sequence_classification, classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `roberta-large-finetuned-clinc-1` is a English model originally trained by `lewtun`. + +## Predicted Entities + +`todo_list`, `card_declined`, `cook_time`, `pto_request_status`, `calendar`, `spending_history`, `next_holiday`, `tell_joke`, `ingredients_list`, `change_language`, `restaurant_suggestion`, `min_payment`, `pin_change`, `whisper_mode`, `date`, `international_visa`, `plug_type`, `w2`, `translate`, `pto_used`, `thank_you`, `alarm`, `shopping_list_update`, `flight_status`, `change_volume`, `bill_due`, `find_phone`, `carry_on`, `reminder_update`, `apr`, `user_name`, `uber`, `calories`, `report_lost_card`, `change_accent`, `payday`, `timezone`, `reminder`, `roll_dice`, `text`, `current_location`, `cancel`, `change_ai_name`, `weather`, `directions`, `jump_start`, `recipe`, `timer`, `what_song`, `income`, `change_user_name`, `tire_change`, `sync_device`, `application_status`, `lost_luggage`, `meeting_schedule`, `what_is_your_name`, `credit_score`, `gas_type`, `maybe`, `order_checks`, `do_you_have_pets`, `oil_change_when`, `schedule_meeting`, `interest_rate`, `rollover_401k`, `how_old_are_you`, `last_maintenance`, `smart_home`, `book_hotel`, `freeze_account`, `nutrition_info`, `bill_balance`, `improve_credit_score`, `pto_balance`, `replacement_card_duration`, `travel_suggestion`, `calendar_update`, `transfer`, `vaccines`, `update_playlist`, `mpg`, `schedule_maintenance`, `confirm_reservation`, `repeat`, `restaurant_reservation`, `meaning_of_life`, `gas`, `cancel_reservation`, `international_fees`, `routing`, `meal_suggestion`, `time`, `change_speed`, `new_card`, `redeem_rewards`, `insurance_change`, `insurance`, `play_music`, `credit_limit`, `balance`, `goodbye`, `are_you_a_bot`, `restaurant_reviews`, `todo_list_update`, `rewards_balance`, `no`, `spelling`, `what_can_i_ask_you`, `order`, `reset_settings`, `shopping_list`, `order_status`, `ingredient_substitution`, `food_last`, `transactions`, `make_call`, `travel_notification`, `who_made_you`, `share_location`, `damaged_card`, `next_song`, 
`oil_change_how`, `taxes`, `direct_deposit`, `who_do_you_work_for`, `yes`, `exchange_rate`, `definition`, `what_are_your_hobbies`, `expiration_date`, `car_rental`, `tire_pressure`, `accept_reservations`, `calculator`, `account_blocked`, `how_busy`, `distance`, `book_flight`, `credit_limit_change`, `report_fraud`, `pay_bill`, `measurement_conversion`, `where_are_you_from`, `pto_request`, `travel_alert`, `flip_coin`, `fun_fact`, `traffic`, `greeting`, `oos` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_classifier_large_finetuned_clinc_1_en_5.5.0_3.0_1725613233563.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_classifier_large_finetuned_clinc_1_en_5.5.0_3.0_1725613233563.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols("document") \
+    .setOutputCol("token")
+
+seq_classifier = RoBertaForSequenceClassification.pretrained("roberta_classifier_large_finetuned_clinc_1","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline(stages=[documentAssembler, tokenizer, seq_classifier])
+
+data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val seq_classifier = RoBertaForSequenceClassification.pretrained("roberta_classifier_large_finetuned_clinc_1","en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, seq_classifier))
+
+val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text")
+
+val result = pipeline.fit(data).transform(data)
+```
+
+{:.nlu-block}
+```python
+import nlu
+nlu.load("en.classify.roberta.clinc.v2large_finetuned.by_lewtun").predict("""PUT YOUR STRING HERE""")
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_classifier_large_finetuned_clinc_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +References + +- https://huggingface.co/lewtun/roberta-large-finetuned-clinc-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-roberta_large_bne_capitel_sayula_popoluca_es.md b/docs/_posts/ahmedlone127/2024-09-06-roberta_large_bne_capitel_sayula_popoluca_es.md new file mode 100644 index 00000000000000..521610ec132589 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-roberta_large_bne_capitel_sayula_popoluca_es.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Castilian, Spanish roberta_large_bne_capitel_sayula_popoluca RoBertaForTokenClassification from BSC-LT +author: John Snow Labs +name: roberta_large_bne_capitel_sayula_popoluca +date: 2024-09-06 +tags: [es, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_large_bne_capitel_sayula_popoluca` is a Castilian, Spanish model originally trained by BSC-LT. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_large_bne_capitel_sayula_popoluca_es_5.5.0_3.0_1725624724522.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_large_bne_capitel_sayula_popoluca_es_5.5.0_3.0_1725624724522.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_large_bne_capitel_sayula_popoluca","es") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_large_bne_capitel_sayula_popoluca", "es")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_large_bne_capitel_sayula_popoluca| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|es| +|Size:|1.3 GB| + +## References + +https://huggingface.co/BSC-LT/roberta-large-bne-capitel-pos \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-roberta_large_bne_capitel_sayula_popoluca_pipeline_es.md b/docs/_posts/ahmedlone127/2024-09-06-roberta_large_bne_capitel_sayula_popoluca_pipeline_es.md new file mode 100644 index 00000000000000..b61e64a1a52ffd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-roberta_large_bne_capitel_sayula_popoluca_pipeline_es.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Castilian, Spanish roberta_large_bne_capitel_sayula_popoluca_pipeline pipeline RoBertaForTokenClassification from BSC-LT +author: John Snow Labs +name: roberta_large_bne_capitel_sayula_popoluca_pipeline +date: 2024-09-06 +tags: [es, open_source, pipeline, onnx] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_large_bne_capitel_sayula_popoluca_pipeline` is a Castilian, Spanish model originally trained by BSC-LT. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_large_bne_capitel_sayula_popoluca_pipeline_es_5.5.0_3.0_1725624791095.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_large_bne_capitel_sayula_popoluca_pipeline_es_5.5.0_3.0_1725624791095.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_large_bne_capitel_sayula_popoluca_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_large_bne_capitel_sayula_popoluca_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_large_bne_capitel_sayula_popoluca_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|1.3 GB| + +## References + +https://huggingface.co/BSC-LT/roberta-large-bne-capitel-pos + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-roberta_large_boolq_en.md b/docs/_posts/ahmedlone127/2024-09-06-roberta_large_boolq_en.md new file mode 100644 index 00000000000000..32b34641eb39b5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-roberta_large_boolq_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_large_boolq RoBertaForSequenceClassification from nfliu +author: John Snow Labs +name: roberta_large_boolq +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_large_boolq` is a English model originally trained by nfliu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_large_boolq_en_5.5.0_3.0_1725612775790.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_large_boolq_en_5.5.0_3.0_1725612775790.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("roberta_large_boolq","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("roberta_large_boolq", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_large_boolq| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/nfliu/roberta-large_boolq \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-roberta_large_boolq_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-roberta_large_boolq_pipeline_en.md new file mode 100644 index 00000000000000..d1b496cbdfc606 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-roberta_large_boolq_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_large_boolq_pipeline pipeline RoBertaForSequenceClassification from nfliu +author: John Snow Labs +name: roberta_large_boolq_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_large_boolq_pipeline` is a English model originally trained by nfliu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_large_boolq_pipeline_en_5.5.0_3.0_1725612844446.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_large_boolq_pipeline_en_5.5.0_3.0_1725612844446.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_large_boolq_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_large_boolq_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_large_boolq_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/nfliu/roberta-large_boolq + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-roberta_large_finnish_v2_pipeline_fi.md b/docs/_posts/ahmedlone127/2024-09-06-roberta_large_finnish_v2_pipeline_fi.md new file mode 100644 index 00000000000000..04291e7cf52085 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-roberta_large_finnish_v2_pipeline_fi.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Finnish roberta_large_finnish_v2_pipeline pipeline RoBertaEmbeddings from Finnish-NLP +author: John Snow Labs +name: roberta_large_finnish_v2_pipeline +date: 2024-09-06 +tags: [fi, open_source, pipeline, onnx] +task: Embeddings +language: fi +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_large_finnish_v2_pipeline` is a Finnish model originally trained by Finnish-NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_large_finnish_v2_pipeline_fi_5.5.0_3.0_1725660921086.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_large_finnish_v2_pipeline_fi_5.5.0_3.0_1725660921086.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_large_finnish_v2_pipeline", lang = "fi") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_large_finnish_v2_pipeline", lang = "fi") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_large_finnish_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fi| +|Size:|1.3 GB| + +## References + +https://huggingface.co/Finnish-NLP/roberta-large-finnish-v2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-roberta_ner_graphcodebert_MT4TS_en.md b/docs/_posts/ahmedlone127/2024-09-06-roberta_ner_graphcodebert_MT4TS_en.md new file mode 100644 index 00000000000000..56a8e3527f28a4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-roberta_ner_graphcodebert_MT4TS_en.md @@ -0,0 +1,106 @@ +--- +layout: model +title: English RobertaForTokenClassification Cased model (from kevinjesse) +author: John Snow Labs +name: roberta_ner_graphcodebert_MT4TS +date: 2024-09-06 +tags: [bert, ner, open_source, en, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RobertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `graphcodebert-MT4TS` is a English model originally trained by `kevinjesse`. + +## Predicted Entities + +`NodeTypesType`, `EsQueryAlertParams`, `FieldType`, `Handles`, `ServiceManager`, `SpatialViewDefinitionProps`, `UploadedVideoFileOption`, `A11yConfig`, `TEffects`, `ts.ElementAccessExpression`, `KleisliIO`, `SerializableState`, `AlertsByName`, `AuthenticateEmailRequest`, `GameService`, `GCM`, `TForwardOptions`, `CurlCode`, `DisputableVotingData`, `SimplePath`, `OptimizationResult`, `TransactionModel`, `AuthProvider`, `ButtonType.StyleAttributes`, `IStateDB`, `MediaTrackCapabilities`, `Authorization`, `ConciseBody`, `IItemTemplate`, `UrlOptions`, `NameStyle`, `RegionInfoProviderOptions`, `IAst`, `DescribeClustersRequest`, `reqType`, `AppProps`, `Graphin`, `GunGetOpts`, `LanguageServer`, `Tensor4D`, `ColorSet`, `IFile`, `PlayerActions`, `SpaceBonus.STEEL`, `VisitedItem`, `Materialize.ChipDataObject`, `ConnectedUser`, `SessionTypes.Settled`, `RigConfig`, `StashResult`, `ChannelMessage`, `BrowserObject`, `SubscriptionAccountInfo`, `DeleteApp`, `Referenced`, `IParameterDefinitionsSource`, `BinaryTargetsEnvValue`, `Preposition`, `IResultSetValue`, `BrowserEvent`, `Claim`, `EventAggregatorService`, `MetricDimensionDefinition`, `AST.Node`, `VcsService`, `DataListProps`, `MActor`, `Contactable`, `NotebookCellData`, `StatefulSearchBarDeps`, `SocketConnection`, `CommandBuilder`, `FilterInput`, `Gunzip`, `Models.WebHook`, `ServerResponseService`, `PrincipalCV`, `CallAst`, `TPageConfig`, `DiffOptions`, `RangeAsyncIterable`, `Y`, `TwistyPlayer`, `TmdbMovieResult`, `IEventFunction`, `EventRequest`, `Wire`, `SerializedAction`, `SortingType`, `B7`, `CoreFeatures`, `SurveyResultModel`, `WritingSettingsDelegate`, `IAstElement`, `IPackageDescriptor`, `ThyPopoverRef`, `StitchesProps`, `LoggingEvent`, `TemplateExecutor`, `SymVal`, `ContextMenuService`, `TaskConfig`, `SettingsFile`, `AwaitNode`, `ScrollLogicalPosition`, `GleeConnection`, `IbkrEvents`, `ISiteState`, `ActorId`, `ChatType`, `DocumentChange`, `TIdType`, `DeviceClass`, `ApiValue`, `MsgBlock`, `DrawIOUMLDiagram`, `DbMempoolTx`, `LS.CancellationToken`, 
`LedgerRequest`, `IFollow`, `Katana`, `IParentNode`, `MaterialRenderContext`, `AnimationFactory`, `interfaces.Lookup`, `ExportedConfig`, `ArticleEntity`, `IProxyContext`, `DefaultFocusState`, `HTMLIonSegmentButtonElement`, `CurrencyOption`, `ProvenClaim`, `HlsManifest`, `IExternalDeviceId`, `SankeyDiagramDataView`, `unwrapContext`, `requests.ListLimitValuesRequest`, `PivotItem`, `VanessaTabs`, `FunctionalUseCaseContext`, `NamedArgTypeBuildNode`, `vscode.Position`, `MessageEvent`, `NodeCG`, `EmitTextWriter`, `SDKError`, `OrganizationEmploymentType`, `RenderPassDescriptor`, `ConcurrentWorkerSet`, `PaletteDefinition`, `BaselineFileContent`, `IntrinsicType`, `GroupItem`, `WorkspaceLeaf`, `KbnFieldType`, `XroadIdentifier`, `ElementRect`, `HTMLImageSource`, `VProps`, `Hertz`, `GameState`, `LitParser`, `MasterDataObject`, `StateChange`, `IAureliaProject`, `FadingFeatureParameters`, `PluginDeleteActionPayload`, `SubmittableExtrinsic`, `GetConnectivityInfoCommandInput`, `ComputeImage`, `RemoteEndpointConfiguration`, `CreateClusterResponse`, `SpaceBonus.TITANIUM`, `Strapi`, `TestAdapter`, `EditPoint`, `DataRequestDescriptor`, `PageButtonProps`, `IHDPreviewState`, `SdkClientMetricFrame`, `THREE.Euler`, `ContextSetImpl`, `WriteLeaderboardRecordRequest`, `ModuleInstance`, `PreviousSpeakersState`, `SparseMerkleTree`, `TaskInProgress`, `CustomNode`, `QueueMap`, `FkDstrGridData`, `tr.actions.Args`, `TickFormatterOptions`, `protos.google.protobuf.IEmpty`, `RenderPassToDefinitionMap`, `SharedValue`, `X12Segment`, `IAzureMapFeature`, `ModelEvaluateDatasetArgs`, `GithubIssue`, `DictionaryType`, `IKactusFile`, `StringifyContext`, `InternetGateway`, `ExportType`, `StatResult`, `ReportService`, `TMessageContent`, `OptimizeCssOutput`, `ColorStyle`, `NumericLiteral`, `ListEmailIdentitiesCommandInput`, `Queries`, `AddressString`, `UniswapV1Client`, `CameraUpdateResult`, `nodeFetch.RequestInit`, `BoneSlot`, `ExecAsyncResult`, `TableOperationColumn`, `AppSettings`, `ICommandContext`, `DeleteOptions`, `FunctionService`, `SessionContext`, `GfxPrimitiveTopology`, `UgoiraInfo`, `HsAddDataLayerDescriptor`, `XliffMerge`, `UITextField`, `_NotificationConfig`, `GraphQLFieldMap`, `GetMessagesFormatterFn`, `ComponentCtor`, `ValidationExceptionField`, `Utils.ITimeProvider`, `ChildAppRequestConfig`, `EMailProcessingStatus`, `io.Socket`, `PrintTypeFlags`, `FunctionsMetadata`, `LoopReducer`, `DataTypeFieldAndChildren`, `HttpClientOptions`, `PlanetGravity`, `AxiosResponseGraphQL`, `IterableFactory`, `ILexoNumeralSystem`, `ApproxResult`, `DeleteRepositoryCommand`, `AuthenticationProviderOptions`, `HeaderGroup`, `BlockNumberRepository`, `DepthwiseConv2D`, `Long`, `CompilerSystem`, `IProjectSetupData`, `requests.ListBootVolumesRequest`, `ICommandOptionDefinition`, `WarningPrettyPrinter`, `JsonFormsState`, `MlRouteProps`, `ActionsSdkApp`, `BinarySensorCCAPI`, `TileLevel`, `TileDocument`, `NavigatorAxis`, `RetryLink`, `IFunctionCallArgument`, `ColorPickerItem`, `SamplerDescriptor`, `VariantAnnotationSummary`, `Models.OrderStatusReport`, `RemoteProvider`, `DeleteListenerCommandInput`, `ComponentTreeNode`, `GPUTexture`, `QueryParamDef`, `APIChannel`, `ClientUser`, `IAppStrings`, `ioBroker.Object`, `PagesLoaded`, `MovimientoModel`, `NoteDoc`, `Debouncer`, `Balance`, `React.HTMLProps`, `EnumOptions`, `PreprocessorSync`, `ColumnApi`, `GenericCall`, `IChangeDiscussionItem`, `UpdateApplicationCommandOutput`, `FormValues`, `UserAnnotation`, `CryptoFrame`, `JobNumbered`, `KeyedDeep`, `IMatchOptions`, `d.JsonDocs`, `SettingsDataUpdate`, `Zoo`, 
`ToastProps`, `MessageBus`, `ConfigSetExecutionGroup`, `GeneratedFont`, `ModuleJob`, `HitDatabaseEntry`, `SendPropDefinition`, `TThis`, `FuncInfo`, `TensorLike2D`, `IExternalPrice`, `NoteType`, `Dependencies`, `ITodosState`, `MrujsPluginInterface`, `AxiosRequestConfig`, `MicroframeworkLoader`, `DeploymentCenterFormData`, `ObjectTypeMetadata`, `ListsPluginRouter`, `CollisionObject2DSW`, `StreamQuery`, `TokenModel`, `TargetSummary`, `AccountsOperationStep`, `OptionLike`, `ExtraOptions`, `ParseNodeType`, `BasicGraphOnEdges`, `ContextFlags`, `IViewEntity`, `HyperScriptHelperFn`, `AST`, `DaffOrderReducerState`, `LanguageState`, `ResourceLink`, `RTCRtpCodingParameters`, `DateSegments`, `TodoFilter`, `UiActionsEnhancedSerializedEvent`, `RendererType2`, `EventHub`, `IProxySettings`, `ICodeBuilder`, `IReferenceSite`, `Zone`, `aws.autoscaling.Policy`, `ResumeData`, `DescribeAlgorithmCommandInput`, `PartialCell`, `PvsDefinition`, `MenuID`, `VueAutoRoutingPlugin`, `ComparisonFunction`, `Events.pointerdragend`, `SubMeshStaticBatch`, `FieldFormatMap`, `MetricName`, `AlertMessage`, `AddressAnalyzer`, `EmptyAction`, `DateRangePickerProps`, `HopeElement`, `IRoutes`, `IMatchResult`, `B11`, `MessageLogger`, `TestCollection`, `ListFlowsCommandInput`, `Atoms`, `PaginatedRequestOptions`, `QueryBidResponse`, `HasLocation`, `MultiFileRenderResult`, `VtexHttpClient`, `BottomSheetNavigationState`, `BasePoint`, `reduxForm.ConverterForm`, `ReactMouseEvent`, `UpdateParams`, `HistoriesService`, `FreezerContract`, `AS`, `MockCallback`, `CharMap4`, `PackageJsonLookup`, `GrowableBuffer`, `BrowseService`, `TokenFields`, `InjectorModule`, `PivotAggsConfig`, `SigningWallet`, `ExpressRouteCrossConnection`, `NewTorrentOptions`, `BuildSupport`, `CertificateResponse`, `IpcService`, `vscode.NotebookData`, `KibanaResponseFactory`, `LiveMap`, `Events.exitviewport`, `Error_ContextEntry`, `DownwriteUIState`, `UserOperation`, `FieldBase`, `IElementColors`, `BuildContext`, `PutLoggingConfigurationCommandInput`, `SecurityProviders`, `IRgb`, `Ora`, `IUrl`, `ServiceConfigDescriptor`, `FallbackProvider`, `Uuid`, `JsonLdDocumentProcessingResult`, `ResourceDetails`, `GlobalAction`, `SqlTuningAdvisorTaskSummaryReportObjectStatFindingSummary`, `ShareStoreMap`, `TAggParam`, `GetVpcLinkCommandInput`, `AudioVideoController`, `SegmentBase`, `ManyToMany`, `ElasticLoadBalancingV2MetricChange`, `AlignConstraint`, `HandlerNS.Event`, `DaffAuthorizeNetReducerState`, `KeyProvider`, `HTMLParagraphElement`, `RedAgateElement`, `UserAuth`, `ScopedStateManager`, `UpdateDetectorRecipeDetectorRule`, `APIRequest`, `fromRepositoriesStatisticsActions.GetRepositoriesStatisticsCollection`, `OrgInfo`, `InputStyleProps`, `GlobalVariantGroupConfig`, `UserCreateInput`, `TaskGroup`, `WebpackConfiguration`, `FileDto`, `CombatStats`, `ExecutionConfig`, `TransitionProps`, `AssetPublishing`, `Relative`, `WebMessageRawPayload`, `DepthStyleProps`, `CommonCrypto`, `NumberListProps`, `ServerEngine`, `server.Position`, `protos.common.SignaturePolicy`, `CompilerHost`, `SearchParams`, `requests.ListWaasPolicyCustomProtectionRulesRequest`, `AnimatorDuration`, `SqlTaggedTemplate`, `ProcessErrorEvent`, `IconifyIconCustomisations`, `ShortUrlRecord`, `SPClientTemplates.RenderContext_Form`, `DocumentProcessorServiceClient`, `DialogStateReturn`, `UndoManager`, `SwaggerSpec`, `ChatUser`, `ApplicationQuestion`, `StringToken`, `Timing`, `JUser`, `OpenApiParameter`, `_Explainer`, `runnerGroup`, `CGSize`, `SavedObjectConfig`, `EndCondition`, `TransportMessage`, `ts.LeftHandSideExpression`, 
`TranslatorService`, `ListenOptions`, `ProductAnalyticalResult`, `AnimationEvent`, `SchemaProvider`, `ProxyGroup`, `RawGraphData`, `ERC20`, `AbsoluteFilePath`, `Cancellation`, `BlockchainHandler`, `RegisteredMessage`, `OffscreenCanvasRenderingContext2D`, `SupportedPackageManagers`, `AdapterFindOptions`, `ScrollEvent`, `ISimplestConfigFile`, `ConfigurationModel`, `DefaultExecutor`, `KMSKey`, `IPCMessages.TMessage`, `RecordManager`, `ClipEdge`, `AttributeDefinition`, `SObjectRefreshOutput`, `ReportingInternalSetup`, `RelationQueryBuilder`, `InternalServiceException`, `ContextValueType`, `PartyLeave`, `DictionaryFileType`, `ReduxLikeStateContainer`, `LogsData`, `SetStateCommitmentJSON`, `ComponentWithUse`, `NSMutableDictionary`, `VideoInfo`, `SettingsModel`, `DeployProxyOptions`, `TypeDBOptions`, `Service$`, `mozEvent`, `IDatabaseResultSet`, `ListProjectsCommandOutput`, `MockTextChannel`, `IPageChangeEvent`, `AliasMapItem`, `ProductMap`, `IICUMessage`, `WriteBatch`, `TemplateSummary`, `DevOpsAccount`, `Intl.DateTimeFormatOptions`, `HighlightData`, `ThemeOption`, `TVLAnalysis`, `protos.common.MSPPrincipal`, `FsApi`, `SxToken`, `Metadata_Item`, `TspClient`, `d.ComponentCompilerProperty`, `ITemplates`, `NotificationChannelServiceClient`, `BearerTokenResponse`, `DataFetcherOptions`, `IGatsbyImageData`, `UpdateThemeCommandInput`, `PgClass`, `PaperProps`, `MyCustomObservable`, `Ecies`, `requests.DeleteJobRequest`, `Models.AccessTier`, `ListSnapshotBlocksCommandInput`, `JobStatus`, `WebhookClient`, `StoredAppChallenge`, `EveesMutation`, `AuditService`, `TBuilder`, `ExposedThing`, `ToJsonProperties`, `OptionalResources`, `ProposalTx`, `YamlMapping`, `ActivityFeedEvent`, `CombatAttack`, `AstroConfig`, `ClassNameStates`, `tape.Test`, `DecoratorOptions`, `SPClientTemplates.RenderContext`, `FrameworkConfiguration`, `PermissionLevel`, `ABLDocument`, `IChamber`, `common.Region`, `ChannelsState`, `Shift`, `FocusZoneDefinition`, `PymStub`, `ScopeSelector`, `NeverType`, `SCHEMA`, `DinoController`, `ShortcutService`, `KeybindingRegistry`, `requests.ListJobsRequest`, `GLRenderer`, `WebsocketRequest`, `anchor.web3.PublicKey`, `FakerStatic`, `ConfigProviderProps`, `SendableMsg`, `AutofillMonitor`, `OutputChannelLogger`, `OriginalDocumentUrl`, `LegacyOpenSearchError`, `ButtonLabelIconProps`, `TStoreName`, `ShaderPass`, `Allure`, `ReadableAtom`, `RangeFilterParams`, `firebase.Promise`, `MDCProgressView`, `Descriptor`, `BuffData`, `CharList`, `FetchHeaders`, `DataStartDependencies`, `ESLImage`, `PurchaseList`, `InfoActor`, `CanvasSystemIcon`, `OperatorContextFunction`, `AClass`, `ImageState`, `AggregateRoot`, `OnReferenceInvalidatedEvent`, `NotificationsStart`, `RtmpOutput`, `Constructable`, `ParameterComputeType`, `ObjectValue`, `TimeService`, `ExpressionAstNode`, `Dev`, `ArgumentNode`, `ScreenService`, `ListAppInstancesCommandInput`, `TspanWithTextStyle`, `NoInputAndNoOutputCommandInput`, `AxisSpace`, `AutofillField`, `IAppProps`, `ComposedPublicDevice`, `SubcodeLine`, `SchedulerLike`, `PredicateModel`, `ClassProvider`, `TStyle`, `Partial`, `Pickle`, `MediatorFactory`, `BaseRenderer`, `DueDate`, `NodeLocation`, `TipLengthCalibration`, `DoorLockLoggingCCRecordGet`, `ContractPrincipalCV`, `url.Url`, `SuggestionsComponentProps`, `DateWrapperFormatOptions`, `GenericDispatch`, `Models.CurrencyPair`, `Domains`, `WorkerResponse`, `Backbone.ObjectHash`, `MockStakingContract`, `ScreenMatrixPixel`, `WebMscore`, `capnp.Data`, `SelectionArea`, `monaco.languages.LanguageConfiguration`, `PUPPET.payloads.Room`, `TObj`, 
`DataToGPUOptions`, `Int32`, `Carrier`, `SafeResourceUrl`, `IDynamicPerson`, `PubScript`, `UpdateAccountRequest`, `ISkin`, `CaseNode`, `EdgeCalculatorHelper`, `IdleState`, `ts.Program`, `ODataCallable`, `TypesImportData`, `PassphraseError`, `RouteChildrenProps`, `BasePeerType`, `ng.IRootScopeService`, `ISample`, `RouteMap`, `WorkRequestSummary`, `PermissionObjectType`, `State`, `ElementProperties`, `Credit`, `RNode`, `ExceptionHandler`, `BarStyleAccessor`, `UserSettingsState`, `BitBuffer`, `ContractCaller`, `InputTimeRange`, `StringLiteral`, `TransactionContext`, `ParseNode`, `Settings`, `ICompiledRules`, `IDistro`, `SetRepositoryPolicyCommandInput`, `DaffCategoryPageMetadata`, `IRole`, `StoreState`, `FieldParamEditorProps`, `DeleteDatasetRequest`, `DidChangeTextDocumentParams`, `GuildChannel`, `DatabaseResultSet`, `GeometryContainmentRequestProps`, `TypeDescriptor`, `IVector3`, `PageMetadata`, `IfStatementContext`, `P2PMessagePacketBufferData`, `RemoveTagsFromResourceCommand`, `d.SourceMap`, `BitbucketUserRepository`, `SelectionModel`, `BckCtrlData`, `EnvoyContext`, `IterableIterator`, `IBpmnModeler`, `UserFacade`, `WsService`, `TradeSearchHttpQuery`, `TFlags`, `ProjectClient`, `BitField`, `ELineTypes`, `Real_ulong_numberContext`, `Models`, `BluetoothCharacteristicUUID`, `ToComponent`, `SocketState`, `AsyncState`, `SequenceKey`, `EmailTemplateService`, `STEP_RECORDS`, `StageCrewMember`, `ViewStateProps`, `RelationQueryBuilderContract`, `VaccinationEntry`, `DragResult`, `WebWorker`, `TypeOfExpression`, `LineAnnotationStyle`, `JoinTree`, `NowResponse`, `IStatusProvider`, `LogGroup`, `ExtendedIColony`, `IMdcRadioElement`, `HStackProps`, `bAsset`, `WebviewPanel`, `ITrack`, `ITypedResponse`, `EventMetadata`, `LinkedAccountsService`, `PreventCheck`, `ResponseInterface`, `MssPackage`, `IExchangeInfo`, `FrameOverTime`, `TagComponent`, `NumericNode`, `FrameworkVersionId`, `PolylinePoint`, `CookieStorage`, `RCloneFile`, `SummaryST`, `FeaturedSessions`, `IDocumentStorage`, `ValuesProps`, `FleetMetricDefinition`, `ClientLibraryState`, `InjectFlags`, `ILogger`, `TemporalArgs`, `KeyInDocument`, `TRPCClient`, `DaffConfigurableProductVariant`, `DisconnectionEvent`, `PresSlide`, `UpdateGatewayCommandInput`, `AutowiredOptions`, `PoolingService`, `ModuleCode`, `Clique`, `ComboType`, `OperationMethod`, `VerticalTextAlignment`, `socketio.Server`, `BatchCertificateTransfer`, `RpcMessageData`, `LocalStorageKeys`, `OrthographicCamera`, `OrmService`, `AssetDetails`, `Tracer`, `VideoStreamDescription`, `ExportRecord`, `MockConfig`, `DhcpOption`, `AbiFunction`, `SurveyResultMongoRepository`, `IKeyboardDefinitionAuthorType`, `RawAbiDefinition`, `DeclarationInfo`, `ILineTokens`, `FormDefinition`, `PaymentV1`, `ReadonlyMap`, `Highcharts.QuadTreeNode`, `RebootDBInstanceCommandInput`, `PipeOptions`, `OverlayStart`, `JestAssertionError`, `MetaReducer`, `IDocumentElementKey`, `PoseNetConfig`, `InetLocation`, `CliOptions`, `IScripts`, `IRowIndices`, `TableRowProps`, `Pagination`, `YallistNode`, `SendResponseParams`, `BundleModule`, `StackDescriptor`, `FormGroupState`, `MethodArgsRegistry`, `FortaConfig`, `MaxPooling2D`, `ResolveRequest`, `MulticallRequest`, `Installation`, `DeploymentTargetsOperationIO`, `DocumentLink`, `MenuPositionX`, `StaticSiteUserProvidedFunctionAppARMResource`, `CreateGroupResponse`, `SignedDebtOrder`, `Resilience`, `DiffSelection`, `GuidGenerator`, `MDCLineRippleAdapter`, `DeleteGlobalClusterCommandInput`, `InversionFix`, `SnapshotNode`, `ElementKind`, `SubtitlesCardBase`, `WordType`, 
`OnResolveArgs`, `ForStatement`, `ArrowFunctionExpression`, `CallIdChangedListener`, `CHR0_NodeData`, `DeleteChannelBanCommandInput`, `UnscopedEmitHelper`, `FormDialogService`, `DeleteFileOptions`, `Prisma.SortOrder`, `UsersAction`, `TransactionList`, `FilterEvent`, `ISampleSizeBox`, `App.windows.IWindowModuleMap`, `DatePickerDayDateSource`, `JRPCEngineEndCallback`, `DesignedState`, `WaveformItem`, `GraphQLEnumValueConfigMap`, `ExpandedBema`, `ESLint`, `ConfigurationPropertyDict`, `UndoStack`, `Chain`, `CreatePortalCommandInput`, `DiffInfo`, `ResponseComposition`, `FindTilesAdditionalParameters`, `PartiallyEmittedExpression`, `QuestionStatus`, `PreloadData`, `ActiveQuery`, `JestProcess`, `Tracker`, `ValidationSchema`, `PostProcessingRule`, `Events.postkill`, `PBBox`, `CommandInfo`, `MiddlewareAPI`, `EventProperties`, `IHTMLCollection`, `TooltipPosition`, `ILoaderIncludePipe`, `TreeDir`, `IntPretty`, `RpcClientFactory`, `AlertServicesMock`, `FocusRingOptions`, `IAnimatable`, `ICellEditorParams`, `ProcessRepresentation`, `DBClusterRole`, `DetailsProps`, `DocgeniHost`, `SegmentHandler`, `AngularFireDatabase`, `OnTouchedType`, `OsuBuffer`, `DomSanitizer`, `lf.query.Select`, `SoftVis3dShape`, `ProtoFab`, `IProfile`, `DetectorCallback`, `Angulartics2Matomo`, `OnboardingLightData`, `PiInterface`, `IRes`, `EdmxProperty`, `ConnectedSpaceId`, `CreateFileSystemCommandInput`, `CraftTextRun`, `BankTransfer`, `DragItem`, `SelectBox`, `jest.DoneCallback`, `AzureWizardPromptStep`, `ServerView`, `JestPlaywrightConfig`, `PageDensity`, `Pkcs12ReadResult`, `EncArrayBuffer`, `Lint.WalkContext`, `NotExpression`, `RequirementFn`, `GlobalPooling2DLayerArgs`, `Unsubscriber`, `requests.ListFastConnectProviderVirtualCircuitBandwidthShapesRequest`, `InlineField`, `ListDedicatedIpPoolsCommandInput`, `SNSInvalidTopicFault`, `Serializable`, `DeferredDefinition`, `GitCommit`, `ajv.ErrorObject`, `ValidateRuleOptions`, `SheetContainer`, `SecurityUtilsPlugin`, `SubMeshRenderElement`, `ReviewId`, `RustError`, `ProxyOptions`, `GenericStoreEnhancer`, `BaseTask`, `MeshAnimationTrack`, `FilterCondition`, `listenTypes`, `InterfaceTypeDefinitionNode`, `nodes.Declaration`, `DeleteQueueCommandInput`, `TreeNode`, `FragmentMap`, `SubscriptionHandler`, `ParameterScope`, `XQuery`, `AwrDbWaitEventBucketSummary`, `CSSDataManager`, `StyledForwardStyle`, `MergeResults`, `ColorPicker`, `IToastAttrs`, `FormControlProps`, `GetInstanceCommandInput`, `UpdateType`, `ListUsersCommand`, `BaseTx`, `ArrayOperation`, `CommitTransactionCommandInput`, `EnumInfo`, `IRegisterItem`, `AppsCommands`, `UseTournamentRoundsState`, `ElementSourceAnalysis`, `LexicalToken`, `TimelineViewWrapper`, `PartialTransaction`, `CraftTextBlock`, `RelationComparisonResult`, `SwitchOrganizationCommand`, `SolverT`, `ISetBreakpointsArgs`, `IBetaState`, `AccountSettings`, `S3Control`, `TableCellSlot`, `GfxRenderCache`, `_Code`, `InternalStacksContext`, `MaterialUiPickersDate`, `RLYTTextureMatrix`, `ProductCategory`, `EmbeddedOptions`, `FeatureID`, `Metadata`, `TransformCallback`, `MVideoUUID`, `RedBlackTree`, `Padding`, `ApplicationContract`, `requests.ListLoadBalancerHealthsRequest`, `RestClientOptions`, `KeyResult`, `UpdateResult`, `MDCTextField`, `ts.TypeReference`, `ProjectionRule`, `TTypescript.ParsedCommandLine`, `ZeroBalanceFn`, `TokenLocation`, `GDQOmnibarListItemElement`, `tcp.ConnectionInfo`, `Intermediate`, `CreateHsmCommandInput`, `BlockchainTimeModel`, `EventHandlerInfosForElement`, `ParsedQueryNode`, `UrlSerializer`, `Selectable`, `ProductEntity`, `ICodeEditor`, 
`SpriteRenderer`, `FileStats`, `SagaGenerator`, `ServerActionHandler`, `ExtractScript`, `MainDate`, `BuildingEntity`, `ISignerProvider`, `CallAgent`, `IVec2Term`, `NotNeededPackage`, `IAssetSearchParams`, `WorkflowMap`, `AutonomousDatabaseKeyHistoryEntry`, `AttributeType`, `IRuntimePosition`, `SpyLocation`, `ContractOptions`, `TFnRender`, `TextDocumentPositionParams`, `TokenAccount`, `ArrayValue`, `ServiceSpy`, `ICellInfo`, `ComponentDocument`, `DateFormatOptions`, `Feedback`, `CancellationId`, `EmojiData`, `TextDocumentContentChangeEvent`, `Mission`, `DetailedCloudFormationStack`, `ChartsState`, `IColorMappingFunction`, `ClientChannel`, `ListFindingsCommandInput`, `IStreamInfo`, `instantiation.IConstructorSignature7`, `IProfileMetaData`, `Bindable`, `TimeTrackingEntryIded`, `QueryRef`, `ListSourcesRequest`, `SharedServiceProvider`, `NumericType`, `FeatureConfig`, `Mjolnir`, `EventQueueItem`, `DescribePendingMaintenanceActionsCommandInput`, `CloudTasksClient`, `React.ReactText`, `BaseConvertService`, `BubbleSeries`, `Diff`, `NcTab`, `PhysicalQueryPlanNode`, `SolarWeek`, `DeserializationOption`, `HostWithPathOperationCommandInput`, `NAVObject`, `Channels`, `AssetKey`, `DependencyName`, `AnInterface`, `SecurityHub`, `LogicalExpression`, `messages.Ci`, `AuthPermissions`, `EventState`, `WidgetOptions`, `LibraryOptions`, `DataAccess`, `TreeModelNode`, `CallHierarchyIncomingCallsParams`, `PoiTableEntryDef`, `EventCategoriesMap`, `ApiSettings`, `WriterContext`, `IThemedToken`, `BuiltIns`, `HookDecorator`, `ListSchemaVersionsCommandInput`, `Lane4`, `TextProps`, `NodeSubType`, `DeepImmutableObject`, `SettingsContextProps`, `RowSchema`, `W1`, `DeclarationReference`, `SourceFileSystem`, `ServerMethods`, `Tolerance`, `Apollo.Apollo`, `AlignmentFactory`, `SendCommandOptions`, `PluginHostProps`, `StateStore`, `StaticFunctionDecl`, `LSConfigManager`, `ModelCtor`, `TProvider`, `DirectiveArgs`, `InternalKeyComparator`, `ShareArgs`, `AutoforwardConfig`, `PlayerList`, `ts.Decorator`, `TextBlock`, `Vec2`, `DescribeChannelBanCommandInput`, `MockWindow`, `ISignature`, `PathStartCoordinates`, `TestFileInfo`, `CustomSkillBuilder`, `ShipSource`, `dKy_tevstr_c`, `ThermostatFanModeCCReport`, `Dayjs`, `SqrlEntity`, `TimeRanges`, `AlternateSymbolNameMap`, `SkillLogicData`, `DeviceLog`, `IncrementalNode`, `EdgeCalculatorDirections`, `WorldCoordinates`, `PreciseNumber`, `GenericDraweeHierarchyBuilder`, `ValidationVisOptionsProps`, `SessionResponse`, `IsSpecificCellFn`, `ITEM_TYPE`, `DataMapper`, `EnhancedSelector`, `RulesMap`, `Reply`, `IGetTimeLogInput`, `Initialized`, `PanelHeaderProps`, `AnnotationWriter`, `BranchNode`, `CreateDeviceDTO`, `HealthCheck`, `Assets`, `TxType`, `BaseLayer`, `Kinds`, `SimpleChange`, `BookmarkTreeItem`, `ImplementedFunctionOptions`, `RTCRtpParameters`, `MarkSpec`, `MediaStreamAudioSourceNode`, `ClassAndSelector`, `InitialStylingValues`, `ObsConfiguration`, `ChartDownload`, `ImageUrlOptions`, `DocCollection`, `OrdenationType`, `RexFile`, `DecodedInformation`, `Ui`, `BaseShape`, `StatsTable`, `VisibleTreeNodes`, `ScanPaginator`, `EnumRow`, `SchedulerPromiseValue`, `CombatGameState`, `VApiTy`, `IEndpointOptions`, `AnkiConnectRequest`, `MockStream`, `OptionsState`, `NSType`, `IListItem`, `DGroup`, `IParticipant`, `PopupStackItem`, `AgentConfig`, `EndpointOptions`, `ts.SourceFile`, `AliasedFeature`, `AnyError`, `SelectEvent`, `PoolCache`, `EnvId`, `OptionalFindOptions`, `IUpworkClientSecretPair`, `FenceContext`, `FieldOptions`, `NodeContainer`, `IDynamicGrammarGeneric`, 
`PrerenderHydrateOptions`, `PlaceTradeDisplayParams`, `QueryKey`, `MenuDataItem`, `DebugProtocol.Message`, `TagResourceInput`, `DeleteAuthorizerCommandInput`, `Http3PriorityFrame`, `FunctionImportParameters`, `SlotDoc`, `IPackageJson`, `KWin.Client`, `TileDataSourceOptions`, `IMediatorMapping`, `RSV`, `OpHandler`, `MemFS`, `YearCell`, `Reference`, `HttpFetchOptionsWithPath`, `HdTronPayments`, `ts.BooleanLiteral`, `NarrativeSchema`, `SCN0_Camera`, `MatSnackBarRef`, `StackDeployOperation`, `RepositoriesStore`, `d.CssToEsmImportData`, `DBClient`, `ManifestCacheProjectAddedEvent`, `PlaceEntity`, `StackNavigationOptions`, `AnimeDetailsFields`, `RemoveSourceIdentifierFromSubscriptionCommandInput`, `DIDDataStore`, `FirebaseObjectObservable`, `DbStxLockEvent`, `IDraggableData`, `GameStateModel`, `$p_Declaration`, `ValidatePurchaseResponse`, `CacheManager`, `CollectionDependencyManifest`, `types.Position`, `Zeros`, `IOptimizeOptions`, `PrismaClientConstructor`, `CurveLocationDetailPair`, `ng.ICompileProvider`, `GetBlacklistReportsCommandInput`, `AssetPropertyVariant`, `EventsFnOptions`, `LoginOptions`, `Release`, `ImGuiIO`, `PeerApiResponse`, `VolumeAttachment`, `U8Node`, `Globe`, `Camera`, `Markdown`, `Hover`, `SnapshotRelation`, `UrlObject`, `OrganizationConfig`, `RecognizerResult`, `restm.IRestResponse`, `NettuAppResponse`, `LuaType`, `string`, `LogAnalyticsSourcePattern`, `UpdatePublicData`, `ExecutionItem`, `DiscussionEntity`, `TerminalOptions`, `Island`, `AnnotationProviderBase`, `Services.Plugins`, `LaunchOptions`, `server.IConnection`, `requests.ListDataGuardAssociationsRequest`, `Fog`, `OscillatorNode`, `SqlHelper`, `SearchInWorkspaceWidget`, `Polyface`, `WeuData`, `NamedMember`, `AlertState`, `SyncServer`, `LiveEventMessagingService`, `Highcharts.Options`, `IDownload`, `BotAnchorPoint`, `AppPage`, `Jwt`, `SchemaMap`, `Scraper`, `ISPList`, `StoredPath`, `TestingUser`, `DraftBlockType`, `StreamDeckWeb`, `PAT0_MatData`, `Apify.RequestQueue`, `SanityDocument`, `BaseEntity`, `MongoConnection`, `ContractConfig`, `TypedLanguage`, `CustomEventInit`, `PositionAnimation`, `DiscoverFieldDetailsProps`, `Cypress.ConfigOptions`, `IRestApiResponse`, `IParser`, `VmNetworkDetails`, `TableSelectionArea`, `Transpose`, `HttpHeaders`, `TexChunk`, `BridgeContracts`, `TransactionService`, `VisualViewport`, `DeleteScheduleCommandInput`, `LogicOperator`, `STATUS`, `NgPackagrBuilderOptions`, `QueueClient`, `FileStore`, `StateFor`, `IViewArgs`, `LocationInfo`, `DisplayObjectWithCullingArray`, `Math.Vector3`, `FunctionFactory`, `IFileSnapshot`, `Stone`, `UnicodeSurrogateRangeTable`, `IGetProjectsStatistics`, `FavoritesState`, `IntermediateToken`, `IAuthentication`, `CronOptions`, `ConfigItem`, `TaskManagerSetupContract`, `Visitors`, `SBDraft2ExpressionModel`, `Plugin`, `ColumnModelInterface`, `DataObject`, `IExportMapMetadata`, `ConsoleMessageType`, `ComponentSlotStyle`, `VdmMappedEdmType`, `ScalarParamNameContext`, `TrackByFunction`, `Fig.Subcommand`, `OnCallback`, `YearToDateProgressConfigModel`, `StackPath`, `TranslationEntry`, `DataRequestMeta`, `StoreBase`, `DebouncedState`, `ComponentResolverService`, `WidgetType`, `VideoQualitySettings`, `GithubAuthProvider`, `CompilerSystemRenameResults`, `WholeJSONType`, `PathFinderPath`, `AddTagsCommand`, `SpeedtestResult`, `TDDraw`, `PluginTransformResults`, `IBaseAddressAsyncThunk`, `ParamValues`, `LegacySocketMessage`, `BaseDirective`, `DynamicEntry`, `Testrec`, `IRenderDimensions`, `RequestBodyObject`, `EmployeeRecurringExpenseService`, `TagState`, `t.Errors`, 
`Keyword`, `PolicyBuilderConfig`, `CLM.Template`, `FakeChain`, `StylableResults`, `SortConfig`, `RebirthWindow`, `CascaderContextType`, `FontName`, `EntityCollectionDataService`, `FileHandlerAPIs`, `TypeUtil`, `UserDescription`, `DeeplyMocked`, `TypistOptions`, `Fetcher`, `InstallOptifineOptions`, `JobAgent`, `SortClause`, `_IIndex`, `MapEntity`, `Price`, `requests.ListAvailablePackagesForManagedInstanceRequest`, `ArgumentInfo`, `PlaybackRate`, `UserConfiguredActionConnector`, `SymbolWriter`, `IGLTFRuntime`, `CreateRegistryCommandInput`, `ShaderOptions`, `FakeSurveyDialog`, `IGceHealthCheck`, `NzThItemInterface`, `Solo`, `HtmlNode`, `PushNotificationData`, `IRefCallback`, `ImageBox`, `ReactElement`, `HostCancellationToken`, `RTCRtpEncodingParameters`, `ActionheroLogLevel`, `IHandleProps`, `Battle`, `NavMenu`, `PropInfo`, `LoginData`, `PerspectiveGetResult`, `IAdminUser`, `CaptionDescription`, `StoreGetter`, `IErrorObject`, `RawVueFileName`, `HAP`, `ResponderModel`, `BaseElement`, `IQueryOptions`, `KanbanBoardRecord`, `Toplevel`, `JPAResource`, `TourStep`, `AnalyticsConfig`, `NodeCryptoCreateHash`, `IParallelEnumerable`, `WorkRequestWaiter`, `MockDocument`, `JobService`, `XmlParser`, `SocketIoConfig`, `TransformPivotConfig`, `MemberAccessInfo`, `ComboBoxMenuItemGroup`, `DiagnosticSeverityOverrides`, `MeterChange`, `BitGo`, `DataLayout`, `Expression`, `AccessKeyId`, `MetricIndicator`, `LiskErrorObject`, `Entity.Account`, `InterviewQuestionSortMap`, `RpcConnection`, `RecordRow`, `OwnPropsOfRenderer`, `StatusBarItem`, `IContractWrapper`, `ts.FormatCodeOptions`, `TasksStore`, `DescribeLoggingOptionsCommandInput`, `RARCDir`, `IConfigurationSnippet`, `theia.WorkspaceFolder`, `ScalarsEnumsHash`, `SyncState`, `RendererService`, `FutureNumber`, `Frame`, `IEvents`, `MaxHeap`, `ICoords`, `Pouchy`, `NetType`, `TraceServiceSummary`, `AbstractModelApplication`, `KeysState`, `api.ISnapshotTree`, `SerializableRecord`, `SyncGroup`, `FactoryUDFunction`, `MigrationItem`, `StructService`, `ErrorConstructor`, `yubo.WaveOptions`, `DrivelistDrive`, `IStoreState`, `IMatchableOrder`, `Matrix2`, `IAttributeData`, `ErrorChain`, `RNNLayerArgs`, `NcTabs`, `Emitter`, `LayerListItem`, `ApplicationCommand`, `protocol.FileRequestArgs`, `DiagnosticResult`, `IncorrectFieldTypes`, `t_As`, `ApiTypes.Groups.MessagesById`, `SVGRenderer.ClipRectElement`, `VKeyedCollection`, `MovieState`, `MyResource`, `ChangesetGenerationHarness`, `IAuthZConfig`, `AncestorDefs`, `SearchResultItem`, `IResizeEvent`, `MapScalarsOptions`, `ActiveModifiers`, `VueRouter`, `PolicyDocument`, `PlayOptions`, `StatefulSet`, `IRuleConfig`, `Push`, `Parser`, `PlayerSubscription`, `PrettySubstitution`, `Event_PropertiesEntry`, `SWRInfiniteKeyLoader`, `JQLite`, `Granularity`, `TestClientLogger`, `EncryptedPassphraseObject`, `Capabilities`, `ServerRoute`, `IGatewayMemberXmpp`, `IDateRangeActivityFilter`, `LuaState`, `TopNavMenuData`, `ValidatorSpec`, `AxisMap`, `RebaseResult`, `ChangedEvent`, `HnCache`, `ANodeExpr`, `ESLSelectOption`, `MessageTypeMapEntry`, `NgModuleDef`, `TrackEvent`, `ResultData`, `MetricDescriptor`, `Highcharts.ClusterAndNoiseObject`, `MatDatepickerIntl`, `FrameworkOptions`, `SearchAllIamPoliciesRequest`, `ContainerAdapter`, `GetDomainItemsFn`, `TaskDraft`, `HttpProbe`, `TestItem`, `p5exClass`, `FilePathStore`, `CurveLocationDetail`, `IdSet`, `DialogSubProps`, `TreemapNode`, `ComputationCache`, `QueryExecutorFn`, `SegmentAPIIntegrations`, `YAMLParser`, `cheerio.Root`, `BigNumber`, `HotkeysEvent`, `d.ResolveModuleIdOptions`, `FileMode`, 
`OptionsOrGroups`, `ISoundSampleDescription`, `d3.Selection`, `SaveOptions`, `BuildEnv`, `DestinationSearchResult`, `PropertyDocumentationBlock`, `RuntimeService`, `WindowType`, `GraphContract`, `Effector`, `ECSSystem`, `cdk.StackProps`, `MetricDataPoint`, `IChainConfig`, `TestDisposable`, `IFormSectionGroup`, `VisiterStore`, `ConstantSchema`, `ControllerRenderProps`, `InteractionManager`, `IFilterItem`, `DataViewComponentState`, `EntityLike`, `LoadCollections`, `MoveAction`, `UpdateConfigurationSetEventDestinationCommandInput`, `TimeQuery`, `BaseShrinkwrapFile`, `ListPoliciesRequest`, `IntrospectionObjectType`, `ILanguage`, `AzureNamingServiceOptions`, `Posts`, `IExternalFormValues`, `QueryResolvers.Resolvers`, `BinaryLike`, `IRunConfig`, `ParsedInterval`, `NgEnvironment`, `Warning`, `Anomaly`, `NSMutableArray`, `d.ScreenshotDiff`, `ISODateTime`, `SimpleUnaryOperation`, `Behaviour`, `TreeModelNodeInput`, `ITableOfContents`, `StorageProxy`, `LocalWallet`, `QueueFunctions`, `TransitionDescription`, `INativeMetadataCollector`, `StandardPrincipalCV`, `IDraggableList`, `GetLaunchConfigurationCommandInput`, `ManagementSection`, `StepBinding`, `PIXI.interaction.InteractionEvent`, `INumberColumn`, `LogDescription`, `UpdateGatewayInformationCommandInput`, `ControlParams`, `RenderContextBase`, `MetricSet`, `GoogleAuth`, `RedirectOptions`, `KernelMessage.IOptions`, `ICompactPdfTextObj`, `StringValueToken`, `AssembledPipelinesGraphics`, `DefaultEditorSideBarProps`, `i18n.Message`, `ModdleElement`, `SurveyObjectProperty`, `TagData`, `evt_sym`, `TranslaterPoint`, `Base58CheckResult`, `FindManyOptions`, `DetailListService`, `ProjectToApiAnalysis`, `GovernElement`, `CodeGenerator`, `ProjectModel`, `CompletionStatus`, `ProgressCb`, `EditorChangeLinkedList`, `EmitBlockKind`, `CodeGen`, `SelectOutputDir`, `GreetingWithErrorsOutput`, `ColorConfig`, `Reminder`, `ICommon`, `ICourse`, `Pack`, `TagProps`, `VisualSettings`, `OsmObject`, `AsyncAction`, `RecordDataIded`, `TradeExchangeMessage`, `PSPoint`, `LLVMContext`, `IDataViewOptions`, `MediaItem`, `VElement`, `VideoConverterFactory`, `ColorGradient`, `VisualizeInput`, `VirtualKeyboard`, `ParticipantListItemStyles`, `ts.StringLiteral`, `TextStyleProps`, `SketchName`, `ISearch`, `CorePreboot`, `IncompleteSubtypeInfo`, `RelatedRecords`, `Touched`, `BarData`, `SelectOptionValue`, `StoreChangeEvent`, `NextApiReq`, `ProjectsService`, `WorkflowModel`, `ITransformHooks`, `TicTacToeGameState`, `ChampList`, `InputAndOutputWithHeadersCommandInput`, `LiteralExpression`, `UnitProps`, `CalendarDateInfo`, `MapView`, `TradeFetchAnalyzeEntry`, `NodeResult`, `k8s.types.input.core.v1.PodTemplateSpec`, `ethers.providers.TransactionRequest`, `ImmutableAssignment`, `SharePluginStart`, `UniqueIdGenerator`, `NamedBinding`, `AutocompleteItem`, `FleetMetricSummaryDefinition`, `MlLicense`, `ModernServerContext`, `StatusBar`, `AggParamsState`, `EntryContext`, `ITour`, `TFieldName`, `StreamingCmd`, `ImportInterfaceWithNestedInterface`, `FromTo`, `GfxBufferP_WebGPU`, `AttributeReader`, `FieldDefn`, `TResolvedResponse`, `SFUISchemaItem`, `WsClient`, `GunScope`, `AllSelection`, `LockStepVersionPolicy`, `TextureCube`, `AuthParams`, `NewObjectOptions`, `TransformerProps`, `TxSummary`, `IndyPool`, `FastifyReply`, `ExpressionRenderDefinition`, `LimitExceededException`, `AppState`, `UseTransactionQuery`, `SearchResultsArtist`, `PerfToolsMutation`, `Box2`, `IamRoleName`, `ClusterSettingsReasonResponse`, `Clipper`, `Formatters`, `CircularList`, `PositionLimitOrderID`, `RestRequestMethod`, 
`StoryGetter`, `DropViewNode`, `CampaignItemType`, `AugmentedActionContext`, `LayoutedNode`, `STPPaymentContext`, `NgEssentialsOptions`, `IDocumentMergeConflict`, `WeightsManifestConfig`, `Hasher`, `SymbolDisplayPartKind`, `TargetedAnimation`, `BasePacket`, `grpc.CallOptions`, `IDataMessage`, `FeatureGroup`, `ISelector`, `DataPublicPluginEnhancements`, `TextStringNoLinebreakContext`, `BabelPluginChain`, `WalletOrAddress`, `CallbackError`, `SimpleButton`, `TallyType`, `WechatMaterialEntity`, `EditionId`, `CloudBuildClient`, `MetadataService`, `ComponentLookupSpec`, `DeleteRequest`, `ModelType`, `PaletteType`, `OriginConfig`, `ReadModelRegistry`, `SPHttpClientResponse`, `HTMLWalkState`, `CollectionViewLayout`, `ProductProps`, `RouteDryMatch`, `AnyToVoidFnSignature`, `VirtualContestInfo`, `RoutableTileWay`, `LineCaps`, `TextRangeWithKind`, `SideBarView`, `VCSConnector`, `DeleteApplicationCommand`, `SuiAccordionPanel`, `DeleteSnapshotCommandInput`, `ts.ArrayTypeNode`, `Knex.SchemaBuilder`, `TestComponentProps`, `AudioSelector`, `IBreakpoint`, `ResultState`, `MutateResult`, `Unit`, `Pagerank`, `AuthSigner`, `PeekZFrame`, `RedBlackTreeNode`, `InputBoolean`, `ProfileStateModel`, `MultiLanguageBatchInput`, `GitHubClient`, `Primary`, `EvaluationContext`, `UpdateProfile`, `BarEntry`, `IBaseImageryLayer`, `QueuingStrategy`, `SharedMatrix`, `LinesGeometry`, `UserConfig`, `Transform3D`, `QueryImpl`, `GetBranchCommandInput`, `GlobalService`, `OpenSearchDashboardsRequest`, `XmlEmptyMapsCommandInput`, `ITransUnit`, `Identifiers`, `CollectionDataService`, `ContainerRef`, `BodyImplementation`, `GetReviewerStatisticsPayload`, `AppManifest`, `PipelineResult`, `AnyObjectNode`, `LocalizeFunc`, `requests.ListMaintenanceRunsRequest`, `BaseMemory`, `WalletKey`, `EventAxis`, `SettingTypes`, `requests.ListWorkRequestsRequest`, `CurrentMoveInfo`, `BiKleisli`, `LocationData`, `EnvironmentAliasProps`, `ImageScanner`, `InputButtonCombo`, `GlyphElement`, `VcsItemConfig`, `LayoutSandbox`, `AlertIconProps`, `JsonSchema`, `ListPackagesForDomainCommandInput`, `KeyPair`, `ReferencedSymbol`, `UpdateResults`, `GmailMsg`, `Knex.Transaction`, `IAppServiceWizardContext`, `RRNode`, `IPageInfo`, `SFPPackage`, `WebController`, `Src`, `PuppetASTContainerContext`, `Models.QuotingParameters`, `UserIdDTO`, `EditableBlock`, `YAMLSchemaService`, `Express.Multer.File`, `BuildOptionsInternal`, `TagDescription`, `IChainForkConfig`, `MentionDefaultDataItem`, `GADNativeAd`, `NavigationProvider`, `ResponseErrorAttributes`, `UpdateUserAvatarService`, `MiniMap`, `StructsLib1.InfoStruct`, `OutputError`, `MessageFileType`, `NVM3Objects`, `PrayerTimes`, `MsgHandler`, `TermsIndexPatternColumn`, `RouteDefinitions`, `MarkdownTable`, `Intl.NumberFormatOptions`, `JSX.TargetedKeyboardEvent`, `DrawBufferType`, `QListWidgetItem`, `StoreGroupLike`, `DePacketizerBase`, `MessageFormatterOptions`, `QuestionAdornerViewModel`, `RouteLocationNormalizedLoaded`, `PostRepository`, `MixItem`, `IPropertiesElement`, `ElementDefinition`, `ContractData`, `GestureResponderEvent`, `MemberData`, `VariablePart`, `SceneEmitterHolder`, `HeroService`, `StaffLayout`, `SystemType`, `SymbolAccessibilityResult`, `DescribeFleetsCommandInput`, `AnimationPromise`, `ExtensionNodeAttrs`, `FunctionAppEditMode`, `EventBuilder`, `EnhancedReducerResult`, `PageConfig`, `ModalProps`, `ActionsRecord`, `MatOpM`, `ProgressBarState`, `BinaryOpNode`, `SingleKeyRangeSet`, `StoreOptions`, `AnimationEasing`, `MemoryRenderer`, `V1DaemonSet`, `CurrentRequest`, `d.JsonDocsProp`, `UpdateEntryType`, 
`ICredentialType`, `PersianDate`, `BeanWrapper`, `MultiFn1O`, `ILayoutRestorer`, `IGherkinStreamOptions`, `CrochetCommandBranch`, `d.PropOptions`, `ParserContext`, `ReturnType`, `SfdxTask`, `IDownloadOptions`, `VisualizationProps`, `SolStateMerkleProof`, `DataPublicPluginStart`, `ProviderEventType`, `BabelOptions`, `PureReducer`, `Dog`, `NzMentionComponent`, `util.TestRunError`, `TorusStorageLayerAPIParams`, `TensorArrayMap`, `AxisAlignedBounds`, `DomainData`, `Enums`, `ResizableTest`, `Constructor`, `FlowCall`, `OfIterable`, `DocfyResult`, `IStyleObj`, `LaunchTemplateSpecification`, `AsyncThunks`, `JsCodeShift`, `AddressState`, `Models.CommandInput`, `ContentLocation`, `KeyboardShortcut`, `InferGetStaticPropsType`, `RollupOptions`, `StickyDirection`, `SubscribeFunction`, `IOsdUrlStateStorage`, `ProtocolParams.Propose`, `IReactExtension`, `ProofCommand`, `IMovie`, `OpenYoloInternalError`, `WorkingDirectoryFileChange`, `GDevice`, `MediaChange`, `ResultProgressReporter`, `StreamingFeeState`, `BeatUnitDot`, `TestERC721Token`, `LocalizationProviderProps`, `GetDomainStatisticsReportCommandInput`, `Criterion`, `SpeechSynthesisEvent`, `MediaSlot`, `ResetPasswordInput`, `ABLParameter`, `ScrollViewProps`, `LineCollection`, `StripeConfig`, `RTDB.Subscribe`, `GenericDefault`, `SandDance.VegaDeckGl.types.LumaBase`, `IListSelectionConfig`, `JGOFNumericPlayerColor`, `FluidDataStoreContext`, `ErrorChunk`, `GenerateFileCommandParameters`, `SelectOptionProps`, `SubcodeWidget`, `SendCommandCommandInput`, `DeleteUserProfileCommandInput`, `FrameItem`, `HalResourceConstructor`, `d.TranspileModuleResults`, `HttpInterceptord`, `CursorState`, `ItemBuffer`, `ProtectedRequest`, `_IPBRMetallicRoughness`, `WStatement`, `DebugSystem`, `SelfDescribing`, `ConsoleAPI`, `XroadConf`, `ObjectButNotFunction`, `IApiRequestBody`, `DocumentClient`, `ConstantAndVariableQueryStringCommandInput`, `WithBoolean`, `HostRecord`, `Crisis`, `VisualizeEmbeddableFactoryDeps`, `MetadataCacheResult`, `IBaseEdge`, `ExpressionKind`, `EntityOp`, `Highcharts.AnnotationEventEmitter`, `DotnetInsights`, `StructureCollection`, `LURLGroup`, `Round`, `PackageInstructionsBlock`, `HTMLScLegendElement`, `ColDef`, `VoilaGridStackPanel`, `UpdateDocumentCommandInput`, `MtxGroup`, `TriggerProps`, `GameTreeNode`, `BitWriter`, `ProfileX`, `BlockSyntaxVersion`, `ExpandedTransitionListener`, `Hour`, `GanttBarObject`, `MapSet`, `OutputType`, `GridProps`, `Freeze`, `APProcessorOptions`, `WebpackWorker`, `StringLocation`, `SavedObjectsOpenPointInTimeOptions`, `IdentityArgs`, `RetryPolicy`, `ConstructorTypeNode`, `AdditionalProps`, `XNumber`, `ResetDBParameterGroupCommandInput`, `FieldToValueMap`, `JSDocAugmentsTag`, `ReduceArguments`, `RegLogger`, `TronUnlock`, `DefaultEmitOptions`, `TreeView.DropLocation`, `ISpec`, `DashboardContainerFactory`, `OpcuaForm`, `EdmxReturnType`, `InsertWriteOpResult`, `ArraySchema`, `GpuStats`, `angular.ui.IStateParamsService`, `LFO`, `WriteItem`, `UserConfiguration`, `RequesterType`, `App.storage.IStorageApiWrapper`, `requests.ListAddressListsRequest`, `TRWorld`, `GraphQLResolverContext`, `ASTTransformer`, `ListUserGroupsRequest`, `ViewPortHandler`, `SubscribeState`, `CallSignatureDeclaration`, `GaugeAction`, `CompositeBrick`, `HalLink`, `NativeReturnValue`, `IRealtimeEdit`, `AuthToken`, `files.FullPath`, `IIonicVersion`, `ParingTable`, `SimpleNotification`, `SelectableState`, `StreamPipelineInput`, `EncodeOption`, `SetDefaultPolicyVersionCommandInput`, `SidebarTitleProps`, `SecretVerificationRequest`, `TileSetAssetPub`, 
`ContentTypeSchema`, `Argon.SessionPort`, `ICategoricalColorMappingFunction`, `KonvaEventObject`, `SubSequence`, `SeriesBarColorer`, `UnsignedOrder`, `ScreenProps`, `Lunar`, `ClusterInfo`, `SModelRootSchema`, `NextPageWithLayout`, `TypeDefinition`, `PayloadType`, `ColumnScope`, `IMenu`, `BaseRecordConstructor`, `HighlightItem`, `ParamT`, `LoadOnDemandEvent`, `ts.ESMap`, `SavedState`, `ForeignAttributeSelector`, `ActionStatusEnum`, `PerpV2Fixture`, `PluginViewWidget`, `ProjectFn2`, `AttributeDecoratorOptions`, `IVocabulary`, `THREE.BufferGeometry`, `CoreCompiler`, `ReadableByteStreamController`, `Axes`, `IWorkflow`, `DataViewHierarchy`, `ModelsTreeNodeType`, `InstanceGeometryData`, `EventCategoriesMessage`, `DesktopCapturerSources`, `UpdateOptions`, `InterpolationPart`, `HapCharacteristic`, `IZosFilesOptions`, `PossiblyAsyncOrderedIterable`, `NonArpeggiate`, `ErrorArgs`, `CSSVariables`, `OrganizationPostData`, `InstallVirtualAppParams`, `d.HotModuleReplacement`, `LeagueStore`, `GitRepo`, `BottomNavigationViewType`, `DataResolverInputHook`, `RoutesWithContent`, `com.google.firebase.database.DataSnapshot`, `MeetingSessionStatusCode`, `IRenderOptions`, `Events.postupdate`, `TimelineRecord`, `TBuffer`, `MethodDeclarationStructure`, `ObservableParticle`, `ReadRepository`, `NetworkKeys`, `ManipulatorCallback`, `ColonToken`, `IUpworkDateRange`, `IBlob`, `TransactionMeta`, `MIRBody`, `StagePanelsManagerProps`, `MissingItem`, `StreamPresenceEvent`, `PluginClass`, `AssemblerQueryService`, `ImageResolution`, `MagicSDKAdditionalConfiguration`, `RectAnnotationSpec`, `ForwardedRef`, `VMLClipRectObject`, `tf.io.ModelArtifacts`, `CellPlugin`, `coreClient.CompositeMapper`, `UntypedBspSet`, `RequestData`, `LocationAccessor`, `ICorrelationTableEntry`, `Dealer`, `CourseId`, `InfiniteData`, `Sub`, `MonthlyForecast`, `ISearchGeneric`, `ClientProxy`, `StartOptions`, `SweepContour`, `HarmajaOutput`, `INotificationTemplate`, `IRawOperationMessage`, `VideoTile`, `AwaitExpression`, `Extractor`, `ResponderExecutionModes`, `d3Geo.GeoProjection`, `HostElement`, `CodepointType`, `SyncMember`, `GetFreeBalanceStateResult`, `PianoNote`, `MicrosoftDocumentDbDatabaseAccountsResources`, `Found`, `MaterialSet`, `InteractionProps`, `Tensor5D`, `ControlType`, `DataModifier`, `ConverterLogger`, `ts.FunctionExpression`, `SpawnSyncOptionsWithStringEncoding`, `ServicesAccessor`, `LabelCollector`, `IBufferLine`, `ParsedIniData`, `JobTrigger`, `AppContext`, `MachineContext`, `IImageryConfig`, `Global`, `ListAttendeesCommandInput`, `ModifierKeys`, `ClientMatch`, `Payport`, `MetricOptions`, `CreateRoomCommandInput`, `MeasuredBootEntry`, `DatasetMemberEntry`, `PromiseType`, `ScreenContextData`, `AddressDTO`, `ResourceTag`, `GraphicContentProps`, `AppThunkDispatch`, `TreeItemIndex`, `ProxyServer`, `core.IHandle`, `InvoiceItem`, `CalendarsImpl`, `ServerStatus`, `StorageFieldItem`, `FeatureSetup`, `PublicIdentifier`, `SubsetCheckResult`, `S3Source`, `ChildBid`, `tfc.io.IOHandler`, `Pact`, `DatastoreType`, `PluginsContainer`, `RootHex`, `CacheKey`, `t.CallExpression`, `JavaRenderer`, `DaffCartLoading`, `CrossTable`, `IVectorV`, `ValidationState`, `HeaderItemProps`, `RequireFields`, `BatchResponse`, `Apollo.LazyQueryHookOptions`, `LogAnalyticsSourceExtendedFieldDefinition`, `TranslationFormat`, `FocusEvent`, `CurrencyFractions`, `MetadataProperty`, `IChannelSigner`, `BuildSettings`, `PullRequestOpened`, `Investor`, `IMainClassOption`, `VideoDetails`, `commonServices`, `ITaskChainFn`, `B0`, `ICompileProvider`, `ListElement`, `SnippetNode`, 
`FileAnnotationType`, `DropTargetSpec`, `DeviceID`, `FabricEventBase`, `RV`, `RouteLocationNormalized`, `Events.postcollision`, `ReportBuilder`, `_CollectorCallback2D`, `ConnectionFormService`, `ListImagesRequest`, `Semver`, `_https.RequestOptions`, `ChunkIndex`, `ChromeBadge`, `ParserEnv`, `AssignableDisplayObjectProperties`, `SelectAmount`, `ConfigureResponse`, `RuleSetRule`, `decimal.Decimal`, `DepositTransaction`, `VirtualWAN`, `SourceMapGenerator`, `GroupRepresentation`, `AnalysisOptions`, `StorageLayout`, `PdbStatusDetails`, `LoggerService`, `EventOptions`, `JsonFormsStateContext`, `AddAtomsEvent`, `SequenceDeltaEvent`, `TwingNode`, `ThyTableColumn`, `RouterConfigOptions`, `ProcessRequirementModel`, `Coin`, `RuntimeTransaction`, `IInventoryArmor`, `album`, `INodeProperties`, `FiltersState`, `core.ETHSignTx`, `PhaseModel`, `DeleteSlotTypeCommandInput`, `CollisionParts`, `PageElement`, `PositionDirection`, `IFluidDataStoreFactory`, `InstanceOf`, `TestWorkspaceFactory`, `ShipData`, `FullQuestionWithId`, `W6`, `NodeContentTree`, `Contribution`, `TraceIdentifier`, `AccountConfig`, `ReducerMap`, `CreateCatDto`, `RawResult`, `IslandsByPath`, `Hapi.ResponseToolkit`, `BasePackage`, `IItem`, `tl.VariableInfo`, `IDocumentFragment`, `IViewZoneChangeAccessor`, `SessionTypes.RequestEvent`, `UI5Type`, `Ternary`, `ServerMessage`, `PointInfo`, `DeleteUserCommand`, `TweenFunc`, `ITokens`, `NormMap`, `CurveCrossOutput`, `TabRepository`, `AppCheck`, `CustomerDTO`, `ClientIntakeFormIded`, `ElementX`, `ts.NodeFactory`, `CompositeReport`, `fs.PathLike`, `ClientQuery`, `Memo`, `ec`, `BindingItem`, `ListUserProfilesCommandInput`, `RoleHTMLProps`, `CurrentVersion`, `ListApplicationsRequest`, `ThingDescription`, `ListRecommendationsCommandInput`, `SymbolIntervalFromLimitParam`, `TInjector`, `BufferStream`, `ISceneActor`, `HdBitcoinPaymentsConfig`, `InitializeHandlerArguments`, `Nerve`, `TreeBudgetEvent`, `SalesInvoiceModel`, `TextSegment`, `ArgonWebView`, `EnumProperty`, `RequestChannel`, `config`, `SessionId`, `ParticleEmitterWrapper`, `OriginOptions`, `protocol.FileLocationOrRangeRequestArgs`, `FloatOptions`, `SFDefaults`, `ScriptObjectField`, `IVanessaEditor`, `ICXSetup`, `CRUDEvents`, `RMSPropOptimizer`, `TextOpComponent`, `DOMTokenList`, `CreateDeploymentCommandInput`, `TimeSpec`, `Kubectl`, `ChainablePromiseElement`, `GfxInputLayout`, `GrabListener`, `ExceptionBlock`, `CompositionContext`, `DeleteFileSystemCommandInput`, `Transformer`, `ArchiverError`, `DispatchPropsOfControl`, `kms.KmsManagementClient`, `IncrementalElement`, `SubCommand`, `AGG_TYPE`, `DaffCartReducerState`, `Mat`, `VariantObject`, `ILeaguePrices`, `io.WeightsManifestConfig`, `FocusEventInit`, `UIntTypedArray`, `IResolveResult`, `IQueryParams`, `UserError`, `INodeHealthStateChunk`, `CreepActionReturnCode`, `Hideable`, `FileEvent`, `RangeBucket`, `DisassociateMemberCommandInput`, `CoerceResult`, `ArrayLiteral`, `MerchantGoodsEntity`, `ProxyPropertyKey`, `CellGroup`, `ParsedPath`, `ListPingProbeResultsRequest`, `StyleDoc`, `ListKeyManagerModifierKey`, `LocalMicroEnvironmentManager`, `CommandDescriptor`, `Adapters`, `LinkedPoint`, `Parsers`, `IExpectedSiop`, `SuperResolutionConfig`, `DeploymentParametersDoc`, `CountingData`, `HomeReduerState`, `Heading`, `TestArgs`, `LitecoinjsKeyPair`, `FileSystemProvider`, `ConvaiCheckerComponent`, `InternalGroup`, `MethodDescriptorProto`, `EffectRenderContext`, `ResourceLines`, `AuthDispatch`, `DrawerInitialState`, `CategoryCollectionParseContextStub`, `GreenBean`, `RequiredAsset`, `Keymap`, `IRandom`, 
`SvelteSnapshot`, `PreparsedSeq`, `Stapp`, `MyServer`, `ObjectDefinitionBlock`, `LoopBackAuth`, `GradientPoint`, `BitwiseExpressionViewModel`, `PostcssStrictThemeConfig`, `SQLiteDatabase`, `Outlet`, `UserDTO`, `TeleportService`, `TabularData`, `SchemaEnv`, `Foo`, `VersionStatusIdentifier`, `SimpleProgramState`, `AddGroupUsersRequest`, `MutationFunctionOptions`, `ReadValue`, `Weapon`, `ITourStep`, `ArrayServiceTreeToArrOptions`, `DynamicTreeCollisionProcessor`, `Lyric`, `DocumentView`, `RtkQueryApiState`, `KeyValueStore`, `GcListener`, `d.EventOptions`, `PagingMeta`, `MeshInfo`, `OrderedAsyncIterableBaseX`, `Times`, `OutgoingMessage`, `Buf`, `ListRetainedMessagesCommandInput`, `EffectScope`, `NzMessageRef`, `CountQueryBuilder`, `DBInstance`, `CircuitGroup`, `Overlay`, `ListDomainNamesCommandInput`, `CursorMap`, `TestViewport`, `MapIncident`, `StateAccount`, `CreateConnectionCommand`, `TertiaryButtonProps`, `Limiter`, `IEntity`, `ResponseWithBodyType`, `VisTypeAlias`, `BinaryBitmap`, `DbPush`, `MenuState`, `BitcoinjsKeyPair`, `HitsCounterProps`, `Encoder`, `StacksNode`, `WhereFilterOp`, `LeftRegistComponentMapItem`, `VpnPacketCaptureStopParameters`, `Functor3`, `SignatureHelp`, `TCollection`, `BlockDeviceMapping`, `ObjectID`, `LexicalScope`, `PlatformType`, `browser.runtime.MessageSender`, `messages.Location`, `EmailVerificationToken`, `GraphQLSchemaNormalizedConfig`, `ISPTermObject`, `WindowOptions`, `Text`, `TupleType`, `MockTask`, `StatsFieldConfiguration`, `EventSummary`, `ex.Engine`, `Accounts`, `ProductResult`, `ArrayOrSingle`, `TagRenderingConfigJson`, `SassError`, `CategoryRecordsDict`, `WithGetterString`, `SharingSessionService`, `StringUtf8CV`, `UndelegateBuilder`, `TextWidthCache`, `ManagedListType`, `IStateBase`, `Rx.Observable`, `NTPTimeTag`, `UpdateApiKeyCommandInput`, `FundedAward`, `BorrowingMutex`, `FunctionTypeBuildNode`, `DynamoDB.DocumentClient`, `OffsetConnectionType`, `NoopExtSupportingWeb`, `V1Secret`, `SignInState`, `RequestInfoUtilities`, `ReplayDataMediator`, `ModelConstructor`, `ApiLocatorService`, `CreateClusterRequest`, `AudioTrack`, `DateHelperService`, `RenderBatch`, `StatFilter`, `AnyEntity`, `PlasmicTagOrComponent`, `SymbolOriginInfo`, `AddArrayControlAction`, `ObjectiveModel`, `Animated.Value`, `RefSet`, `IAppError`, `ConfigInfo`, `MetadataSchema`, `ServerUtil`, `GethInstanceConfig`, `EditableEllipse`, `DAL.DEVICE_NULL_DEREFERENCE`, `Signed`, `MotionData`, `ChoiceValue`, `Calculator.Client`, `AssetMap`, `JRes`, `TypeScriptType`, `ProviderInfrastructure`, `UseTimefilterProps`, `Monitoring`, `ActorRef`, `HdBitcoinCashPayments`, `CirclinePredicateSet`, `Resolve`, `IdentityContext`, `MongooseSchema.Types.ObjectId`, `JsonRpcClient`, `PSIDataType`, `DocUrl`, `CombatEncounter`, `CGAffineTransform`, `CollateralizerContract`, `Float`, `StorageFormat`, `Chance`, `IFlexProps`, `Core.Position`, `CompressOptions`, `FsObj`, `OrganizationPolicySummary`, `d.ServiceWorkerConfig`, `ReadLine`, `Asm`, `FileWatcher`, `RemoteController`, `ConditionalTypeNode`, `QuantumElement`, `ViewerOut`, `DecisionPathPlotData`, `FreeStyle`, `BoundingRect`, `PostCollector`, `ReactiveVar`, `SGGroupItem`, `NameBindingType`, `Resume`, `AttributeValueSetItem`, `NavigationActions`, `UINavigationBar`, `ChangeDescription`, `DOMRectReadOnly`, `Parser.Tree`, `AndroidMessagingStyle`, `BoolPriContext`, `Architecture`, `BeatmapDifficulty`, `GenericOperation`, `p5.Vector`, `FlatTree`, `JSXIdentifier`, `PhotoSize`, `P2PNodeInfo`, `instance`, `RoomClient`, `Apple2IO`, `GF`, `VersionMismatchFinder`, 
`Answerable`, `QueryOptions`, `ExtendedObject3D`, `NxValues`, `Dic`, `Stream`, `LayerService`, `AccountSteam`, `JsonRpcId`, `PayloadMetaAction`, `StructureNode`, `tfc.NamedTensorMap`, `At`, `Versions`, `vscode.TextDocument`, `EnvironmentManager`, `BRepGeometryInfo`, `SnippetsMap`, `IBrowsers`, `TransferParameters`, `RedocThemeOverrides`, `PanelSocket`, `DragPanHandler`, `NetworkParams`, `WidgetAdapter`, `StringASTNode`, `PreviewPicture`, `ReadonlyNonEmptyArray`, `turfHelpers.Feature`, `TableSearchRequest`, `LVal`, `PlanetApplicationRefFaker`, `superagent.Response`, `monaco.Range`, `DropInPresetBuilder`, `DaffCategoryFilterEqualRequest`, `StyleManager`, `ActionConnector`, `StationService`, `TExpected`, `IZosFilesResponse`, `MonoSynth`, `apid.ReserveId`, `FlexConfigurationPlugin`, `OncoprintWebGLCellView`, `ContactEmail`, `ActionsService`, `UserProvider`, `OwnerKeyInfoType`, `FilterOperator`, `ResponseMeta`, `ITranslationResult`, `IPercentileRanksAggConfig`, `ITx`, `Prisma.Sql`, `DeleteVolumeCommandInput`, `Modal`, `IModulePatcher`, `OwnPropsOfControl`, `requests.ListListingsRequest`, `TestActionContext`, `It`, `P9`, `SettingsCallback`, `IItemBase`, `StandardTableColumnProps`, `Aperture`, `LayoutOption`, `Chai.AssertionStatic`, `interfaces.Unbind`, `OptimizeJsOutput`, `ImportIrecDeviceDTO`, `WildlingCard`, `DaffCategory`, `OidcState`, `IRepositoryState`, `SagaMiddleware`, `RequestInterface`, `OpenLinkComponent`, `MessageEmbeddedImage`, `FlagValidatorReturnType`, `StructResult`, `StatusChartStatusMesh`, `ConstraintService`, `VisTypeAliasRegistry`, `Conditions`, `PluginApi`, `IDistributionDelta`, `DeclinationDictionary`, `TransmartConstraint`, `IEffect`, `HistoryStore.Context`, `ClassWeightMap`, `CodeActionParams`, `TheWitnessGlobals`, `ModelCompileArgs`, `ts.TypeQueryNode`, `Requests`, `InjectorServer`, `StateNamespace`, `InventoryItem`, `Ycm`, `TransportRequestOptionsWithMeta`, `ApiAction`, `SendInfo`, `CallbackAction`, `OnPreRoutingResult`, `Continuation`, `TranslateOptions`, `MapService`, `DependencyStatus`, `CloudWatch`, `IPostMessageBridge`, `LoggedInUser`, `ecs.ContainerDefinitionOptions`, `MethodVisitor`, `CppBytes`, `ReactInstance`, `JoinDescriptor`, `UserInterface`, `IActionTrackingMiddleware2Call`, `LabelValues`, `NextCurrentlyOpened`, `StateChannelExitClaim`, `DetectionResultItem`, `JSONIngestionEvent`, `requests.ListReplicationSourcesRequest`, `LabeledScales`, `ProposeCredentialMessage`, `Base64Message`, `TilePathGroup`, `StringAsciiCV`, `ActionStepType`, `VarUsages`, `IListFunctionOptions`, `BehaviorObservable`, `LocalStorageService`, `RequestSuccessCallbackResult`, `DraggingPosition`, `GiphyService`, `Alignment`, `ConfigConfigSchema`, `Group1524199022084`, `ExtensionReference`, `CameraStrategy`, `PluginEvent`, `Events.precollision`, `StopPipelineExecutionCommandInput`, `WsConnection`, `NoteSize`, `IconSvg`, `LGraphNode`, `UpdateProjectResponse`, `TestAwsKmsMrkAwareSymmetricKeyring`, `DiffSettings`, `AsyncExecutor`, `LayerConfigJson`, `Effect`, `ProjectActions`, `IAureliaComponent`, `ComponentCompilerLegacyContext`, `ODataStructuredType`, `Controls`, `EventActionHandlerActionCallableResponse`, `Json.ObjectValue`, `CardRenderer`, `CacheContextContract`, `StatusActionQueue`, `PassThrough`, `LocationResult`, `SubmissionObjectState`, `Booking`, `TexturePalette`, `func`, `ActiveLabel`, `NodeStore`, `Child`, `ComponentTypeTree`, `FaunaCollectionOptions`, `CrowdinFileInfo`, `HeapObject`, `ApiTreeItem`, `DeleteRoomRequest`, `LanguageDetectorAsyncModule`, `ItemService`, `ChartDataset`, 
`RegularizationContext`, `WrappedDocument`, `FoundOrNot`, `AssetData`, `ResultType`, `PhotoData.PhotoDataStructure`, `ActiveSession`, `IBlockchainsState`, `RewardManager$1`, `F2`, `AttrMutatorConfig`, `DidState`, `MessageGeneratorImplementation`, `IJsonRpcRequest`, `ArenaSelection`, `FirestoreUserField`, `ShapeInstanceState`, `LazyBundlesRuntimeData`, `TList`, `MockEvent`, `MigrateEngineOptions`, `AppMessage`, `Server`, `IChannelManager`, `ListQueuesCommandInput`, `BackendTimingInfo`, `kKeyCode`, `CollisionSolver`, `ParquetData`, `Painter`, `BorderConfig`, `SceneActivationCCSet`, `PaddingMode`, `Key3`, `ColorFilter`, `IWireMessage`, `RestModelEntry`, `BaseReasonConfig`, `SectionOptions`, `ESRuleConfig`, `ReduxAction`, `SignedMultiSigContractCallOptions`, `RequestWithdrawalDTO`, `PluginInfo`, `CipherObject`, `ToasterService`, `NullableSafeElForM`, `TokenKind`, `CameraFrameListener`, `GraphQLModulesModuleContext`, `ServerClient`, `BasicTemplateAstVisitor`, `SocketStream`, `SPDestinationNode`, `flags.Discriminated`, `ScrollInfo`, `ParserService`, `SurveyMongoRepository`, `cdk.GetContextValueResult`, `Setdown`, `TransactionId`, `ConnectionArgs`, `CLI_OPTS`, `RenderOption`, `ENSService`, `IExportProvider`, `MetricsGraphicsEventModel`, `ProjectRole`, `NVMParser`, `IOContext`, `ShellExecution`, `CompileUtil`, `PoiInfo`, `StyleDefinition`, `CombatStateRecord`, `ListTemplateVersionsCommandInput`, `SVGUseElement`, `RouterDirection`, `HsDialogItem`, `UpdateModelCommandInput`, `ErrorListener`, `ListConfigurationsRequest`, `InheritanceNode`, `Raffle`, `Atom.Point`, `CallResult`, `SelectionRange`, `WrappedCodeBlock`, `SlatePluginDefinition`, `TilingScheme`, `FixedTermLoan`, `ParamInstance`, `JitsiLocalTrack`, `ManyToManyOptions`, `SwitcherFields`, `ContractWhiteList`, `EdgeMaterialParameters`, `JSXOpeningElement`, `RoleType`, `DebugLogger`, `UseTransactionQueryReducerAction`, `Mount`, `GfxCompareMode`, `ExportAssignment`, `ValProp`, `TDestination`, `IRawHealthEvaluation`, `AuthDataService`, `TransactionMetadata`, `PlatformInformation`, `BaseFactory`, `ChartSeries`, `WebConfig`, `NonTerminal`, `IObserverHandle`, `messages.GherkinDocument`, `React.CompositionEvent`, `LocalStorageAppenderConfiguration`, `LanguageServerConfig`, `PlatformUtilsService`, `SFPage`, `FakeHttpProvider`, `WalletStore`, `TCity`, `TraceOptions`, `WetLanguage`, `GenesisBlock`, `H.Behavior`, `UserNotification`, `RetryStrategy`, `RouterData`, `FSJetpack`, `IServiceLocator`, `FilterQuery`, `AnnotationData`, `Graph`, `child_process.SpawnSyncReturns`, `TileMetadataArgs`, `requests.ListVmClusterUpdatesRequest`, `NodeSnapshot`, `requests.ListMultipartUploadPartsRequest`, `vscode.DebugSession`, `ClientService`, `JsxAttributes`, `TokenStat`, `TInput`, `ProcessAccountsFunc`, `TabStorageOptions`, `ExprWithParenthesesContext`, `execa.Options`, `DescribeResourceCommandInput`, `CFCore`, `EzBackendInstance`, `IPartialDeploymentTemplate`, `ResourceDefinition`, `DocumentHighlightParams`, `UnitWithSymbols`, `MediaFormat`, `TheDestinedLambdaStack`, `TransformFactoryContext`, `NodeInterface`, `DataProperty`, `Seam`, `ControlFlowEnd`, `PerSignalDetails`, `SpatialViewState`, `GuaribasUser`, `ClientHttp2Stream`, `ResourcesFile`, `uibPagination`, `ConditionExpression`, `FormatRange`, `CustomUrlAnomalyRecordDoc`, `FillerHook`, `IFormControlContext`, `IInputHandler`, `CreateConnectionResponse`, `CompositeTreeNode`, `d3Selection.Selection`, `EntryObj`, `ConnectionInfoResource`, `MockXMLHttpRequest`, `ArrayLike`, `MemoryArray`, `IFunctionWizardContext`, 
`NavigableHashNode`, `CssClassMap`, `IndexStore`, `ParameterSpec`, `HTMLHRElement`, `Atomico`, `ValidateOptions`, `IFilterModel`, `ListDatasetsCommandInput`, `ListOption`, `SqlQuery`, `TodoDataService`, `InternalMetric`, `EdmxEnumType`, `Fund`, `BroadcasterService`, `Folded`, `IChunk`, `UICollectionViewCell`, `RecursiveStruct`, `Loc`, `TypedArrays`, `MatButtonToggle`, `IExtent`, `AuthenticateResultModel`, `ThyDragDirective`, `RequestBodyParser`, `Playlist`, `PredictableStepDefinition`, `Revalidator`, `FocusOrigin`, `k`, `angular.IHttpPromise`, `BoundSphere`, `Browser.Interface`, `OrgID`, `Web3SideChainClient`, `ArrowProps`, `TTransport`, `StorageBackend`, `C1`, `VersionPolicy`, `IServerSideGetRowsRequest`, `InternalKey`, `ExecutionProbe`, `Grant`, `CustomEndpointDetails`, `KeyboardState`, `Card`, `ModuleWrapper`, `MenuProps`, `PropertyInjectInfoType`, `FiniteIEnumerator`, `ContinuousParameterRange`, `comicInterface`, `PlayerStat`, `AdvancedDynamicTexture`, `Joplin`, `CompileRepeatUtil`, `CT`, `TFolder`, `Red`, `RX.Types.SyntheticEvent`, `ThemeUIStyleObject`, `SpatialImageEnt`, `MonitorSummary`, `RNConfig`, `Leaf`, `ApiController`, `SinonMock`, `AckFrame`, `RealtimeController`, `BinaryMap`, `GfxCullMode`, `VRMBlendShapeGroup`, `Deferrable`, `IElem`, `RaribleProfileResponse`, `Skeleton`, `DataBuckets`, `XPCOM.nsIJSID`, `Emoji`, `ConditionalExpression`, `MatBottomSheetRef`, `InnerPlugin`, `AlertResult`, `ManglePropertiesOptions`, `MonitoringConfig`, `_Record`, `RollupChunkResult`, `ProjectSpecBase`, `com.github.triniwiz.canvas.ImageAsset.Callback`, `IProcessedStyleSet`, `Text_2`, `FormatFactory`, `IBehavior`, `IJSONInConfig`, `OcsHttpError`, `requests.ListVolumeBackupPoliciesRequest`, `BlockOutputFormat`, `UntagResourceCommandInput`, `ShapeType`, `VarExpr`, `IAuthContextData`, `TConvData`, `ProxyableLogDataType`, `NonEmptyList`, `JsonRpcHttpClient`, `ModulusPoly`, `DropdownMenuInitialState`, `CommandSetting`, `ReferenceContext`, `BaseScreen`, `ListComprehensionNode`, `AggregationResponse`, `ContainerModule`, `ITargetInfoProps`, `FindProjectsDto`, `AccessTokenResponse`, `Spinner`, `BuddhistDate`, `IntentSummary`, `FSEntry`, `Hardfork`, `SymbolData`, `TraceId`, `OutUserInfoPacket`, `CdtEdge`, `Dialogic.DefaultDialogicOptions`, `LogMatchRule`, `GestureDetail`, `DFA`, `MyWindow`, `Placeholder`, `HEventType`, `httpm.HttpClientResponse`, `EditableCircle`, `ProjectUser`, `VirtualMachineScaleSet`, `ProgressBarEvent`, `OfficialAccount`, `EToolName`, `ColRef`, `TrackEventParams`, `RequestApprovalTeam`, `ComponentGeneratorOptions`, `FoamWorkspace`, `WrappingMode`, `PropertyFlags`, `ValidResource`, `ModelCallbackMethod`, `InsertNodeOptions`, `RegistrationDTO`, `DevServerService`, `requests.ListModelDeploymentShapesRequest`, `IBuildConfig`, `QueueObject`, `StringEncoding`, `CommandLineToolModel`, `IPropertyIdentValueDescriptor`, `HistoryRPC`, `yargs.Arguments`, `DeployedServicePackage`, `CliProxyAgent`, `WriteContext`, `ColorSchemaOptionsProps`, `BleepsSetup`, `CropperTouch`, `Constants`, `OptsChartData`, `UserEnvelope`, `NodeDecryptionMaterial`, `UserDomain`, `Just`, `AnimationOptions`, `HdBitcoinPayments`, `DefaultTreeNode`, `ComparisonOperator`, `DraftHandleValue`, `AngularFireUploadTask`, `VariableType`, `JSDocMethodBodyCtx`, `EditorRenderProps`, `CastEvent`, `RegisterData`, `VertexLabels`, `alt.Vector3`, `selectionType`, `ModifyDBParameterGroupCommandInput`, `GraphicMode`, `BoundPorts`, `SelectionInterpreter`, `ResourceCacheData`, `Terms`, `DispatchedPayload`, `IGeneralFunctions`, `d.FsItems`, 
`SetOpts`, `ArrayCollection`, `ISurveyObjectEditorOptions`, `EntityTypeProperty`, `TimefilterSetup`, `estypes.QueryDslQueryContainer`, `DocumentSymbolParams`, `DetailedReactHTMLElement`, `BinanceConnector`, `Ng2StateDeclaration`, `AnyObject`, `React.VFC`, `ConsoleMessageLocation`, `GetConfigCommandInput`, `LocalStorageArea`, `LogWriteContext`, `xlsx.CellObject`, `ThyUploadResponse`, `ReactiveCommand`, `UserRefVO`, `webpack.LoaderContext`, `wdpromise.Promise`, `Debugger`, `Codeblock`, `Definition`, `CreateExperimentCommandInput`, `AnyApi`, `SearchIndex`, `ZRC2Token`, `RendererElement`, `AirUnpacker`, `NodeKeyJSON`, `InstanceFailoverGroup`, `MessageComponentInteraction`, `Contract`, `IInviteGroupUsersOptions`, `NodeEvaluateResult`, `ts.Block`, `MangaFields`, `Insight`, `StaffService`, `DataAction`, `UserReference`, `DateTimeOffset`, `JSONDiff`, `HsCommonEndpointsService`, `ResponseFormat`, `SecretWasmService`, `ActionRuntimeContext`, `BindGroupLayout`, `DescribeChannelCommandInput`, `ScaleBand`, `IClusterClient`, `LinesTextDocument`, `ParseTreePattern`, `GXShapeHelperGfx`, `IterationService`, `GlobalsSearch`, `HTMLLabelElement`, `LinkI`, `FragmentDefinitionNode`, `Arguments`, `AccountGoogle_VarsEntry`, `DecodedData`, `ObiDialogNode`, `Lambda`, `ManagementDashboardTileDetails`, `IPicture`, `UpdateDataSetCommandInput`, `EventHandlerInfo`, `StateSelectors`, `SimpleRange`, `MountedBScrollHTMLElement`, `UserIDStatus`, `ZRImage`, `CSharpInterface`, `firestore.DocumentSnapshot`, `MutationObserver`, `DictionaryQueryEntry`, `Chainable`, `MDCAlertAction`, `SummaryItem`, `PubKey`, `Bidirectional`, `MetricsSourceData`, `HdBitcoinCashPaymentsConfig`, `SimpleChoiceGameState`, `PipetteOffsetCalibration`, `QueryJoin`, `Metric`, `ExpoAppManifest`, `CredentialRepresentation`, `OnePoleFilter`, `Quaternion`, `LSTMCell`, `SchemaVisitorFactory`, `Glossary`, `ICordovaLaunchRequestArgs`, `IRef`, `HTMLIonAlertElement`, `Fn1`, `SpekeKeyProvider`, `RegistryPackage`, `apid.StreamId`, `DeviceConfigIndex`, `NzResizeObserver`, `CheckStatus`, `CameraKeyTrackAnimationOptions`, `DurationUnit`, `NotificationAndroid`, `SafeStyle`, `IStorageScheme.IStorage`, `ts.ForInStatement`, `JoinClause`, `QueryBuilderFieldProps`, `SettingNavigation`, `CrochetValue`, `ParentBid`, `FlashcardFieldName`, `Work_Client.WorkHttpClient2_1`, `PrimedCase`, `AzurePipelinesYaml`, `Int32Array`, `StageSwitchCtrl`, `CacheNode`, `TCmdData`, `IMaterial`, `ObservableEither`, `BindingDirection`, `ListDiscoveredResourcesCommandInput`, `Id64Arg`, `CornerFamily`, `LocatorExtended`, `MockStoreEnhanced`, `Extensions`, `ExtraPost`, `CoreService`, `ListFunctionsRequest`, `MsgCreateLease`, `XmlNode`, `StylingContext`, `IMutableQuaternion`, `NetworkId`, `StackContext`, `AuthorizationRequest`, `NormalItalic`, `CombatantState`, `Biota`, `GameManager`, `TVSeason`, `BoardService`, `PathCursor`, `requests.ListMetricsRequest`, `App.ui.INotifications`, `PluginDomEvent`, `DeviceService`, `ActivationLayer`, `d.CompilerBuildStats`, `Web3EventService`, `WriteStorageObject`, `DatepickerDialog`, `Application.RenderOptions`, `ABI`, `PackageManagerCommands`, `BuilderState`, `LiveActor`, `DAL.KEY_6`, `nsIDOMWindowUtils`, `MVideo`, `RawConfigurationProvider`, `DDSTextureHolder`, `TimeSlot`, `PolicyResponse`, `OnSetOptionsProps`, `TemplateFile`, `StackCardInterpolatedStyle`, `RoleManager`, `PerformDeleteArgs`, `PreActor`, `Vec4Sym`, `int32`, `ProcessGraphic`, `HomogeneousPatternInfo`, `ModelStoreManager`, `Rebind`, `ParsedRequestUrl`, `ListResourcesCommandInput`, `EmojiParseOptions`, 
`IDBPDatabase`, `NativePlatformResponse`, `GfxrResolveTextureID`, `AuthenticateFn`, `ShortUrl`, `UnsubscribeCommandInput`, `BigNumberish`, `TimelineElement`, `JPA.JPAResourceData`, `PropertyAnimation`, `MessageContent`, `MyView`, `IGLTFExporterExtensionV2`, `ManualServer`, `DAVCalendar`, `UserItem`, `EventAction`, `NgrxJsonApiStoreQueries`, `FolderOrNote`, `WebSocketClient`, `SVGNodeAttribute`, `SelectionType`, `MetadataPackageVersion`, `IMiddlewareProvider`, `ExchangeAccount`, `UnitType`, `UIScrollView`, `Modifiers`, `FakeCard`, `OrderPremiumRow`, `IExtension`, `SwapParams`, `BrowserFeature`, `FSNetworkRequestConfig`, `SelectMenuInteraction`, `Geometry`, `FocusTrapFactory`, `Fn4`, `ErrorContext`, `ID3Selection`, `IAnyStateTreeNode`, `WaitContextImpl`, `analyze.Options`, `LayoutChangeEvent`, `TriggerType.GITHUB`, `RecordSetWithDate`, `InspectorLogEntry`, `BlobClient`, `ResponseCallback`, `ApexDebugStackFrameInfo`, `JobSavedObjectService`, `coreAuth.TokenCredential`, `FontMetricsObject`, `PbSecurityPermission`, `BatchChain`, `CElement`, `ScrollRect`, `DeeplinkPayPayload`, `WithId`, `Set`, `AttachmentService`, `IMrepoDigestConfigFilePath`, `TwingCallable`, `BillingModifier`, `BentleyCloudRpcParams`, `ModuleDeclaration`, `ServeAndBuildChecker`, `ParjsCombinator`, `AnimationStateMetadata`, `ExecaReturnValue`, `IZoweJobTreeNode`, `ResultInfo`, `DataModels.TokenHistory.TokenHistoryGroup`, `TransientBundle`, `TeamType`, `InMemoryProject`, `Rules`, `TasksEntityStore`, `IStorageLayer`, `thrift.TField`, `FKRow`, `ServiceEnvironmentEndPointOverview`, `VariableValue`, `d3Transition.Transition`, `AC`, `CreateRulesSchema`, `RenderPassContext`, `BlockBlobClient`, `DeviceState`, `Match`, `MenuItemType`, `BasicSeriesSpec`, `GoGovReduxState`, `FrameGraphicsItem`, `debug.Debugger`, `AggsSetupDependencies`, `DestinationConfiguration`, `RoomTerrain`, `ExtensionConfig`, `ServiceState`, `ComponentTemplateDeserialized`, `UsePaginatedQuery`, `RouterStateSnapshot`, `URLLoaderEvent`, `GaugeVisualizationState`, `AgAxisLabelFormatterParams`, `IAuthStatus`, `instantiation.IConstructorSignature5`, `ICarsRepository`, `IPumpjack`, `CDP.Client`, `ElementState`, `BridgeProtocol`, `ResolvedModuleWithFailedLookupLocations`, `NzConfigKey`, `Advice`, `DocLinksStart`, `RawTree`, `StripeAddress`, `DrilldownState`, `GroupMember`, `CssPropertyOptions`, `IConfigurationExtend`, `SteeringPolicyRule`, `SwaggerOperation`, `InstancePoolInstanceLoadBalancerBackend`, `KMSKeyNotAccessibleFault`, `RnnStepFunction`, `TSelector`, `ProcessDataService`, `MlCapabilities`, `Category2`, `ThyTreeNodeCheckState`, `JsonFragment`, `RawCard`, `LLRBNode`, `TestingAggregate`, `IntervalContext`, `OrderItem`, `ClearCollections`, `UnstakeValidatorV1`, `ContainerPassedProps`, `GossipMemoryStore`, `ts.SignatureDeclaration`, `ArrayRange`, `Prompter`, `MutableTreeModel`, `XHRResponse`, `CommitSelectionService`, `DataEventEmitter.EventDetail`, `MatchPrefixResult`, `IColorSet`, `Templateable`, `IAchievement`, `DatabasePoolConnection`, `ViewTest`, `NelderMeadPointArray`, `NgxDropzoneService`, `NameT`, `ResourceField`, `SfdxFalconResultType`, `GDQLowerthirdNameplateElement`, `StorageObjectList`, `IWorld`, `Appointment`, `HsDrawService`, `SingleConnection`, `KeywordType`, `Progresses2Runners`, `FormInput`, `BracketTrait`, `Area`, `MigrationOptions`, `interfaces.BindingInWhenOnSyntax`, `ModelAndWeightsConfig`, `CloneFunction`, `ChannelContract`, `AstBlock`, `DocumentLinkShareState`, `LatLngBounds`, `GenesisConfig`, `SliceRequest`, `CircuitState`, `CategoryPage`, 
`FlowAssignmentAlias`, `E2`, `Selected`, `PkgJson`, `QueryService`, `TDeclarations`, `WebGLContextAttributes`, `StoryMenuItemProps`, `FileRecord`, `AccidentalMark`, `WorkerMsgHandler`, `ElementHandleForTag`, `ActorArgs`, `core.Keyring`, `SuccessCallbackResult`, `RepositoryWithGitHubRepository`, `ResultItem`, `DateOrString`, `TeamService`, `DeleteInvitationsCommandInput`, `FlexboxLayout`, `IDocumentOptions`, `RequestMethodType`, `DataSourceInstanceSettings`, `StatusView`, `IconSize`, `MyCompanyRowConfig`, `CommandBuildElements`, `SourceDocument`, `IObservableValue`, `ResponsiveFacade`, `ICXListHTLCOptions`, `ReadonlyNFA`, `NavbarElementProps`, `ScmDomain`, `BillPayer`, `PersistedStateKey`, `StravaActivityModel`, `UIMenuItem`, `Cached`, `PLSQLRoot`, `ParameterApiDescriptionModel`, `WhenToMatchOptions`, `requests.ListBudgetsRequest`, `AllSeries`, `FileSpan`, `DeploymentSubmission`, `ValidationControllerFactory`, `StructFieldInfo`, `ObjectSchemaProperty`, `UtilityNumberValue`, `SideType`, `IAllAppDefinitions`, `MDL0_MaterialEntry`, `IntermediateTranslationFormat`, `OutfResource`, `ConnectionNode`, `OverlayInitialState`, `NetworkResolver`, `DDL2.OutputDict`, `InterfaceTemplate`, `TransformComponent`, `FieldContextValue`, `ApplicationSettings`, `DynamoDbFileChange`, `ChartTemplatesData`, `OperatorType`, `SegmentGroup`, `UnformattedListItem`, `DynamicMatcher`, `ScryptedDeviceType`, `d.TransformCssToEsmOutput`, `NumberFilter`, `NgxsWebsocketPluginOptions`, `requests.ListAppCatalogListingsRequest`, `RepositoryOptions`, `MetricInterface`, `t_6ca64060`, `OperatorToken`, `ClientMessage`, `DescribeConnectorsCommandInput`, `ProductOptionService`, `CurveCollection`, `IPed`, `FilesChange`, `EditHistoryCommit`, `IDatabaseDataActionClass`, `Wrapper`, `ChartOptions`, `OnEffectFunction`, `IDiscordMessageParserResult`, `ModelSummary`, `ChatEvent`, `DecadeCell`, `Range2d`, `VpcSecurityGroupMembership`, `ResetPasswordAccountsRequestMessage`, `HttpStatusCodes`, `snowflake`, `Peer`, `DebugConfiguration`, `ExpNum`, `d.ResolveModuleIdResults`, `IFluidDependencySynthesizer`, `SGSCachedData`, `CmsConfig`, `PluginInsertAction`, `LoginSuccessPayload`, `LocaleProviderService`, `RuntimeEnvironment`, `Gzip`, `ModelDispatcher`, `GaxiosError`, `MockNode`, `SupabaseClient`, `TlcCode`, `INavFourProp`, `InheritedCssProperty`, `TrackFormat`, `IGameData`, `RegisterOptions`, `AnyArena`, `UserType`, `EntityEvictEvent`, `CalculationScenario`, `RangeInterface`, `requests.ListJobShapesRequest`, `IMinemeldConfigService`, `PreferenceChange`, `StatementsBlock`, `ExportData2DArray`, `TimerActionTypes`, `MessageBuffer`, `d.HydrateFactoryOptions`, `LocaleData`, `Redux.Reducer`, `DeleteConnectionResponse`, `ScriptContainer`, `SimpleGit`, `ApiOperationOptions`, `LegendData`, `CreateEncryptedSavedObjectsMigrationFn`, `Ped`, `AssignNode`, `TableDimension`, `ListGroupsCommandInput`, `Completion`, `AsExpression`, `InMemoryEditor`, `PokemonIdent`, `FeedService`, `Entities`, `Indent`, `VisualizeServices`, `KinesisFirehoseDestination`, `FailureInfo`, `NgmslibService`, `DocumentType`, `WebGLVertexArrayObjectOES`, `V`, `NumberEdge`, `CohortPatient`, `HandlebarsTemplateDelegate`, `IndexedNode`, `LogType`, `Lifecycle`, `PedComponent`, `Dialogue.Config`, `ComponentChildren`, `TSPropertySignature`, `ScriptDataService`, `MediaProviderConfig`, `FilterMetadataStatusValues`, `ListArtifactsCommandInput`, `RequestBuilder`, `ICategory`, `ObservableLike`, `Income`, `JSONTree`, `MockWebSocketClientForServer`, `RectF`, `SaveEntitiesCancel`, `InstallState`, 
`InvalidStateException`, `EnumValueDescriptorProto`, `AbstractSqlModel`, `CasparCGSocketResponse`, `Loadbalancer`, `cytoscape.EventHandler`, `FunctionMethodsResults`, `ReflectionProbe`, `ButtonProps`, `CircuitInfo`, `uinteger`, `TButtons`, `PipeTransform`, `LegacyField`, `OperationLink`, `ComputedGeometries`, `Images.Dimensions`, `Router.RouterContext`, `NetworkSubgraph`, `EditModes`, `IndicatorObject`, `IRuleApiModel`, `ScrollOptions`, `PresentationPreview`, `WebElementWrapper`, `PlayState`, `AnimatorState`, `LengthPrefixedList`, `JsonDocsSlot`, `MountedHttpHandler`, `SearchInWorkspaceOptions`, `SavedObjectsIncrementCounterOptions`, `EmacsEmulator`, `DaffNewsletterSubmission`, `digitalocean.Account`, `ElectronCertificate`, `Sort`, `PullRequest`, `PolygonEditOptions`, `DocFn`, `DTONames`, `CreateParameterGroupCommandInput`, `AboutService`, `EaseItem`, `WindowCorrection`, `ZoneAwarePromise`, `DebounceSettings`, `ElasticsearchResponseHit`, `Student`, `SlackOptions`, `Dino`, `ProviderSettings`, `TaskType`, `PermissionResolvable`, `BlockCache`, `TileCoords2D`, `FileUpload`, `Point3F`, `LunarMonth`, `BlockPos`, `IndexedPolyfaceVisitor`, `ImmutablePerson`, `MDCListIndex`, `IOmnisharpTextEditor`, `EVMEventLog`, `AngularDirective`, `MongoCallback`, `ListSuppressionsRequest`, `OutdatedDocumentsSearchRead`, `UpdateStreamCommandInput`, `RawSavedDashboardPanelTo60`, `ProtractorBrowser`, `PieSeries`, `IInspectorListItem`, `MikroORM`, `AccountsServer`, `ItemStat`, `OptionDetails`, `DocReference`, `CertificateAuthorityTreeItem`, `TypeWithId`, `ScmRepository`, `FoundationElementRegistry`, `LocalVideoStream`, `requests.ListMultipartUploadsRequest`, `CheckboxGroupProps`, `TestScriptResult`, `SmallMultipleScales`, `CubeTexture`, `AnimeNode`, `CaptionSelector`, `ComponentClass`, `StagePanelLocation`, `DataSeries`, `SaveDialogOptions`, `ProjectChangeAnalyzer`, `IArtist`, `StandardAuthorization`, `BuildPipelineParams`, `IAutorestLogger`, `SchematisedDocument`, `IShellMessage`, `HTMLSelectElement`, `CreateFilterCommandInput`, `TenantSettingService`, `NamespaceGetter`, `StockState`, `IProjectsRepository`, `NavigateToPath`, `DailyApiRequest`, `ValidationContext`, `TransformedStringTypeKind`, `EventBody`, `core.App`, `MarkovChain`, `ExpenseService`, `PathTree`, `MessageId`, `InspectReport`, `MergeProps`, `NumberFormat`, `SyscallManager`, `ThemeColors`, `PointObject`, `TimeZone`, `AggTypeFilter`, `VstsEnvironmentVariables`, `GetPointFieldFn`, `RectDelta`, `CompaniesService`, `VirgilCrypto`, `AstRoot`, `Slider`, `SubmissionProgress`, `Sash`, `ethers.providers.JsonRpcProvider`, `ISiteScriptActionUIWrapper`, `GfxrAttachmentSlot`, `GetIamPolicyRequest`, `EditDoc`, `Node_Interface`, `EmitterInstance`, `GridDimensions`, `AnimatedInterpolation`, `DocumentationResult`, `SelectorSpec`, `ButtonStyle`, `VersionedSchema`, `IProjectConf`, `CreatePolicyCommandInput`, `SUCUpdateEntry`, `NavNodeInfoResource`, `DemoteGroupUsersRequest`, `ConnectionConfiguration`, `SearchRequest`, `PythonShell`, `TreeIterator`, `ChromeBreadcrumb`, `requests.ListKeyVersionsRequest`, `UpdateConfigurationCommandInput`, `MethodAst`, `TypeInference`, `DiffColumn`, `TLPointerInfo`, `RenderTreeDiff`, `TSPass`, `RigidBodyComponent`, `K6`, `ReactEditor`, `LaunchOption`, `Properties`, `ColorTokens`, `Classification`, `OverlayChildren`, `Fn2`, `UI5Class`, `AnyStandaloneContext`, `GlyphSet`, `RequestArgs`, `RefactoringsByFilePath`, `Screwdriver`, `Visual`, `MyModule`, `PostDocument`, `MultilineTextLayout`, `JsonObject`, `SwaggerMetadata`, `BasicCCReport`, 
`FileStatusBar`, `ThyDropPosition`, `TestCLI`, `GoThemeBackgroundCSS`, `Photo`, `HitEvent`, `QPixmap`, `DropHandler`, `XMLHTTPRequestMock`, `LoDashStatic`, `IInterpolatedQuery`, `SprintfArgument`, `Timers`, `IPlatformService`, `I18nConfig`, `PBXGroup`, `DefaultEditorControlsProps`, `TInsertAdjacentPositions`, `SerializeOptions`, `FounderConfig`, `PublicationViewConverter`, `FurMulti`, `IBinaryData`, `IterableProtocol`, `Authentication`, `EvActivityCallUI`, `ColorPoint`, `HTTP`, `ILog`, `TFileOrSketchPartChange`, `MiddlewareOptions`, `TutorialContext`, `TransactionWithBlock`, `Koa`, `AES`, `Checksum`, `WriteFileOptions`, `BatchedFunc`, `ForumActionType`, `DecoderOptions`, `vscode.NotebookDocument`, `IFormState`, `KeyframeTrackType`, `CalculatedColumn`, `ExprListContext`, `RTCRtpReceiveParameters`, `RTCStatsReport`, `IndexData`, `ServiceWorkerState`, `ForbiddenWordsInfo`, `ConflictNode`, `Regl`, `MatomoTracker`, `RouterLoaderOptions`, `FsWriteOptions`, `Controller2`, `CurveFactory`, `TransactionUnsigned`, `IGetActivitiesStatistics`, `WorkNodePath`, `SoundConfig`, `GeistUIThemes`, `TickOptions`, `TestSink`, `AbbreviationNode`, `PatchRequest`, `Ratio`, `RouteNode`, `ThunkDispatch`, `BinaryPaths`, `ExternalServiceIncidentResponse`, `DatabaseIndexingService`, `NameIdentifier`, `PerSideDistance`, `ManifestBuilder`, `AppSettingsFormValues`, `ZxBeeper`, `ReadAndParseBlob`, `PaymentInformation`, `SpaceSize`, `Attributions`, `SocketIOGraphQLClient`, `AccountCustom_VarsEntry`, `MediaListOptions`, `NodePath`, `ListExportsCommandInput`, `ShippingMethod`, `Function2`, `WritableAtom`, `WebsiteScanResult`, `SimpleAttribute`, `TimeSlotService`, `ValidatorModel`, `Owner`, `SwitchNodeParams`, `PathResolverResult`, `DryadPlayer`, `TransitionService`, `TypedNode`, `HierarchicalFilter`, `OperatorDescriptor`, `XmppChatAdapter`, `VuexModuleOptions`, `PartialParam`, `TypographyDefinition`, `requests.ListViewsRequest`, `GroupPanel`, `JSONSchema3or4`, `OpsMetrics`, `ClassAst`, `TRecursiveCss`, `GetStageCommandInput`, `ListIdentityProvidersCommandInput`, `protos.common.SignaturePolicy.NOutOf`, `IAbstractGraph`, `GLRenderPassContext`, `IControl`, `LazyBundleRuntimeData`, `IReportingRule`, `BufferGeometry`, `GetFederationTokenCommandInput`, `FileRenameEvent`, `IMetricsService`, `FileVersionSpec`, `CostMatrix`, `DeleteSecurityProfileCommandInput`, `ManagedInstance`, `ApiRecord`, `IsSelectableField`, `EosioContractRow`, `TestAccountProvider`, `WorldgenRegistryHolder`, `DeleteNetworkProfileCommandInput`, `BMDData`, `FSNode`, `RenderAPI`, `ActionHandlerRegistry`, `Loggable`, `Shortcut`, `FunctionFlags`, `CodePddlWorkspace`, `GX.CA`, `TimeoutJobOptions`, `Describe`, `LegendItem`, `IJsonSchema`, `WithPLP`, `IFilterListRow`, `VisualObjectInstanceEnumeration`, `ContractFunction`, `SyntaxModifier`, `AudioDeviceInfo`, `ListServersCommandInput`, `OrderedId64Iterable`, `WhereOptions`, `ListDomainsCommandInput`, `GlobalCoordinates`, `CompSize`, `CoreTypes.dip`, `jqXHR`, `GrammaticalGender`, `OsmNode`, `LoadedVertexData`, `PROTOCOL_STEPS_ID`, `FormBuilder`, `ExtendedCompleteItem`, `AnimVector`, `SignedMessageWithTwoPassphrases`, `ColumnSubscription`, `ScoreStrategy`, `DragHelperTemplate`, `TargetTrackingConfiguration`, `TodoController`, `ReadableStreamDefaultReadResult`, `RecordList`, `TagConfiguration`, `Web3ClientInterface`, `GetAppDefinitionParams`, `SparqlItemService`, `AbstractOptions`, `dia.Cell`, `SimulcastUplinkObserver`, `SupportedModels`, `ConfigMap`, `DAOMigrationParams`, `OrderDetailService`, 
`App.windows.window.IOverlay`, `RenderQueue`, `TransactionView`, `MatrixItem`, `WorkspaceState`, `ItemContext`, `MatchmakerMatched`, `ParameterConfig`, `BitstreamFormatRegistryState`, `SwimLane`, `DisplacementFeature`, `Texture_t`, `CandleStick`, `FourSlash.Range`, `TNodeReturnValue`, `AppSources`, `ModifierFlags`, `GameplayClock`, `MediationRecord`, `MockDeploy`, `ThyNotifyService`, `AST.Module`, `IAccordionItemContextProps`, `HasTagName`, `RawDraftContentState`, `OfficeApp`, `ExpressionListContext`, `ForwardInfo`, `Highcharts.MapLatLonObject`, `B8`, `nodes.RuleSet`, `ColumnFormat`, `SubtitlesState`, `Refetch`, `IMinemeldPrototypeService`, `MarkdownSection`, `ForgotPasswordAccountsRequestMessage`, `WindowProtocol`, `SShapeElement`, `KeyRowEvent`, `AnyGradientType`, `EdgeCalculator`, `ClonePanelAction`, `Claimants`, `ApmFields`, `BaseFilterInput`, `IStyleCollection`, `ICardProps`, `BottomNavigationTab`, `IGherkinDocument`, `DraggableData`, `BillingGroupCosts`, `QCfg`, `AddonEnvironment`, `RetentionPeriod`, `Cropping2D`, `CancellationStrategy`, `DirectiveProfile`, `NotebookCellOutput`, `NodeFactory`, `HttpResponseBadRequest`, `PathEdge`, `LoggerContextConfigType`, `PiEditConcept`, `PiStyle`, `PluginDevice`, `PassthroughLoader`, `News`, `ProofCommandResponse`, `PriorityListGroup`, `ChartModel`, `DisplayLabelsRequestOptions`, `Metrics`, `KeyPath`, `TypographyOptions`, `ComponentInternalInstance`, `Blockly.Block`, `IVector2`, `YoganOptions`, `EventMapper`, `transcodeTarget`, `ChannelTokenContract`, `InitSegment`, `SelectionNode`, `CS`, `VersionData`, `Quality`, `vscode.TreeItemCollapsibleState`, `ConfigData`, `Jws`, `IFunctionIndex`, `CommandsSet`, `RefObject`, `HTMLStyleElement`, `HashChangeEvent`, `ExcaliburGraphicsContext`, `TaskActionsEvaluator`, `ExecaChildProcess`, `ArcGISAuthError`, `FileMetadata`, `PeriodInfo`, `StacksMessage`, `SetupContext`, `TestFunctionImportEdmReturnTypeParameters`, `Answer`, `XNA_Texture2D`, `Frequency`, `StorageImpl`, `ReStruct`, `CtrExpBool`, `ReprOptions`, `requests.ListAutonomousDbPreviewVersionsRequest`, `Twitter.Status`, `TranslateParams`, `CodeLensParams`, `SchemaBuilder`, `ExecutionMessage`, `ExcaliburGraphicsContextWebGL`, `AutoBounds`, `ApiCall`, `Extent`, `FbFormModelField`, `TranslateConfig`, `GetServiceCommandInput`, `EfsMetricChange`, `ListOperationsCommandInput`, `SmartStartProvisioningEntry`, `UpdateQueryBuilder`, `SignatureResult`, `TransactionOpts`, `IRECAPIClient`, `PaneProps`, `ContextMenuRenderer`, `ErrorItem`, `EntityComparator`, `RegClass`, `ObserveForStatus`, `USBDevice`, `OverlayConfig`, `TEvent`, `SearchInWorkspaceFileNode`, `FieldFormatsSetup`, `ListUnspentOptions`, `PackageRegistryEntry`, `ITokenModel`, `MouseCoordinates`, `ComputedScales`, `PipelinePlugin`, `EdgeCalculatorSettings`, `WXML.TapEvent`, `InvalidParameterCombinationException`, `InternalHttpServiceStart`, `MethodMap`, `ApiCredentials`, `requests.ListLoadBalancersRequest`, `ObservableQuery`, `ContentPage`, `ArrayPaginationService`, `RARC.RARCFile`, `IColumn`, `TooltipOptions`, `HSD_JObjRoot_Instance`, `ILocation`, `MetricServiceClient`, `ShortConnectionDTO`, `ErrorDetails`, `pxtc.CompileOptions`, `EPNode`, `DeleteRepositoryPayload`, `IObjectWillChange`, `IPipeFn`, `IChunkHeader`, `DynamicEntries`, `ThyGuider`, `CanvasTypeHierarchy`, `SuiteNode`, `IActionContext`, `UpdateResponseData`, `IJobPreset`, `ReplicaSet`, `SchemaName`, `GLboolean`, `MacroBuffer`, `RoundingFn`, `EngineResults.SchemaPush`, `TRef`, `SetOperations`, `DecodedMessage`, `ConsoleColor`, `IndexFormat`, 
`TestDriver`, `ConfigFile`, `CopyrightInfo`, `Directories`, `IGenericTarget`, `IPizzasTable`, `Pswp`, `AtomicAssetsNamespace`, `ExternalModuleReference`, `QueryAccountsRequest`, `android.content.res.Resources`, `FloatingLabel`, `CreatePostDto`, `RadarColumnSeries`, `InboundTransport`, `FieldDestination`, `Arity1`, `SwalOptions`, `ScryptedDevice`, `WaitForEvent`, `requests.ListVmClustersRequest`, `Hmac`, `IObjectHash`, `android.graphics.drawable.Drawable`, `PDFDropdown`, `ContainerInspectInfo`, `TestFunctionImportSharedEntityReturnTypeParameters`, `QualifiedNameLike`, `CompilerEventFileDelete`, `ParsedParameters`, `TemplateCache`, `PaletteOutput`, `ScaleContinuousNumeric`, `ICombo`, `IJSONResult`, `LayerProps`, `AutoScaling`, `EnhancedTransaction`, `React.MouseEvent`, `FragmentType`, `ts.UnionTypeNode`, `IOptionsService`, `TParam`, `JSX.IntrinsicAttributes`, `SpatialCache`, `ApiReturn`, `EffectResult`, `C2`, `ResizeObserver`, `CandidatePersonalQualitiesService`, `RouteType`, `Lines.Segment`, `KV`, `XRView`, `MoonBoard`, `TerminalCommandOptions`, `T11`, `X12FunctionalGroup`, `TxParams`, `ActivatedRoute`, `FlashbotsBundleProvider`, `s.CodeGeneratorRequest`, `QueryGraph`, `IWalletContractService`, `FilterService`, `Twit`, `TimePicker`, `messages.Scenario`, `HSD_TExp`, `BinanceWebsocket`, `BlockFragment`, `RecordsFetchFilters`, `IMasks`, `PatternMatchKind`, `IJumpPosition`, `ListExecutionsCommandInput`, `UseSubscription`, `DescribeDBSubnetGroupsCommandInput`, `ILoggerInstance`, `SourceMap`, `DeclarativeEnvironment`, `IAttachmedFile`, `UpdateModelDetails`, `RenderPage`, `SimpleFunction`, `KeyID`, `ValueReflector`, `CompositeStrings`, `Shader3D`, `DeployStackResult`, `WorldComponent`, `BlockchainPropertiesService`, `Bind`, `TableClient`, `AsyncIterable`, `NestedContentField`, `IGitExecutionOptions`, `BreadcrumbItemType`, `StartedTestContainer`, `RedisStore`, `ShapeDef`, `IGrammar`, `CallHierarchyOutgoingCallsParams`, `CloudSchedulerClient`, `DescribeMaintenanceWindowExecutionTasksCommandInput`, `PluginItem`, `NavControllerBase`, `React.ChangeEvent`, `NoExtraProps`, `Report`, `ListBotsCommandInput`, `OpenApiSpec`, `MaybeAsync`, `RepositoryManager`, `AsyncResult`, `IHydrator`, `ServiceInstance`, `TexFunc`, `PartitionBackupInfo`, `ConceptDb`, `formValues`, `PluginInstance`, `ApplicationCommandRequest`, `EffectHandlers`, `TableItemState`, `ListMenu`, `matter.GrayMatterFile`, `LifelineHealthCheckResult`, `ListDeliverabilityTestReportsCommandInput`, `ScopeFn`, `IRouteTable`, `BigQueryRetrievalResult`, `MP4Box`, `IndexedGeometryMap`, `GetDomainCommandInput`, `G6Event`, `UserPreferencesService`, `ReduxReducer`, `IMappingState`, `WithNodeKeyProps`, `TypeTemplates`, `LocalProxiedEntry`, `Commands`, `FindAndModifyWriteOpResultObject`, `IDataSet`, `FormatterService`, `ControlPanelsContainerProps`, `OsdFieldType`, `NgGridItemEvent`, `StructureTower`, `Nuxtent.Query`, `CheerioOptions`, `PyVariable`, `IResultSetColumnKey`, `ColumnsProps`, `NoteStorage`, `CosmeticFilter`, `NamedExportBindings`, `CandidateResponderRule`, `T.Refs`, `NodeScene`, `VRMSpringBoneGroup`, `FieldVisConfig`, `ElementProfile`, `UA`, `TKeys`, `t.ValidationError`, `nconf.Provider`, `NuxtConfig`, `IPatient`, `DBusClient`, `Conversation`, `MapBrowserEvent`, `ConsoleMessage`, `Mdast.Parent`, `ProjectLock`, `ActJestMoveTimeTo`, `DescribeEventsCommandOutput`, `CLM.AppDefinition`, `ICore`, `DescribeEngineDefaultParametersCommandInput`, `PropertyModel`, `InitMessage`, `TsConfigResolver`, `SweetAlertResult`, `LoggerConfigType`, 
`InternalServerError`, `ParallelPlot`, `MapAdapterUpdateEnv`, `PreventAny`, `BuildEdgeStyle`, `GameEntityObject`, `QBoxLayout`, `BundleRef`, `BSTProcess`, `TrackEventType`, `CalendarEventStoreRecord`, `nameidata`, `DeployOpID`, `ITrackWFeatures`, `CompleterComponent`, `VimCompleteItem`, `RestoreDBClusterToPointInTimeCommandInput`, `IndividualTreeViewState`, `CursorQueryArgsType`, `GasModePage`, `ComponentTemplateListItem`, `InfluxVersion`, `TSESLint.Scope.Reference`, `ListModelConfig`, `IPropertiesAppender`, `RobotApiErrorResponse`, `CommonProps`, `GeneratedFiles`, `LoaderFactory`, `ListRoutesCommandInput`, `ApolloReactHoc.OperationOption`, `NodeParameterValue`, `THREE.DataTexture`, `DiagnosticLevel`, `IDataFilterInternal`, `AxisData`, `WebpackPluginInstance`, `Scene3D`, `ThumbnailSize`, `PlayerType`, `PreparedQuery`, `MarvinImage`, `ko.Observable`, `Availability`, `TsmOptions`, `graphql.GraphQLFieldConfigMap`, `Ppu`, `PointCloudHit`, `SelectorCache`, `MainState`, `OutputLink`, `IQueryState`, `OutliningSpan`, `ChromeNavLink`, `EventToken`, `IFinaleCompilerOptions`, `DaffNewsletterState`, `ModelFitArgs`, `AppUserCard`, `AppFileStatus`, `CreateJobTemplateCommandInput`, `core.BTCGetAccountPaths`, `NewSpecPageOptions`, `CustomIntegrationRegistry`, `NVMEntry`, `IFetchOptions`, `BuildState`, `GetWrongDependenciesParams`, `DetachVolumeCommandInput`, `LabelDefinitionJSON`, `RestClient`, `FontFace`, `IAbortAblePromise`, `IdentityProvider`, `HomePageProps`, `BaseSyntheticEvent`, `HypermergeNodeDetails`, `JsonRPC.Request`, `ILoaderOptionsPipe`, `ContractTransaction`, `DeclarationBlockConfig`, `ContextPosition`, `ComponentHTTPClient`, `CreateCategoryDto`, `ChannelStoredData`, `AccountingEvent`, `SObjectConfig`, `DescribeDetectorCommandInput`, `XRSession`, `AlertingAuthorization`, `AccountClient`, `ChangeEvent`, `Merge`, `TerminalService`, `ShapeGeometry`, `Unionized`, `StateInstance`, `MemoizedSelector`, `IActionItemUUID`, `ProcedureRecord`, `Monad2C`, `EntryNode`, `JsonMap`, `GetMembersCommand`, `CallContext`, `Declarator`, `ElevationRangeSource`, `ReplaceResult`, `app.LoggerService`, `RenderView`, `SavedObjectsStart`, `TestStatus`, `AccountingTemplateService`, `TabComponentProps`, `WaterfallChartData`, `PrivateEndpointDetails`, `CloseChannelParams`, `RoleContext`, `d.ConfigFlags`, `IAvatarBuilder`, `IAtomStsd`, `ProblemMatcher`, `JPAResourceData`, `ResourcePropsWithConfig`, `CountStatisticSummary`, `SxSymbol`, `GraphQLServer`, `CreateFieldResolverInfo`, `NodeAnnouncementMessage`, `RdsMetricChange`, `vscode.ConfigurationScope`, `PlatformPath`, `AccountBalanceService`, `GetRuleGroupCommandInput`, `PartytownWebWorker`, `ChannelInfo`, `MeasureStyle`, `Suggester`, `ProofMateItem`, `AndDeciderInput`, `TranscriptConsequenceSummary`, `IPrimitiveExpression`, `Station`, `MultisigData`, `MdcRadioGroup`, `WebSocketTransport`, `ColumnDefinitions`, `IFetchParams`, `MyController`, `SubReducer`, `MapObjActorInitInfo`, `ConflictException`, `UserClaims`, `vsc.CancellationToken`, `SchemaArgInputType`, `PackageContribution`, `ClaimStrategy`, `CreatePagesArgs`, `GameMarks`, `IThriftRequest`, `FunctionKey`, `InterfaceWithoutReturnValue`, `ThyTreeService`, `NodesVersionCompatibility`, `ENDElement`, `Actor`, `ListObjectsResponse`, `IService`, `ParserFnWithCtx`, `FrameResult`, `InternalViewRef`, `IIPCClient`, `Levels`, `DashboardContainerFactoryDefinition`, `ScaffoldType.Local`, `SimpleGridRecord`, `Toggle.Props`, `OptimizerVariable`, `ListContentsCommandInput`, `WechatSettingService`, `EzModel`, `CalloutContextOptions`, 
`XPCOM.nsIChannel`, `ParsedJob`, `AllDestinations`, `NzNotificationDataOptions`, `WorkingDayOfYearByMonth`, `HandlerDecorator`, `CompilerSystemRemoveDirectoryOptions`, `DaffGetCategoryResponse`, `TradeType`, `AbstractMaterialNode`, `LocalizeRouterService`, `IAtomHeader`, `GenericComboBoxProps`, `EditorOptions`, `tslint.RuleFailure`, `ScaleByFactor`, `ValidateResponse`, `IPluginsContext`, `HeatmapConfig`, `ModifyDBClusterParameterGroupCommandInput`, `VisibleBoundary`, `Rx.AjaxRequest`, `ToolAttr`, `InputMode`, `NotWrappable`, `WebVRSystem`, `TProtocol`, `DiscordEvents`, `AuthContextState`, `VertexBuffer3D`, `TerraformAuthorizationCommandInitializer`, `SchemaElement`, `ParseCxt`, `GX.IndTexBiasSel`, `PermissionConstraints`, `WriterToString`, `ThyDialogRef`, `ResolvedFunctionType`, `IPossibleParameterFile`, `PadModel`, `TypingIndicatorStylesProps`, `WorldState`, `NVM500NodeInfo`, `ReflectContext`, `SyncMode`, `FrameBase`, `HubUtility`, `MetamaskState`, `CommonTypes.NotificationTypes.LastKnown`, `requests.ListPackagesRequest`, `MessageHandlerContext`, `ArrayProperty`, `TestFabricRegistryEntry`, `STDataSourceResult`, `IAmAnotherExportedWithEqual`, `HistoryTreeItem`, `MpqHash`, `Http3RequestNode`, `SnapshotOrInstance`, `AccordionProps`, `Blockly.WorkspaceSvg`, `GraphMode`, `ICalAttendee`, `DeleteContext`, `MarkExtensionSpec`, `AbiStateUpdate`, `RequestConfig`, `RemoteVideoStream`, `SdkPingPongFrame`, `WorkflowMapper`, `SwaggerOptions`, `SwapTable`, `Zerg`, `FilePickTriggerProps`, `ts.SwitchStatement`, `Physics2DDirectBodyStateSW`, `IRelatedEntities`, `AggregateBuilder`, `HydrusFile`, `ServiceWorker`, `BoolShape`, `SignedTokenTransferOptions`, `ConfirmDialogDataModel`, `SyncMemoryDebe`, `Fields`, `StepListener`, `TestInputHandler`, `ScanMetadata`, `IGDILogger`, `ContractInterface`, `ANIME_DICT`, `YearProgressModel`, `GridValueFormatterParams`, `NumberLiteralExpr`, `AudioOutputFormatImpl`, `NormalizedReadResult`, `OverrideOptions`, `ClJobs`, `NaviRequest`, `DeleteServerCommandInput`, `JQueryPromise`, `CustomTransformers`, `AnalysisDataModel`, `PortalService`, `GroupOrName`, `MinAdjacencyListArray`, `CssClass`, `TickPositionsArray`, `AcceptableType`, `Path3`, `TradeSearchRequest`, `evt_exec`, `ColumnReference`, `AccountMeta`, `GameObjectInfo`, `ItemSocket`, `InitiateAuthResponse`, `GlobalStorageOptionsHandler`, `ExecuteShellCommandFunction`, `Stores`, `IPty`, `TestEnvironment`, `TEmitted`, `SwingRopePoint`, `ServiceProperties`, `TwoFactorEmailRequest`, `LastSnapshot`, `PutAccountsRequestMessage`, `ComponentStyle`, `CreateStudioCommandInput`, `FactoryContext`, `Diagram`, `PlayerInput`, `SchemaOverview`, `RoseChartSlice`, `ExpressionAttributeValues`, `Highcharts.AnnotationEventObject`, `IdQuery`, `IThrottlingMetrics`, `MssEncryption`, `Authenticator`, `UpdateDistributionCommandInput`, `ex.Actor`, `QueryOrderRequest`, `SessionUserAgent`, `ComposedChartTickProps`, `MockLink`, `RpcContext`, `JSXContext`, `FetchTicketsActions`, `AuthProviderProps`, `PassportStatic`, `GitRemote`, `FunctionAnnotationNode`, `web3.Connection`, `IMode`, `TestNodeList`, `ImportWithGenerics`, `MalVal`, `requests.ListZonesRequest`, `ApplicationCollection`, `BrowserFeatureKey`, `ErrorsByEcoSystem`, `Fees`, `ModuleImport`, `DeleteResourceCommandInput`, `Telemetry.TelemetryEvent`, `HostsByIpMap`, `MeterScale`, `ColorSwitchCCReport`, `VMLElement`, `TTableOperand`, `StreamGraphNode`, `WorkflowStatus`, `TransactionSkeletonType`, `ReadonlyArray`, `GitHubIssue`, `AlertType`, `PermissionsResource`, `EthereumLedger`, `RenderPass`, 
`BaseInput`, `TasksState`, `DateRangeItemInfo`, `DeserializeEvent`, `DfsResult`, `ListChannelMembershipsCommandInput`, `ListDatasetsResponse`, `DeclarationFlags`, `CompletionExpressionCandidate`, `RewardTicket`, `WriteTournamentRecordRequest_TournamentRecordWrite`, `ActionMessage`, `requests.ListScheduledJobsRequest`, `RippleConfig`, `FeedbackContextInfo`, `DeleteDomainRequest`, `TransferItemFlatNode`, `NormalisedFrame`, `RootStoreState`, `Hub`, `Attendee`, `ConnectionMessage`, `MidiValue`, `GeoLatLng`, `DataTypeFields`, `WorkerArgs`, `BackgroundFilterVideoFrameProcessorObserver`, `AudioStreamFormat`, `IEcsServerGroupCommand`, `CustomCompletionItem`, `Reportable`, `TicTacToeAppState`, `ITagObject`, `AuthCode`, `Artefact`, `DragLayerMonitor`, `RawAlertInstance`, `DatePrecision`, `SlaveTimeline`, `sourceTextureFormat`, `RedBlackTreeStructure`, `AwaitEventEmitter`, `monaco.editor.IMarkerData`, `MemoOptions`, `ImmutableFrameTree`, `MultiFn2`, `cloudwatch.MetricChange`, `MockRepository`, `CodeMirror.Doc`, `Worksheet`, `ComponentEventType`, `AppDependencies`, `ItemTypes`, `ClientsService`, `IndexMapping`, `TraitLocation`, `ListAlarmsRequest`, `IHttpClientResponse`, `Radians`, `ListenerCallbackData`, `Extend`, `CreateTransactionOptions`, `MdcRipple`, `MarkdownFile`, `NextConnect`, `PaginationInput`, `IAdministrationItemRoute`, `BroadcastEventListener`, `CreateTableNode`, `IMenuContext`, `IFabricWallet`, `MessageDoc`, `Hooker`, `ParticipantInfo`, `TypedMap`, `TestMessages`, `ColumnSetting`, `WritableStreamDefaultWriter`, `ListCustomVerificationEmailTemplatesCommandInput`, `ChaincodeStub`, `DeleteJobTemplateCommandInput`, `Calendar_Contracts.IEventSource`, `NodeCryptoCreateDecipher`, `Jenkins`, `GenericMetricsChart`, `IConstrutor`, `SearchEsListItemSchema`, `VorbisDecoder`, `ApplicationSummary`, `ChildWindowLocationProps`, `AppNotificationManager`, `UpSampling2DLayerArgs`, `InsightType`, `UniversalRenderingContext`, `CounterST`, `Maximum`, `Bytes32`, `ChannelMetadataObject`, `BookStoreService`, `RadixTokenDefinition`, `DueState`, `TransactionSigner`, `ControlPanelSectionConfig`, `GfxMipFilterMode`, `VStackProps`, `LoginProps`, `RefactorEditInfo`, `HomeComponent`, `Difference`, `Firebase`, `ACLType`, `ExtractRef`, `GraphBatchedTransferAppState`, `PasswordBasedPreset`, `TestAccounts`, `core.ApiRequest`, `io.ModelArtifacts`, `CommandLineOptionOfCustomType`, `GotResponse`, `android.support.v7.widget.RecyclerView`, `IDiagramState`, `Monster`, `Klass`, `LernaPackage`, `AlternatingCCTreeNode`, `ContractAbiDefinition`, `TextBuffer`, `CodeEditor`, `CurrentUserService`, `BlockedHit`, `_HttpClient`, `MaybeRef`, `PaymentState`, `IEntityError`, `ConnectionSettings`, `FirestorePluginOptions`, `CallInfo`, `ProfileServiceAPI`, `Adventure`, `CodeFlowAnalyzer`, `AnimationController`, `YAxis`, `WindowState`, `AssetProps`, `IdentityMap`, `AcctStoreDict`, `ElTreeModelData`, `IrecService`, `TransportRequestOptions`, `EncoderOptions`, `PreProcessedFileInfo`, `MapOfClasses`, `LogoutOptions`, `SwitchCase`, `MessagingPayload`, `IHeftJestDataFileJson`, `requests.ListCompartmentsRequest`, `WaitForYellowSourceState`, `XNotificationOption`, `QueryTimestampsCommandInput`, `OpenSearchDashboardsDatatableColumnMeta`, `PDFPageEmbedder`, `SpecList`, `IFontManager`, `address`, `IStartTsoParms`, `LineGeometry`, `IiOSSimulator`, `TldrawApp`, `ITypeUnion`, `DeepReadonlyObject`, `CLIEngine`, `RxnArrow`, `QueryEngineRequestHeaders`, `Quad`, `ParsedMessagePartPlaceholder`, `IMiddlewareHandler`, `DescribeAlarmsCommandInput`, `Matrix2D`, 
`NpmPackageManipulator`, `TestRaster`, `InternalStack`, `NodeLoadInformation`, `VariableLikeDeclaration`, `UIProposal`, `SitesFixesParserOptions`, `InstanceResult`, `TargetLocation`, `RBXScriptConnection`, `Highcharts.PolarSeries`, `PlainObject`, `BleepsSettings`, `MockWebSocket`, `ISizes`, `IWithComputed`, `SqrlSlot`, `AzExtLocation`, `RockType`, `BlockchainTreeItem`, `IChannelServices`, `AccountResource`, `ThirdPartyCapDescriptor`, `HLTVPage`, `BridgeConfig`, `EditOptions`, `FaktoryControl`, `InputLink`, `TypedDictEntry`, `IDiffObject`, `IndicatorProps`, `MetaDefinition`, `TablePipeline`, `HierarchyParents`, `NumericValuesResult`, `TodoAppDriver`, `DialogContentProps`, `ActionFunction1`, `CreateImageCommandInput`, `ChangePasswordCommandInput`, `ChatPlugService`, `UICollectionViewFlowLinearLayoutImpl`, `LegendValue`, `VideoTileState`, `DemoChildGenerator`, `ReportTarget`, `EmbedObj`, `AjaxResponse`, `DeleteScheduledActionCommandInput`, `DebugProtocol.SetVariableArguments`, `BytecodeLinkReference`, `HTMLTableRowElement`, `LookaroundAssertion`, `In`, `EffectContext`, `InternalUnitRuntimeContext`, `CreateElement`, `WebGLEngine`, `StatisticsSetType`, `Member`, `SessionStorageSources`, `IMainState`, `BaseRouteName`, `AccessTokenData`, `firestore.DocumentReference`, `RecommendationCount`, `SourceInformation`, `ListKeyVersionsRequest`, `UUIDType`, `Moniker`, `SiteVariablesPrepared`, `ResolvedConnection`, `DbResult`, `TargetResponderRecipeResponderRule`, `IndexedGeometry`, `TextureFormat`, `NgParsedDecorator`, `ExtraGate`, `GraphSignedTransferAppState`, `B9`, `ts.WatchOfConfigFile`, `JJunction`, `Vendor`, `StringTable`, `MergeOptions`, `ComponentModule`, `RuntimePlugin`, `InsertQueryNode`, `NoteItem`, `AudioContext`, `Draft.EditorState`, `PrimaryKeyOptions`, `RunnerOptions`, `EntryHashB64`, `ItemEventData`, `HttpResponseOptions`, `SearchStrategyProvider`, `UIEvent`, `PackageFailures`, `PageEditContextInterface`, `DaffCart`, `FoamFeature`, `ROM`, `OptionsDevice`, `IFragment`, `DocumentReference`, `ListTagsForResourcesCommandInput`, `LocationType`, `Exporter`, `GradientAngularVelocity`, `IOptimizelyFeature`, `VariantCfg`, `ConstantBackoff`, `vscode.Disposable`, `WrapConfig`, `Model.Option`, `SocketEvent`, `RestContext`, `Prefixer`, `FloatSym`, `EnvironmentVariables`, `DynamicFormService`, `ReactionId`, `ProviderCard`, `DeleteDBParameterGroupCommandInput`, `FirebaseDatabaseService`, `MessageStatus`, `LanguageClientConstructor`, `ILinkedListNode`, `StringSet`, `ScopeGraph`, `CreateTargetResponderRecipeDetails`, `DescribeScheduleCommandInput`, `WorkItemService`, `IScoreCounter`, `IGarbageCollectionDetailsBase`, `SecondaryUnit`, `ComponentEnhancer`, `HomePluginSetupDependencies`, `CallErrors`, `WhitelistInstance`, `PropertyAccessExpression`, `InputListProps`, `Web3.TransactionReceipt`, `RecordingStream`, `GetGlobalObjectOptions`, `Tweenable`, `MediaMarshaller`, `RFNT`, `ModifyRelationOptions`, `ChainableHost`, `K8sResource`, `Timeslot`, `DSVParsedArray`, `ActualTextMarker`, `IAnyObject`, `CodeMaker`, `TemplateSpan`, `ComponentCompilerData`, `StartMigrationCommandInput`, `UpdateTargetDetectorRecipe`, `AbstractParser`, `StoredEventBatchPointer`, `CompressedPatch`, `PluginFactory`, `QuickFixQueryInformation`, `WithEmptyEnum`, `UmiPluginNProgressConfig`, `Slice`, `Elements`, `BundleManager`, `TVShow`, `ManifestApplication`, `BSPNode`, `DaffMagentoCartTransformer`, `ConnectionCallback`, `IGroupDataArray`, `TrackingInfo`, `UserConfigSettings`, `WorkflowContext`, `ConfigProvider`, 
`RepositoryStatisticsReadModel`, `PendingMaintenanceAction`, `IAuth`, `EditId`, `dia.Link`, `LevelActionTypes`, `Repl`, `DAL.DEVICE_ID_COMPASS`, `Apply1`, `URLQuery`, `DistinctValuesRequestOptions`, `Chars`, `IBaseTabState`, `NestedOptionHost`, `SavingsManager`, `ProductSearchParams`, `BundleRefs`, `UpperMainBelow`, `CalcFun`, `EntityAddOptions`, `ANC`, `ZoneManagerProps`, `PaginateResult`, `CaseStatuses`, `PlugyStash`, `S3.Types.PutObjectRequest`, `MultipleDeclaration`, `NotSkeleton`, `CreateTableBuilder`, `ColorSchemeName`, `ArrayLiteralExpression`, `QueryConditionOptions`, `Twitter.User`, `LineIndexSnapshot`, `d.CompilerEventName`, `BalanceMap`, `PoliticalAgendasData`, `LinkedListNode`, `WebGLRenderCompatibilityInfo`, `IMrepoConfigFile`, `RectGrid`, `TransferBrowserFftSpeechCommandRecognizer`, `IndexOp`, `MapValue`, `DraftArray`, `IUniform`, `CardTypes`, `TRaw`, `GeneratorNode`, `Reduction`, `MDCChipActionFocusBehavior`, `PriceRangeImpl`, `StateNavigator`, `AtomRef`, `AuthResourceUrlBuilder`, `IMetricsRegistry`, `TestCommand`, `ErrorDataKind`, `IGetTimeSlotInput`, `ManagementAgentPluginDetails`, `ResizeHandler`, `IconButtonGridProps`, `EmotionCache`, `GanttDate`, `V1Servicemonitor`, `EventProcessor`, `T14`, `SemVer`, `JobMetadata`, `TrackedEither`, `JimpImage`, `Repertoire`, `ISavedObjectsPointInTimeFinder`, `EAggregationState`, `SocketType`, `AccessorNames`, `ServiceWorkerConfig`, `PieLayerState`, `UnknownObject`, `S3DestinationConfiguration`, `ListControllerProps`, `ListJobsResponse`, `AutoSuggestData`, `WaveType`, `requests.ListDedicatedVmHostInstanceShapesRequest`, `ModelPlayer`, `SimulationOptions`, `IdentifiedReference`, `IActivitiesGetByContactState`, `IServer`, `NzModalService`, `ConstantExpr`, `SemanticsAction`, `configuration.Publications`, `SerializedCard`, `StationComplementPlugin`, `SliderBase`, `IContentFilter`, `AggTypeConfig`, `ItemResponse`, `TrackInfo`, `TElementNode`, `ListenerEntry`, `requests.ListVolumeAttachmentsRequest`, `JAddOn`, `RelativeLink`, `CronosClient`, `TActions`, `WorkflowStateType`, `IngredientReducerState`, `ChartPointsSource`, `BuilderDataManager`, `ActionImportRequestBuilder`, `TLE.Value`, `ButtonToggleComponent`, `MSIVmTokenCredentials`, `commonLib.IExtra`, `MimeParserNode`, `SMTCallSimple`, `TxtParentNode`, `NamedFragments`, `PlayerModel`, `SMTCallGeneral`, `ICompiler`, `JumpPosition`, `Export`, `Instantiable`, `BackendWasm`, `BattleFiledData`, `MonsterProps`, `Registerable`, `LayoutSettings`, `OscillatorType`, `FieldArgs`, `DeleteResourcePolicyRequest`, `BatchGetItemInput`, `WaitImageOptions`, `AdminIdentity`, `ProfileIdentifier`, `IOEither`, `Circline`, `LangiumServices`, `FileAccessData`, `xyData`, `BencheeSuite`, `RenderButtonsArgs`, `GetOptions`, `EnvSimple`, `CanvasPath`, `FlagshipTypes.Config`, `IParserConfig`, `VisualizeAppProps`, `TransactionResponseItem`, `TrackerConfig`, `DashboardConfig`, `THREE.Box3`, `NumericScaleLike`, `GitAPI`, `Http3QPackDecoder`, `EnhancedStore`, `EntityStateRecord`, `ChartRequest`, `Recommendation`, `CVDocument`, `TypeAssertionMap`, `AwsService`, `ReportingAPIClient`, `ManifestData`, `ManagedHsm`, `BaseConfig`, `Colony`, `VoilaGridStackWidget`, `HttpService`, `ExpressRouteCircuit`, `StylableSymbol`, `FeatureAppearance`, `ThunkCreator`, `CheckOptions`, `requests.ListAcceptedAgreementsRequest`, `DescribeModelCommandInput`, `FactPath`, `RegisterDto`, `ListDatasetGroupsCommandInput`, `CustomPluginOptions`, `AppContainer`, `FlashArguments`, `IUserDTO`, `ViewQueriesFunction`, `DocumentClassList`, 
`CustomizePanelProps`, `Vocabulary`, `BackgroundProcessState`, `GraphDataset`, `Batch`, `HistoryValue`, `WorkspaceService`, `IEcsDockerImage`, `EnhancedTestStore`, `FlowWildcardImport`, `YearToDateProgressPresetModel`, `PodSecurityPolicy`, `ISubmitEvent`, `LayoutConfigJson`, `GbTreeNode`, `IChildNode`, `VariableData`, `SharedStreetsGeometry`, `CalendarViewEvent`, `GetRouteCommandInput`, `ExpressionAstFunction`, `JQuery.TriggeredEvent`, `ClassGenerics`, `AlertComponent`, `ResolvedConfiguration`, `IActor`, `ISerDeDataSet`, `CommandReturn`, `ParametersHelper`, `IRootPosition`, `IMainMenu`, `PropsHandler`, `BindingDescriptor`, `ScraperOptions`, `MenuItem`, `ListTypeNode`, `OpenSeaPort`, `MarkdownTableRow`, `NamedBounds`, `TaskPriority`, `HttpInterceptor`, `IFileModel`, `Myth`, `FindCursor`, `Functor1`, `CategorizedClassDoc`, `SnapshotField`, `MockOtokenInstance`, `lf.schema.Column`, `ManagedShortTermRetentionPolicyName`, `BuildResults`, `ShapeModel`, `DefaultToastOptions`, `WebKitGestureEvent`, `ZAR`, `CaseReducerActions`, `GridElement`, `MockMessageClump`, `RecurringBill`, `GraphQLEntityFields`, `DeleteResourcePolicyCommandInput`, `GraphqlApi`, `HTMLIonRadioElement`, `UpSetProps`, `TestMaskComponent`, `CreateAssetCommandInput`, `Lead`, `StringifyOptions`, `IAmazonInstanceTypeOverride`, `Z64LibSupportedGames`, `TypePairArray`, `WizardStep`, `MockDocumentFragment`, `IPredictableSupportCode`, `ContentCache`, `VariableColor`, `net.Socket`, `ExpressResponse`, `EducationalMaterial`, `IFirmwareCodePlace`, `ECSComponentInterface`, `Sign`, `FluentNavigator`, `Deferred`, `ProviderConfiguration`, `GraphQLServiceContext`, `MultiDictionary`, `ChannelFactoryRegistry`, `IFieldSchema`, `ProcessRepresentationChainModifier`, `ShadowboxSettings`, `DynamicEllipseDrawerService`, `NativeComputationManager`, `DeploymentCenterData`, `VertexLayout`, `SignedStopLimitOrder`, `Dimensionless`, `CrochetCapability`, `yubo.MainReducer`, `Party`, `PubRelease`, `OrderByItemNode`, `ts.ResolvedModuleFull`, `StateChangeListener`, `DRIVERS`, `IndexResponse`, `RawNodeData`, `ImageGLRenderer`, `DQAgent`, `IProjectInfo`, `RadixAID`, `JMapIdInfo`, `ISlideObject`, `RouteExecutionFromInput`, `Region`, `LoggerText`, `NodeChanges`, `TestingLogger`, `SExp`, `css.Node`, `InstalledPlugin`, `SavedObjectsServiceStart`, `t`, `SerializerState`, `RateType`, `CallControlOptions`, `TypeAssertionSetValue`, `requests.ListAutonomousContainerDatabaseDataguardAssociationsRequest`, `NavigationEvent`, `ImGui.Vec4`, `RunLengthChunk`, `TimeType`, `ChannelTreeItem`, `BackupShortTermRetentionPolicy`, `ActivityComputer`, `PluginsService`, `TriggerId`, `ItemGroup`, `TransformPointFn`, `EntityApi`, `StdSignDoc`, `Shader_t`, `MangoGroup`, `Ent`, `B1`, `MenuType`, `IBatch`, `UserUpdate`, `ArrayDiff`, `PropertyMap`, `IOrganizationTeamCreateInput`, `PiPostAction`, `FormService`, `MockDialog`, `FunctionStats`, `PairTree`, `TooltipPortalSettings`, `ForgotPasswordVerifyAccountsValidationResult`, `SignedAndChainedBlockType`, `WorkRequestClient`, `HandlerArgs`, `QueryEngineRequest`, `Month`, `GanttGroupInternal`, `EmitHost`, `ArticleService`, `IKeyRing`, `PropertyDescription`, `GfxrRenderTargetID`, `AttrParamMapper`, `IPrintableApplication`, `IAgreementConnector`, `SnackBarService`, `EncodeInfoDisplayItem`, `WowContext`, `FileParseResult`, `ExprDependencies`, `UnivariateBezier`, `IRNGNormal`, `LogParams`, `CallExpression`, `d.PlatformPath`, `DeployOptions`, `IRegion`, `PolygonBoxObject`, `HttpEventType`, `RequestParser`, `BlueGreenManifests`, `ScaleType`, `Fiddle`, 
`VoiceChannel`, `NoteCollectionService`, `CommentData`, `PipelineTarget`, `EntitySelectorsFactory`, `SnackbarType`, `ENUM.AfflictionType`, `IPipeable`, `ISimpleGraphable`, `IMonthAggregatedEmployeeStatistics`, `Delta`, `DebugOption`, `SecureCookieOptions`, `TypeAlternative`, `CredentialCache`, `SerializationOption`, `workspaces.ProjectDefinition`, `ResolvedCoreOptions`, `BaseUI5Node`, `RouteDefinitionParams`, `StopWatch`, `ScreenState`, `Equaler`, `DeleteAliasCommandInput`, `DiscoverLegacyProps`, `AssetEvent`, `JsDocAndComment`, `NetworkInfoStore`, `ChannelSettings`, `TabbableHTMLProps`, `ListImagesCommandInput`, `AliasMap`, `SourceFileStructure`, `ItemSpace`, `requests.ListAutonomousDatabasesRequest`, `ControlProps`, `LibraryComponent`, `JSONSchemaStore`, `IEditor`, `LeveledDebugger`, `INotificationOptions`, `builder.IEvent`, `ifm.IHeaders`, `PostRequest`, `IOneClickAppIdentifier`, `Symbol`, `DynamoDBClient`, `ThreadData`, `ITemplateDiff`, `TypeVarScopeId`, `IDocumentService`, `WechatMiniprogram.CanvasContext`, `MonitoringAdapter`, `ResolverMethodOpts`, `Skin`, `PositioningPlacement`, `DataRows`, `EventmitHandler`, `ClientOpts`, `UpSetQueries`, `SourceInfo`, `Path2D`, `NamedFragmentDefinition`, `SiteStatus`, `restify.Server`, `ILendingPool`, `ISummaryTreeWithStats`, `cg.Role`, `QuestionMatrixDynamicModel`, `ConfigurableCreateInfo`, `MatchDoc`, `CISKubeBenchReport`, `IReactionDisposer`, `FullPath`, `WalletInfo`, `requests.ListWafLogsRequest`, `ConditionOperator`, `IFieldType`, `BrowsePath`, `JMapLinkInfo`, `MatchFilter`, `StorageService`, `WsPresentationService`, `ProseMark`, `OptionsNameMap`, `DocumentStateContext`, `AnalysisCache`, `VariableNode`, `LoaderProps`, `ContainerWarning`, `PathParams`, `JobSummary`, `SessionWorkspace`, `DirectiveBinding`, `AffineTransform`, `MarkedString`, `TEdge`, `ListConfig`, `Events.preupdate`, `ReportingEventLogger`, `MediaKey`, `MultisigBuilder`, `IModel`, `IDeviceWithSupply`, `ZWaveNode`, `AgeRepartitionType`, `SetbackState`, `OnSubscriptionDataOptions`, `AppServiceRegistration`, `EvaluatedTemplateArgument`, `requests.ListInternetGatewaysRequest`, `General`, `FieldsService`, `hubCommon.IModel`, `IDebtsGetByContactState`, `NetworkListenerArgs`, `WorkflowItemDataService`, `AccountApple_VarsEntry`, `ts.InterfaceType`, `GetByIdOptions`, `FiatCurrency`, `IgnoreDiagnosticResult`, `ImageFormat`, `SelectorList`, `UseFilterManagerProps`, `GroupBy`, `PartialSequenceLength`, `DebugProtocol.Event`, `apid.RuleId`, `vec4`, `CellValue`, `DataProxyErrorInfo`, `ProviderOption`, `ControlledStateProperies`, `ScalarNode`, `ProjectionMetadata`, `RepoFrontend`, `GradSaveFunc`, `WorkspaceEdit`, `vscode.CodeAction`, `RemoteFile`, `WizardTestComponent`, `TComponentControls`, `Units`, `SaveEntities`, `DescribeBackupsCommandInput`, `HardhatUserConfig`, `MatchHandler`, `DaffProductFactory`, `FastPath`, `IssueOptions`, `NineZoneState`, `PriceData`, `StylesConfig`, `Project`, `ThyTooltipDirective`, `ExpString`, `DMMF.ModelAction`, `AnimationBoneKeyFrameJson`, `PromiseEventResp`, `ServerService`, `IBlockchainEvent`, `Survey.SurveyModel`, `ParamsFilter`, `FormType`, `BlockComponent`, `InternalSymbol`, `ControllableLabel`, `InputChart`, `Escrow`, `CoverageMap`, `CinemaHallSeat`, `SinglelineTextLayout`, `PmsiListType`, `DiceRoller`, `EventService`, `MetricsOptions`, `TransferDirection`, `Installer`, `IntrospectionQuery`, `RetryConfigState`, `TargetType`, `TileTexSet`, `MovieService`, `SolutionSet`, `IGuardResult`, `PlaceholderMapper`, `TStylingContext`, `tsc.TypeChecker`, 
`ToastConfigCommon`, `IntPair`, `def.View`, `IDatabaseDataAction`, `RoomPartialState`, `TreeSitterDocument`, `RawBackStore`, `DropIdentifier`, `requests.ListCertificatesRequest`, `AwarenessUpdate`, `BlockBlobURL`, `ISourceFileReference`, `AccountsOperationIO`, `JestTotalResults`, `GetUpgradeStatusCommandInput`, `ViewerNetworkEventStarted`, `PersonData`, `StartDeps`, `DropType`, `QnaPost`, `TrendResult`, `PredicateFn`, `ActionsObservable`, `StackItemType`, `Checkbox`, `SequenceExpression`, `SimpleManipulator`, `MultipleLiteralTypeDeclaration`, `UseRefetchState`, `CreateUserRequest`, `ProfileData`, `TextEditorEdit`, `TrophySubmission`, `IUiSettingsClient`, `RTDB.Get`, `FeatureInterface`, `ComponentRegister`, `CustomSettings`, `CBService`, `AuthorizeParamsDto`, `FieldMappingSpec`, `ChooseActionStateMachine`, `GetSuccess`, `TaggedProsemirrorNode`, `ParseScope`, `PresetType`, `TokenPayload`, `MacAddressInfo`, `BLAKE2s`, `TemplateAnalysis`, `SavedObjectsCreateOptions`, `TestSuiteInstance`, `RawPermissionOverwriteData`, `InstanceNamedFactory`, `Plane3dByOriginAndVectors`, `FileTransport`, `capnp.Orphan`, `ValueMetadataNumeric`, `FormattedExecutionResult`, `ReadonlyTuple`, `TestCaseSetup`, `vscode.WorkspaceEdit`, `Capacity`, `OptionValues`, `TResult`, `RcModuleV2`, `HierarchyOfMaps`, `DeleteSnapshotScheduleCommandInput`, `WrappedAnalyticsEvent`, `JhiDataUtils`, `SetupParams`, `D3Service`, `VisualizationConfig`, `DependencyGraph`, `VisiteRepartitionType`, `TaskResolver`, `ConeRightSide`, `ListClustersCommandInput`, `OriginGroup`, `IAreaItemLevel`, `FileResult`, `DatabaseUser`, `AssociationType`, `ListUsersCommandOutput`, `HandleError`, `CallParams`, `LogBuilder`, `Drag`, `PendingModifiedValues`, `Bounds`, `ItemPositionCacheEntry`, `ICreateOptions`, `GeneralInfo`, `NextCommandOptions`, `W2`, `ProcessingPayload`, `AccessDeniedException`, `RenderInfo`, `IVottJsonExportProviderOptions`, `SpreadElement`, `SequentDescriptor`, `IEntityState`, `JsonSchemaOptions`, `DirectoryEntry`, `MetadataPackage`, `RuleEngine`, `ConsoleContext`, `CompoundStatementListContext`, `requests.ListCrossconnectPortSpeedShapesRequest`, `IFiles`, `Web3.CallData`, `CalendarConstants`, `RouterReq`, `OnPreResponseHandler`, `TimelineLite`, `DaffCategoryPageMetadataFactory`, `DatasetStatistics`, `IObjectType`, `ITextDiff`, `Json`, `ButteryFile`, `CustomFontEmbedder`, `TRK1AnimationEntry`, `JsxOpeningElement`, `DeleteResourcePolicyResponse`, `IndexPatternRef`, `EntryCollection`, `InternalFailureException`, `MockAdapter`, `IStyledComponent`, `EnvironmentType`, `d.HydrateDocumentOptions`, `PubSub`, `FeedbackDelay`, `OperatingSystem.Windows`, `iTickEvent`, `CronJob`, `IPath`, `ZonesManager`, `FibaroVenetianBlindCCGet`, `Angulartics2AppInsights`, `EndpointOperationCommandInput`, `UST`, `ReactorConfig`, `PointMarkerOptions`, `MetaDataRequest`, `HandshakeType`, `MindNodeModel`, `FirstColumnPadCalculator`, `cloudwatch.Metric`, `CompareFunc`, `t.TETemplate`, `GameObjectGroup`, `ContainerOptions`, `AudioMixObserver`, `DragDropConfig`, `TelegramBot.Message`, `Services.UIHelper`, `IMutationTree`, `Volume`, `MockPeerCertificate`, `Redis`, `Version`, `CustomResource`, `ITreeNodeAttrs`, `Browser.WebIdl`, `FragLoaderContext`, `BigQueryRetrieval`, `flags.Kind`, `IRequestHandler`, `ContractClass`, `OperationResponseDetails`, `NumberExpression`, `KxxRecordBalance`, `ComponentDoc`, `V1`, `requests.ListDbSystemPatchHistoryEntriesRequest`, `SelectionModel.ClearMode`, `DeleteDeviceCommandInput`, `AgentService`, `SigningMethod`, `ReplExpect.AppAndCount`, 
`TypeFeatures`, `OBJLoader`, `SpawnFlags`, `FlattenLevel`, `SalesInvoice`, `WebLayer`, `ServiceDefinition`, `fhir.Location`, `WebSocket`, `ImmutableSet`, `CartProduct`, `Commit`, `ScannedPolymerElement`, `MessageMatcher`, `ErrorTypes`, `MutableColorRgba`, `BrowserHistory`, `HTMLIonModalElement`, `UseProps`, `TransformFlags`, `Nexus`, `BaseSymbolReference`, `DataSourceConfig`, `CmsModel`, `OperationInstance`, `NftType`, `SocketClass`, `SlicedExecution`, `IActivity`, `Poly`, `NodeVo`, `SAO`, `CodeBuilderConcrete`, `OafService`, `SMTConst`, `ts.System`, `PatternInfo`, `IndexSpecification`, `UsersOrganizationsService`, `BuildVariables`, `ChannelEffects`, `xLuceneVariables`, `sast.Node`, `Repository`, `vscode.ProviderResult`, `TracingBase`, `TreeDecoration.Data`, `TransactionStatus`, `Grammar`, `IAttr`, `OMapper`, `SubmissionController`, `QueryCacheEntry`, `TempStats`, `CarImage`, `PetStoreProduct`, `ApolloQueryElement`, `CSharpFieldType`, `TransformableInfo`, `PlanPriceSpecManager`, `HasUniqueIdentifier`, `MagitChange`, `RSPState`, `EPerson`, `SelectionOptions`, `CreateError`, `PRNG`, `ModelItem`, `ForInitializer`, `InvokeMethod`, `ImportRecord`, `RemoteHandler`, `GraphNode`, `requests.ListCloudAutonomousVmClustersRequest`, `GitHubCommit`, `ApplicationRef`, `IPluginAuth`, `AttributeViewInfo`, `Sink`, `TestMochaAdapter`, `ListGatewaysCommandInput`, `ContainerConfig`, `Exception`, `ListViewCell`, `RegistrationType`, `ITitusServerGroupCommand`, `AssetReference`, `ContextTransformInfo`, `DialogSource`, `OperationParameter`, `preference.Set`, `JSONScanner`, `NoOpStep`, `GridCellParams`, `MXAnimatedIconData`, `Move`, `ShContextMenuItemDirective`, `ISiteDesign`, `CustomHttpResponseOptions`, `JSBI`, `IBrowser`, `Zoom`, `UIApplication`, `ContrastColors`, `ThyClickPositioner`, `PageBuilderContextObject`, `ClipRenderContext`, `DockerConfig`, `PiEditPropertyProjection`, `CompilerCtx`, `thrift.IThriftField`, `QuantityLabel`, `BezierCurve3d`, `IConfig`, `EmitterManager`, `SessionPort`, `MockedFunction`, `TextSpan`, `LuaMultiReturn`, `UnidirectionalLinkedTransferAppAction`, `NotFound`, `IThemeRegistration`, `StatsError`, `LegacyDateFormat`, `WriteBuffer`, `DebugThread`, `DOMException`, `DidChangeWatchedFilesParams`, `CmsStorageEntry`, `GX.ColorSrc`, `ClassificationResult`, `ExportOptions`, `ISpecialStory`, `BreakpointObserver`, `Warehouse`, `AddSchema`, `IDockerComposeResult`, `XRViewport`, `AuthStorage`, `CanvasElementModel`, `FileBrowserItem`, `IUpworkApiConfig`, `Platforms`, `BindingEditorContextInfo`, `AsyncArray`, `ToolbarItem`, `Func0`, `HttpOptions`, `ELBv2`, `IPersonDetails`, `NodeInstructure`, `AssignmentKind`, `MetadataError`, `ResolvedUrl`, `SheetChild`, `SpriteBaseProps`, `PngPong`, `GenericRetrier`, `ConversationNode`, `TextStylePropsPart`, `EffectHandler`, `GetMapParams`, `SsgRoute`, `viewEngine_NgModuleRef`, `QueryNameContext`, `RelativePath`, `PrivateProps`, `SliderInstance`, `JudgeClientEntity`, `DNSAddress`, `AuthenticationHelper`, `RemoteNode`, `DiscordMessageReactionAdd`, `IAppOption`, `SummaryData`, `UserRecord`, `EntityItem`, `CookieSerializeOptions`, `WechatyInterface`, `QualifiedRules`, `IncludeRecord`, `NineZoneStagePanelPaneManagerProps`, `ClusterRoleBinding`, `SqrlStatementSlot`, `SectionsService`, `RiskElementModel`, `DraymanComponent`, `ObservableArrayProxy`, `IPackage`, `ProviderToken`, `vscode.TextEditor`, `ReactVisTypeOptions`, `FileContent`, `IAppDef`, `ProviderConfig`, `IFruit`, `AggParam`, `AddressListItem`, `AutocompleteProvider`, `SyncedBackupModel`, `PixelLineSprite3D`, 
`IBlobMetadataStore`, `Seconds`, `Factory.Type`, `OnRefreshProps`, `IconsName`, `SwimlaneActionConnector`, `EventLog`, `IsoBuffer`, `ConfigurationChangeEvent`, `TerraformStack`, `CreateGroupRequest`, `QUnitAssert`, `IndexTemplate`, `IOpenSearchDashboardsSearchRequest`, `BMDObjectRenderer`, `React.Component`, `UdpTally`, `SFCBlockRaw`, `Outside`, `CLM.Condition`, `FirewallPolicy`, `WebSocketAdapter`, `a`, `AccountV10`, `PaymentMethod`, `AddressBook`, `Minimum`, `UserSessionService`, `Capability`, `PanelOptionsEditorBuilder`, `CreatedObject`, `SRT0_TexData`, `UntagResourceInput`, `OutputNode`, `WithExtends`, `WorkflowClient`, `DetachedRouteHandle`, `SwaggerJson`, `IMethod`, `Tabs.Tab`, `TimeRequestOptionsSourcesTargets`, `next.Origin`, `RoleKibanaPrivilege`, `Aes128Key`, `Compartment`, `SmallMultiplesGroupBy`, `ErrorResponse`, `colorModule.Color`, `AttributeToken`, `Controller$`, `MapLeafNodes`, `GetDeprecationsContext`, `DeleteProfile`, `FullType`, `NzDrawerService`, `StorageRecord`, `InitializeParams`, `MdDialog`, `ODataServiceFactory`, `CreateDBClusterParameterGroupCommandInput`, `ast.Grammar`, `JsxElement`, `ServiceRepresentation`, `SelectionData`, `IDashboard`, `JoinGroupRequest`, `TestDuplex`, `BrowserType`, `TokenInterface`, `StackBuilder`, `InstanceContainer`, `ConnectionAction`, `DateHistogramBucketAggDependencies`, `LibrarySeriesSeasonEpisode`, `ResourceTimelineViewWrapper`, `SettingsDataProvider`, `KeyAction`, `PostRef`, `SiteTreeItem`, `requests.ListExadataInfrastructuresRequest`, `COURSE_TYPE`, `JSDocTypeTag`, `IWorkflowDataProxyAdditionalKeys`, `K8sManagement`, `Swagger2Schema`, `ShortcutType`, `TimetableSession`, `DefaultClientMetricReport`, `EditableSelection`, `HALEndpointService`, `CodegenDesignLanguage`, `RedisConnectionManager`, `Bettor`, `ast.NodeList`, `OutputItem`, `SponsoredAuthorization`, `AllSettings`, `ExpensiveResource`, `DescribeEventsMessage`, `Alert`, `CreateProcessOption`, `Styler`, `Mutator`, `DocumentViewResponse`, `ComputedField`, `UITableViewCell`, `ServiceNowActionConnector`, `gameObject.Battery`, `CompoundPath`, `Alt1`, `IDrawData`, `WSClient`, `DeviceTypeJson`, `PutResourcePolicyCommandInput`, `DataChunk`, `PipelinesService`, `ComponentCompilerTypeReferences`, `IpGroup`, `VueAuthOptions`, `Stereo`, `ParticipantsRemovedListener`, `monaco.languages.IState`, `AbiOwnershipTransaction`, `UITraitCollection`, `TestHost`, `NetworkInterfaceInfo`, `HsButton`, `IParty`, `CreateUser`, `CompositeBatch`, `LayerFromTo`, `ISendingUser`, `ICard`, `DisabledTimeConfig`, `ElementLocation`, `HeaderProps`, `TypeConfig`, `PointerEventInit`, `TransitionStatus`, `TemplateRef`, `SelectOptionBase`, `d.ComponentCompilerMeta`, `FakeMetricsCollector`, `DisplayableState`, `VerifyEmailAccountsRequestMessage`, `Node2D`, `SearchTimeoutError`, `events.Handler`, `RoadmapType`, `Subtract`, `Cost`, `ECPairInterface`, `Right`, `BBox_t`, `EmitFiles`, `PlanetApplication`, `ProofDescriptor`, `supertest.Test`, `URIComponents`, `Sky`, `NumberNode`, `NativeError`, `ModuleNode`, `Mocha.Done`, `IScope`, `MultiKeyComparator`, `Styles`, `Http3PMeenanNode`, `AdministrationScreenService`, `IModelOptions`, `RefService`, `CustomUIClass`, `IToastProps`, `restify.Next`, `WorldLight`, `BulletOption`, `IResourceAnalysisResult`, `XRPose`, `CreateInstanceCommandInput`, `IDataFilter`, `ListStudiosCommandInput`, `DiezTypeMetadata`, `DatabaseSchemaImpl`, `SystemDomainApi`, `BoardSettings`, `QueryServiceStartDependencies`, `IFormValues`, `ContextWithActive`, `AttributeDerivatives`, `IAuthConfig`, 
`IndexedXYZCollection`, `SurveyCreator`, `WithBigInt`, `fromSettingsActions.GetSettingModelCollection`, `RedspotContext`, `MatchExecutor`, `StoreProvider`, `VoidType`, `SetGetPath`, `TestRenderer`, `KernelProfile`, `IFormContext`, `ExpectedTypeResult`, `Blob`, `DisposeResult`, `ZoneInfo`, `TransformerPayload`, `Utils`, `MagicExtensionWarning`, `MatGridList`, `ActiveContext`, `WitnessScopeModel`, `IKeysObject`, `DiagramEngine`, `FileTransfer`, `Rope`, `SafeAreaProps`, `CeloTxObject`, `TimelineRowStyle`, `ConstructionSite`, `TypeAttributeMap`, `RequestManager`, `IERC20`, `ModularPackageJson`, `PixelFormat`, `LogicalWhereExpr`, `AcceptChannelMessage`, `Turn`, `BasePackageInfo`, `HomeService`, `T1`, `ResolvedId`, `Actions`, `UrlState`, `BlobCreateSnapshotResponse`, `AccountOperation`, `CSSSelector`, `AvailabilitySlotsService`, `DeserializeFunction`, `Oas3`, `TextDocumentShowOptions`, `CharacterClassElement`, `L1Args`, `RowRendererProps`, `IParseOptions`, `PlantMember`, `OrganizationMembershipProps`, `IGroupData`, `CppBuildTask`, `GeneralObject`, `CurvePrimitive`, `GitBlame`, `AllInputs`, `ILine`, `SunBurstHierarchyNode`, `GlobalStyleComponent`, `core.ETHAccountPath`, `https.Server`, `InMemoryUser`, `FormRowModel`, `IModelReference`, `SeverityLevel`, `DoubleMap`, `Hill`, `CliArgs`, `VersionHistory`, `Insertion`, `SVGProps`, `UnhashedOrder`, `InstanceData`, `RedactChannelMessageCommandInput`, `TabsListProps`, `IEntry`, `IconProp`, `WritableData`, `ConditionalTransferUnlockedEventData`, `KeymapItem`, `TestLedgerChannel`, `BaseProtocolLabware`, `ForkOptions`, `SymFloat`, `CryptoEffectFrom`, `RemoteRepositoryRepository`, `ethereum.CallResult`, `BoundsOffsets`, `ICanvas`, `ObjectLiteralExpr`, `Experiment`, `DiffLayouterFactory`, `TransactionReceiptTruffle`, `BrowserHelper`, `FnO`, `ConsensusContext`, `HydrateStyleElement`, `PieVisParams`, `CallCompositePage`, `ISystemInfo`, `files.SourceDir`, `fabric.IEvent`, `LogService`, `ArrayComparator`, `ElementsTable`, `WithNumber`, `DragManager`, `PropertyOperationSetting`, `PrinterService`, `LongitudeLatitudeNumber`, `ArkApiProvider`, `OptionalRef`, `TransactionResponse`, `PiConcept`, `IDebugResult`, `FieldData`, `IStandaloneCodeEditor`, `AnyImportSyntax`, `ChatResponse`, `DictionaryExpandEntryNode`, `VideoInputType`, `GroupFrame`, `RtcpSourceDescriptionPacket`, `AdminAPI`, `ComponentModel`, `TokenService`, `theia.CancellationToken`, `SWRResponse`, `DiffPatcher`, `PushPathResult`, `InvalidateAPIKeyResult`, `BindingHelpers`, `KGS.DataDigest`, `SymlinkInode`, `PatternMappingExpandEntryNode`, `DomEventArg`, `SchemaDefinition`, `AxisAlignedBox3d`, `BluetoothRemoteGATTCharacteristic`, `WasmResult`, `InfrastructureRocket`, `FormatStringNode`, `ObservableLightBox`, `PluginWriteActionPayload`, `FeeStructure`, `CanvasThemePalette`, `IAuthUserWithPermissions`, `TransactionOverrides`, `WebSiteManagementModels.User`, `JsonAstKeyValue`, `NoteSnippetContent`, `ProcessResult`, `MenuPath`, `TemplateStringsArray`, `IUserProfile`, `requests.ListTransferJobsRequest`, `fs.ReadStream`, `ClassInfo`, `ChartsPluginSetup`, `AllocationItem`, `TaskFunctionCallback`, `NotificationPermission`, `UserMusicResult`, `SocialTokenV0`, `ContractEntry`, `ToneBufferSource`, `PolicyService`, `ParameterJoint`, `requests.ListAvailableUpdatesForManagedInstanceRequest`, `IndexPatternSpec`, `Rectangular`, `DataSourceType`, `SourceConfiguration`, `IEmail`, `BuilderRuntimeNode`, `ExecuteResult`, `ThunkAction`, `DeleteChannelMembershipCommandInput`, `Replacement`, `JobConfig`, `theia.Command`, 
`OverlayPositionBuilder`, `IFilterValue`, `MaybeProvider`, `oai3.Model`, `HalfEdge`, `MotionDataWithTimestamp`, `TaskExplorerDefinition`, `Forest`, `esbuild.OnResolveArgs`, `Entity.Notification`, `Outputs`, `Attributes`, `Frustum`, `B6`, `ChildAttributesItem`, `eris.Client`, `BroadlinkAPI`, `SakuliCoreProperties`, `Saga`, `GCPAuthOptions`, `ast.ExpressionNode`, `EditorManager`, `ComponentResolver`, `DbAbstractionLayer`, `ThumbnailModel`, `DaffCartPaymentMethod`, `RelatedViews`, `VSvgNode`, `TweenInput`, `DbObject`, `DropdownButtonProps`, `MessageService`, `ClassificationType`, `DiskEncryptionSet`, `DslQuery`, `EnumTypeComposer`, `SIZE`, `MutationContext`, `QueryPrecedenceCommandInput`, `DB`, `SelectionLocation`, `webhookDoc`, `SpacerProps`, `AcNotification`, `PackagesWithNewVersions`, `LitvisNarrative`, `FadeSession`, `StoreOrStoreId`, `MatchPresenceEvent`, `GLTexture`, `TransformationMatrix`, `ApplicationOptions`, `SpeakerWithTags`, `TypeScriptConfigurationBuilder`, `Kernel.IOptions`, `TreeViewInfo`, `CommandValues`, `IGlobalEvent`, `IContainerContext`, `CallbackManager`, `UpdateProjectRequest`, `FindUsersResult`, `ModifierToken`, `Web3Wrapper`, `IKeyEntry`, `CryptoContext`, `ListNotificationsRequest`, `MockERC20Instance`, `RecycleAccumulator`, `IHandlers`, `ITypeMatcher`, `UserConfigExport`, `EnumId`, `ValueID`, `StyleIR`, `EffortInfo`, `StreamReturn`, `Espree`, `FileDoc`, `DefaultBrowserBehavior`, `HttpCode`, `DirectoryInode`, `DSpaceSerializer`, `BlockTag`, `PrepareQuery`, `ts.JsxAttribute`, `BuildifierConfiguration`, `TagsProps`, `UploadFileOptions`, `SerializedStyles`, `Shadow`, `EntityComparisonField`, `UpdateActionDef`, `IModuleMinificationResult`, `WaterfallStepContext`, `CreateRoleDto`, `EventSearchRequest`, `ChaCha20Poly1305`, `ConfigKey`, `MdcTopAppBar`, `CalculatePvService`, `Storage`, `AssetID`, `P5`, `CallerIdentity`, `vscode.Selection`, `ProfileProviderResponse`, `RulesClient`, `ExpandPanelActionContext`, `WritableStreamBuffer`, `StoreKey`, `AllActions`, `ScalarTypeDefinitionNode`, `PluginStrategy`, `HomePublicPlugin`, `BreadcrumbLinkProps`, `RendererInfo`, `mat4`, `GitStashReference`, `ENABLED_STATUS`, `ValidationParamSchema`, `eventHandler`, `ClassBody`, `APIQuery`, `HTMLParser`, `Session.IModel`, `TSrc`, `Groupby`, `TE.TaskEither`, `AccessRule`, `ProjectMetadata`, `IKeyboardDefinitionStatus`, `UserAdministrative`, `INgWidgetContainerRawPosition`, `models.NetCore`, `FeatureSource`, `GDQOmnibarMilestoneTrackerPointElement`, `SnackBarOptions`, `CovidData`, `ICommandArgs`, `BaseMaterial`, `LinkReduxLRSType`, `MIPS.Register`, `BScroll`, `EntryTypes`, `Primitives.Value`, `MutationRequest`, `TronTransactionInfo`, `WFunction`, `GLTF.IAccessor`, `NoteRepository`, `FileStorageOption`, `ColorRegistry`, `NotificationData`, `Search`, `CoinbasePro`, `Unsubscribe`, `ClassNode`, `ContractFactory`, `Weighter`, `CustomLoader`, `IRadioGroupProps`, `AuthorReadModel`, `ProductsService`, `SavedQuery`, `BlockGroup`, `ApmConfiguration`, `CacheItem`, `SendToAddressOptions`, `GroupEventType`, `PluginConfigSchema`, `CmsModelFieldToGraphQLPlugin`, `TraverseContext`, `StopDeploymentCommandInput`, `Position`, `StableVer`, `VertexPlaceholder`, `IAvatarProps`, `CSC`, `Brew`, `GroupDescription`, `GrowableXYZArray`, `NullableMappedPosition`, `RemixConfig`, `ListPoliciesResponse`, `DeployStageExecutionStep`, `DataBlock`, `CodePoint`, `NZBUnityOptions`, `RenderOptions`, `NavigationNode`, `IFeatures`, `LodopResult`, `AnimationMixer`, `MsgSharedGroup`, `GaussianDropout`, `ICXCreateOrder`, `ModuleLoader`, 
`InventoryStat`, `ConfigModule`, `DeviceRegistryService`, `VariantAnnotation`, `HostStatus`, `P10`, `Flap`, `ElasticsearchModifiedSource`, `StackSummary`, `LightChannelControl`, `KeyListener`, `LayertreeItemNode`, `Mesh`, `SetupFunction`, `DbSeed`, `IUserNote`, `FileEditorSpec`, `LinearSearchRange2dArray`, `IDeviceInformation`, `ChartParams`, `BoolQuery`, `MerchantGoodsSkuEntity`, `Agents`, `DataSourceSpec`, `TransferBtcBasedBlockchain`, `PlanSummaryData`, `GetPointTransformerFn`, `AsyncCommandResult`, `AssignableObject`, `ConnectionManager`, `LongestNudgedSegment`, `CallIdRef`, `KeyCurve`, `DataWriter`, `GraphError`, `StackMap`, `JsonIdentityInfoOptions`, `DescriptorProto_ReservedRange`, `ContentLinesArrayLike`, `OptionInfo`, `InPacketBase`, `requests.SearchListingsRequest`, `Touch`, `ControllerSessionScope`, `FrontMatterResult`, `FontMetrics`, `CharacterStatsCalculator`, `DocumentRange`, `vscode.TextEditorSelectionChangeEvent`, `LVarKeySet`, `Immediate`, `CommandPath`, `TreeFile`, `PiProperty`, `KeywordDefinition`, `GfxrAttachmentClearDescriptor`, `MappedType`, `TokenProps`, `BehaviorTreeStatus`, `TransportRequest`, `GluegunCommand`, `IHTTPRequest`, `TextAlign`, `TimeTrackerService`, `MetricDimension`, `LocalFluidDataStoreContext`, `StylusState`, `TileBoundingBox`, `Deep`, `Bills`, `QuadrantRefHandler`, `SuccessfulResponse`, `ITagMatchInfo`, `ExecContext`, `UIHelper`, `ChartData`, `ProcessConfigurationOptions`, `fused.Activation`, `DoorLockCCOperationReport`, `OF.IDropdownOption`, `UnregisterCallback`, `TraitLabel`, `KeystrokeAction`, `QueryProvidersResponse`, `TelemetrySender`, `VisibilityEdge`, `PerpMarketConfig`, `AsyncStateRetry`, `ActionFactory`, `FlexibleConnectedPositionStrategyOrigin`, `NzGraphDataDef`, `ParsedArgs`, `ShellWindow`, `TaskConfiguration`, `RemoteSeries`, `MdcDialogRef`, `GridsterItemComponentInterface`, `requests.ListBastionsRequest`, `InterventionTipsStatuses.StatusIds`, `PackageNode`, `CheckboxGroupState`, `GridCellValue`, `CacheBehavior`, `DescribeDatasetRequest`, `StopDBClusterCommandInput`, `SorterResult`, `PositionConfig`, `Kleisli`, `ExtensionProperty`, `PointToPointLine`, `BlockedRequester`, `MentionInputorElement`, `ResourcePolicy`, `ISkillInfo`, `StreamHead`, `SankeyDiagramSettings`, `ObservableChainQuery`, `CallSignatureInfo`, `K2`, `AsyncIterableQuery`, `ChainIndexingAPI`, `MetricAggType`, `OpenApiApi`, `Margins`, `CoinHostInfo`, `IEnhancer`, `Nil`, `FixCreator`, `Scenario`, `ComponentEvent`, `PositionOffset`, `SavedObjectWithMetadata`, `IntrospectionNamedTypeRef`, `DescriptorIndexNode`, `SideNavItem`, `BusyService`, `CharLevelState`, `DeleteQueryNode`, `CreateWebhookCommandInput`, `GeneralActionType`, `EntityCollections`, `d.OutputTargetDistCustomElements`, `CreateDataSourceCommandInput`, `FlushMode`, `DocsTargetSpec`, `Computed`, `NgbPanelChangeEvent`, `OpenAPIV3.ParameterObject`, `TimelineStep`, `IPlan`, `MagicRPCError`, `HeatmapSpec`, `TestResults`, `ConstantTypes`, `GravityArgs`, `ThyTransferItem`, `FoodItem`, `MultiSelectRenderProps`, `Escape`, `DogeBalanceMonitorConfig`, `ImplicitImport`, `SubtitlesTrackId`, `ParameterNode`, `ProgressType`, `IApiInfo`, `GraphWidget`, `ReferenceRenderHandler`, `DataTypeDefinition`, `UserDataService`, `CredentialsOptions`, `ExportKind`, `OutputFlags`, `Viewer.Viewer`, `PurchaseProcessor`, `SGSymbolItem`, `ResolveStylesOptions`, `Order2Bezier`, `TabularSource`, `RSAPublicKey`, `ASTValidationContext`, `JurisdictionDomainModel`, `SavedObjectsRawDocParseOptions`, `EdaColumn`, `NodeUnit`, `Recorder`, 
`Crdp.Runtime.ConsoleAPICalledEvent`, `sdk.TranslationRecognitionResult`, `AuthResult`, `PaletteOptions`, `ResponseCV`, `HttpProbeProtocol`, `DataCache`, `IConnectionOptions`, `Stat`, `PopoverProps`, `CaseConnector`, `Testability`, `EntityQuery`, `CommandLineTool`, `DateOption`, `FormInternal`, `SwapFn`, `ResourcesToAttributions`, `LayoutService`, `FormValidator`, `MyElement`, `DataTableEntry`, `OrgEntityPoliciesPlan`, `YawPitchRollAngles`, `PatientService`, `IBoxPlotData`, `VerticalAlignments`, `theia.Uri`, `Highcharts.AnnotationControllable`, `PlayingCard`, `DownloadService`, `WithAttributes`, `QuestionProps`, `CreateParameters`, `ItemT`, `PopupOptions`, `Aes256Key`, `UserInfoData`, `QuerySet`, `BlockDoc`, `BanList`, `TikTokScraper`, `ElementAst`, `LineSeriesStyle`, `HTTPRequest`, `FullConfiguration`, `sdk.ConnectionEventArgs`, `AnimatorChildRef`, `CardTagsProps`, `IGenericEntity`, `BoundBox`, `EncodeApiReturnOutput`, `FlagsT`, `BoundsData`, `ITelemetryLogger`, `UpdateVolumeCommandInput`, `IWorkerChannelMessage`, `GoldTokenWrapper`, `DeployedApplication`, `OffchainTx`, `MetadataRegistryState`, `TxnJsonObject`, `MochaOptions`, `DebugStateLegend`, `Intf`, `AnyNativeEvent`, `EmailModuleOptions`, `Gaxios`, `MyDefaultThunkResult`, `WorkspaceDefinition`, `IGetJobPresetInput`, `Todo_viewer`, `BotCalculationContext`, `StoredReference`, `EdmxParameter`, `Specialty`, `BatchDeleteImageCommandInput`, `LibraryBuilderImpl`, `SalesOrderState`, `FileError`, `KeyVaultManagementClient`, `UserResumeEntity`, `LanguageService`, `DomainPanel`, `DocumentationLink`, `TaskFn`, `ProjectConfiguration`, `MatchedRoute`, `StateService`, `PersistenceHelpers`, `TextProperty`, `ProjectedXYArray`, `TypedAxiosRequestConfig`, `HtmlTemplate`, `CLM.TrainDialog`, `Description`, `MessageRenderer`, `IEmeraldVault`, `ObservableThese`, `RawValue`, `ApplicationTemplateAPIAction`, `Endorser`, `GestureDelegate`, `SchedulerApplication`, `TaskContext`, `Talk`, `OperationType`, `typescript.SourceFile`, `PropertyMeta`, `GfxIndexBufferDescriptor`, `TableSuggestion`, `DependencyResolved`, `DevicePixelRatioMonitor`, `IPrompter`, `LinkSteamRequest`, `GenericBreakpoint`, `CustomContext`, `JSONWebToken`, `CreateProjectCommand`, `LinkOptions`, `IUpsertScalingPolicyCommand`, `StyleNode`, `RepeatForRegion`, `ExpressionValueError`, `PersonAssignmentData`, `DatabaseFactory`, `XhrDetails`, `FormDataEvent`, `TimelineById`, `BigInteger`, `ScriptType`, `SNS`, `quat`, `SidenavContextType`, `TableInfo`, `NavigationTrie`, `InvokeDecl`, `FromViewOpts`, `React.SVGAttributes`, `PaymentRequest`, `IValidBranch`, `MockDialogRef`, `PrecommitMessage`, `VersionCheckTTL`, `ButtonGroupProps`, `IDocumentManager`, `IconMenuItem`, `CoordinatesObject`, `KeySchema`, `Amounts`, `ShortcutEventOutput`, `BYOCLayer`, `GridChildComponentProps`, `FixedPointX64`, `JsonRpcHandlerFunc`, `ISettingAttribute`, `MessagingOptions`, `GLfloat`, `IsometricPath`, `IAuthenticatedHubSearchOptions`, `NewFOS`, `BodyComplexClient`, `CacheAdapter`, `IncomingRegistry`, `IRangeResponse`, `TemplateArguments`, `GetIdentityVerificationAttributesCommandInput`, `PushRequest`, `PaySlip`, `ConnectedWallet`, `PluginModule`, `ListCertificatesResponse`, `FieldRule`, `GetPrTimelineQuery`, `api.IZoweTree`, `Monitor`, `GitRevisionReference`, `THREE.ShaderMaterialParameters`, `Impl`, `MapStoreState`, `apid.ManualReserveOption`, `Events.collisionend`, `CreateTagDto`, `ITimelineGroup`, `FlipCorner`, `IItemAddResult`, `InitializeMiddleware`, `IGatewayMember`, `IRenderableColumn`, `ButtonItem`, `ScanMessage`, 
`LoaderConfig`, `ServerClass`, `NotionalAndUnrealizedPnlReturns`, `FormLayoutProps`, `CoerceFunc`, `OAuthProvider`, `CryptoFunctionService`, `VisibilityGraph`, `TrackProp`, `TestFile`, `ITestFluidObject`, `TypeType`, `ClampedMonth`, `TableForeignKey`, `MachinomyOptions`, `TorusPipe`, `BufferWriter`, `ServerIO`, `StateHandler`, `StatusResponse`, `FluentRules`, `BlobStore`, `UpgradePolicy`, `ObjectDoc`, `IPropertyValueDescriptor`, `React.UIEvent`, `SCondition`, `QCProject`, `EditorSchema`, `IExportFormat`, `XcodeCloud`, `DeleteFilterCommandInput`, `Inherits`, `GetBalanceActivityOptions`, `HeroSearchService`, `ListRulesResponse`, `Comma`, `SimpleExpression`, `vscode.Uri`, `AlertDescriptionProps`, `KeyIndexImpl`, `DeltaAssertions`, `SpyTransport`, `CacheState`, `PartialQueryLang`, `IProcFSEntry`, `File`, `ProofResponseCoordinator`, `XPlace`, `NamedCurveAlgorithms`, `IModelDecoration`, `SearchState`, `QualifiedValueInfo`, `LoggingServiceConfiguration`, `TreeWalker`, `KeywordMatcher`, `Weave`, `DescribeExportCommandInput`, `UpdateSubscriptionsRequest`, `AndroidTarget`, `OpenSearchDashboardsDatatableRow`, `InlineComposition`, `FindWithRegexCb`, `GetRepository`, `RouteMatcher`, `RouterActions`, `PartyPromote`, `ReferenceMonth`, `LabelProvider`, `ModuleDest`, `ChromeExtensionService`, `FolderData`, `FIRUser`, `IRoomObject`, `PointCloudOctreeGeometry`, `SortableSpecService`, `VmixConfiguration`, `requests.ListSecretVersionsRequest`, `Text.JSON`, `RollupCache`, `PluginsStatusService`, `Augur`, `ReadReceiptReceivedEvent`, `MetaProperty`, `ProxyIntegrationTester`, `serializedNodeWithId`, `DeleteStorageObjectId`, `ArenaAllocationResult`, `HomeAssistant`, `SetStateAction`, `ServiceContext`, `MessagePayload`, `pxtc.ExtensionInfo`, `BitcoinTransactionInfo`, `AerialMappers`, `ImportResolver`, `INotifyItem`, `AuthResponse`, `HintResults`, `AttrValue`, `ng.IModule`, `DragObjectWithType`, `ILanguageTemplate`, `TestObservable`, `SignupResponse.AsObject`, `JsonTokenizer`, `MenuPositionY`, `EnvTestContext`, `PrintExpressionFlags`, `ProductRepository`, `commander.Command`, `IQueryParameter`, `TaskUser`, `GlobalInstructionType`, `Deno.ListenOptions`, `ISharedContent`, `HighlighterOptions`, `BundleResult`, `MemAttribute`, `ITasksGetByContactState`, `IRGBA`, `MetaProps`, `ConfigDict`, `NativeBookmarks.BookmarkTreeNode`, `WyvernSchemaName`, `UserListQueryDto`, `Highcharts.NetworkgraphPoint`, `FieldError`, `PropTypes`, `SingleKey`, `TrackedStorage`, `ImageAsset`, `ClientSocketPacket`, `SectionItem`, `TransactionGenerationAttempt`, `TActorParent`, `JsonStringifierParserCommonContext`, `IExpectedArtifact`, `Processes`, `TPDISearchParams`, `IAtom`, `JsonComposite`, `TrialVisitConstraint`, `Permission`, `UseLazyQueryOptions`, `BaseAdapterPool`, `grpc.Code`, `DragSourceArguments`, `IDropdownOption`, `Cat`, `MarginPoolInstance`, `Elevation`, `TaskObserver`, `DataProps`, `IProject`, `SemicolonClassElement`, `B15`, `FavoritePropertiesOrderInfo`, `SingleValueProps`, `DotnetInsightsGcDocument`, `PrerenderUrlResults`, `webpack.Stats`, `IRecurringExpenseDeleteInput`, `ScreenReaderPartitionTableProps`, `IFilm`, `ERC1155ReceiverMock`, `TickSource`, `JwtPair`, `FindListOptions`, `Stopwatch`, `TournamentList`, `ExpressionRenderError`, `UsersResponse`, `IValueConverter`, `NzUploadFile`, `SourceComponent`, `ExtractorInput`, `CategoryMap`, `Hierarchy`, `TileMatrixSet`, `ServiceProvider`, `GetProfileCommandInput`, `BundleEntry`, `RegisteredPlugin`, `Signale`, `IValidateProjectOptions`, `ActionFilterAsync`, `ActivationLayerArgs`, 
`StorageMeta`, `AnnotationsMap`, `RnM2BufferView`, `OrderWithContract`, `CanvasSpace`, `FieldFormatConvertFunction`, `Arg0`, `order`, `FeatureVersion`, `i.PackageInfo`, `AttrRewriteMap`, `KnownAction`, `AwsOrganizationReader`, `ParentType`, `UhkModule`, `EntityMapperService`, `Powerlevel`, `DescribeClustersCommandInput`, `IGovernancePowerDelegationToken`, `RectScrollFixed`, `Opt`, `VirtualFilesystem`, `SBGClient`, `ThanksHistory`, `PinoLogger`, `FriendRequest`, `CacheMap`, `PackageManagers`, `IVector`, `VDocumentFragment`, `BarProps`, `ITilemap`, `TQuery`, `AlertUtils`, `ExtensionManager`, `CacheWithRedirects`, `CreateEmManagedExternalExadataMemberEntityDetails`, `UnixTimestamp`, `RSSFeed`, `TestStateBase`, `AllowAction`, `EncodedTransaction`, `TestElementDrivesElement`, `FileDiagnostics`, `ReduxAppState`, `PartyMatchmakerTicket`, `ts.ArrowFunction`, `KeybindingScope`, `DescribeJobLogItemsCommandInput`, `MangoClient`, `DescData`, `Datatable`, `GradientVelocity`, `SagaActionTypes`, `ts.Diagnostic`, `MatrixProvider`, `PadplusMessagePayload`, `AsyncLogger`, `PageListProps`, `vscode.TestController`, `StorageFile`, `Overlord`, `ApiV2Client`, `HashMapEntry`, `ControlSetItem`, `TreeElement`, `TransformHelper`, `ScopeableRequest`, `AnalyticsModule`, `SMTIf`, `TransactionButtonInnerProps`, `IConnect`, `StateBottomSheet`, `d.JsDoc`, `AnnotatedFunctionABI`, `SmsHandler`, `TransformProps`, `ExtendedChannel`, `IGitManager`, `Unpacked`, `EmbedType`, `CkbMint`, `IDBPTransaction`, `ServerPlatform`, `supertest.SuperTest`, `OutputGroup`, `IExpressionLoaderParams`, `HistoryItem`, `ParentComponent`, `AnnotationActionTypes`, `QueryResultProps`, `LabelAccessor`, `HsSensorUnit`, `SingleSpaAngularOptions`, `AppModels`, `TestKmsKeyring`, `PerspectiveTransform`, `LGraphCanvas`, `SingleYAMLDocument`, `AreaPointItem`, `AssignmentStatus`, `DeleteAppRequest`, `LocationInformation`, `WalkerStateParam`, `RunConfiguration`, `IXYOperator`, `RequestorBuilder`, `kifp_element`, `Dependency1`, `SheetContextType`, `AuthPartialState`, `AbstractHttpAdapter`, `ReservedIP`, `ObstaclePort`, `Chart`, `GoldTokenInstance`, `MDCRippleFactory`, `sdk.IntentRecognitionResult`, `PuppetASTResolvedProperty`, `TETuple`, `DeleteDistributionCommandInput`, `PutImageCommandInput`, `SeedGenerator`, `TVector`, `ParserInputWithCtx`, `QueryMiddlewareParams`, `FileWrapper`, `User.Type`, `SchemaConfigParams`, `PluginConfigDescriptor`, `FieldProps`, `RemoveGroupControlAction`, `CallProviderProps`, `TypeObject`, `TypeC`, `FlushConfig`, `CsvFormatterStream`, `GnosisSafeContract`, `IndicesService`, `GlobalStateService`, `RollupOutput`, `IAsfObjectHeader`, `MeshColliderShape`, `ExplorerView`, `ReportConfigurationModel`, `StackParams`, `ValueAxis`, `ActivityRequestData`, `ScrollToColumnFn`, `InternalsImpl`, `CreateTestRendererParams`, `d.HostRef`, `EditorAction`, `PN`, `TestReadable`, `OneInchExchangeMock`, `ProgressTracker`, `ExtendedKeyInfo`, `Vue`, `ApiRun`, `RecordColumn`, `DynamicFormNode`, `LoggerAdapter`, `OrganizationsService`, `FeatureContext`, `MLKitVisionOptions`, `ApplicationTypeGroup`, `TransientGraphUtility`, `FileTrackerItem`, `FunctionDefinitionConfig`, `MenuModel`, `IServiceManager`, `StatementCST`, `EventData`, `IGitAccount`, `ConversationContent`, `ObjectProps`, `ModuleBase`, `Readable`, `ObjType`, `IEcsTargetGroupMapping`, `Attribute`, `LocalFraudProof`, `AnimGroupData`, `ChannelWrapper`, `OperationNode`, `O.Option`, `megalogDoc`, `GithubGlobalConfig`, `ImGui.Vec2`, `CoreHelpers`, `CollectionDataStub`, `IProperty`, `RecordMap`, 
`TPlayItem`, `TodosState`, `SceneParams`, `PlaywrightElementHandle`, `EidasRequest`, `NativeEventEmitter`, `Vector4d`, `JVertex`, `LoginParams`, `ModuleStoreSettings`, `BodyAndHeaders`, `GatsbyConfig`, `SequenceContract`, `ChunkRange`, `APIUser`, `FieldRenderProps`, `GitHubService`, `Image`, `Badge`, `ContentApiService`, `Substream`, `MonitoredElementInfo`, `sinon.SinonStub`, `MarkdownView`, `CommonIdentity`, `StreamResetResponseParam`, `Travis`, `ts.PropertyAccessExpression`, `Internals`, `DynamicArgument`, `DynamoDB.BatchGetItemOutput`, `SpatialOctreeNode`, `Bezier`, `FieldParser`, `SystemData`, `Highcharts.DataGroupingApproximationsArray`, `ProcessedImportResponse`, `EdgeSnapshot`, `AmdModule`, `EntityProps`, `CivilHelper`, `WriteTransactionRequest`, `ListingData`, `IHttpFetchError`, `RenderBufferTargetEnum`, `IPartitionLambdaConfig`, `NavSource`, `BlockchainCode`, `Journal`, `GetThreadResponseType`, `ScopeMap`, `DocInstance`, `FetchResult`, `PromptType`, `RemoteConfigTemplate`, `DataPublicPlugin`, `MomentInterval`, `StyleSheetType`, `AggregationResultMap`, `TypingIndicatorReceivedEvent`, `ColliderData`, `Subscriber`, `NodeStatus`, `setting`, `RouterMenuItem`, `ResolvedSchema`, `LayoutNode`, `ConfirmHandlerCallback`, `TestActions`, `ValidationRecord`, `InfoDialogService`, `BalanceActivityCallback`, `AccentColor`, `PotentialLemma`, `Groups`, `ProvideCompletionItemsSignature`, `Day`, `DynamicDialogRef`, `QueryCreateSchema`, `CRC16CCCommandEncapsulation`, `IndexState`, `requests.ListStreamPoolsRequest`, `ESTermSourceDescriptor`, `AxisDependency`, `ModelProps`, `Equality`, `Triangle3`, `RootNode`, `Locals`, `SessionKeySupplier`, `IGlobOptions`, `DissociatePackageCommandInput`, `EmployeeStore`, `TArrayValue`, `Should`, `ObjectShape`, `JSONRPCProvider`, `SpecialBreakTypes`, `NodeLink`, `Hint`, `ILabShell`, `MagicMessageEvent`, `CompilerEventDirDelete`, `BookingVariant`, `MouseService`, `HttpHeader`, `DialogProperty`, `PO`, `ItiririAsync`, `AddressHashMode.SerializeP2SH`, `A2`, `WorkingHour`, `RepoData`, `EntityToFix`, `DecodedIdToken`, `ExtendedError`, `IWmPicture`, `RadioChangeEvent`, `SnackbarContext`, `GetBotCommandInput`, `DatasetManager`, `SchemeObjectsByLayers`, `cc.Sprite`, `NamedModel`, `AirtableBase`, `EnumNodeAndEdgeStatus`, `INgWidgetEvent`, `DescribeAppCommandInput`, `DepListener`, `Hello`, `SelectorFn`, `dia.Graph`, `ThySlideConfig`, `PrintJsonWithErrorsArgs`, `ProofItem`, `CellTile`, `requests.ListNetworkSecurityGroupsRequest`, `THREE.OrthographicCamera`, `N6`, `ColorRulesOperator`, `ExpressRouteCircuitConnection`, `FilterDef`, `RoutableTileNode`, `ServerHelloVerifyRequest`, `AbstractRule`, `UserMessage`, `WishlistState`, `AlainSFConfig`, `JsxOpeningLikeElement`, `TSDNPromise.Reject`, `IMedia`, `MinimongoDb`, `ChunkExtractor`, `Model.LibraryStoreItemState`, `ClientAuthentication`, `SegNode`, `OrderedStringSet`, `DejaTile`, `Params$Create`, `FFMpegInput`, `DiagnosticAction`, `d.ComponentRuntimeHostListener`, `ViewState`, `HunspellFactory`, `apid.ChannelId`, `UiRequest`, `BluetoothScale`, `CodeBuilder`, `InlineFragmentNode`, `DeleteProjectRequest`, `ControlOptions`, `SFValue`, `AnyId`, `AutomationHelper`, `FilesystemProvider`, `VariableParserAST`, `UserInput`, `StatCalculated`, `SubModel`, `ITranslationMessagesFile`, `MinimalViewPortConfig`, `StructureMap`, `OperationSupportMatrix`, `Node.Expression`, `CoExpNum`, `ServiceIdRequestDetails`, `FsReaddirItem`, `paper.CompoundPath`, `DomainCategory`, `GPUImageCopyTexture`, `HostFileInformation`, `TenancyEntityOptions`, `Chlorinator`, 
`PubkeyInfo`, `DataReader`, `IIconSubset`, `DeleteAccountsValidationResult`, `SpecConfiguration`, `GetWebhookParams`, `SearchResultsLayer`, `IComputedValue`, `GluegunPrint`, `EdgeImmutPlain`, `HsdsId`, `FakeProviderConnection`, `ButtonInteraction`, `IPageContainer`, `TeamDocument`, `DailyApiResponse`, `OneIncomingExpectationRepository`, `ResolveResponse`, `FeeType`, `OperatorPrecedence`, `ListrTask`, `IGen`, `express.RequestHandler`, `GPUTextureFormat`, `SessionsActions`, `PostConditionPrincipal`, `GetCanonicalFileName`, `ListBundlesCommandInput`, `ApexLibraryTestRunExecutor`, `BillingGroup`, `IDriver`, `PutDedicatedIpWarmupAttributesCommandInput`, `TAuthUserInfo`, `DecompositionResult`, `BookService`, `NodeMap`, `SetupDependencies`, `RediagramGlobalConfig`, `UpdateServiceRequest`, `Basis`, `EitherExportOptions`, `CategoryTranslation`, `FlowDocument`, `WithdrawStakingRewardUnsigned`, `SupCore.Data.Schema`, `NodeMessage`, `Permute`, `RegularNode`, `SqrlErrorOutputOptions`, `ResultDate`, `DropdownMenuProps`, `EndOfDeclarationMarker`, `GraphQLSchemaPlugin`, `HttpTestingController`, `FileStream`, `lf.Database`, `IDraggableProps`, `DokiThemeConfig`, `CandleLimitType`, `ISceneView`, `FeedQueryVariables`, `NzSliderValue`, `SystemMouseCursor`, `Lumber`, `HookEnvironment`, `StreamedData`, `LCImpl`, `LogFunction`, `PropOptions`, `IValues`, `IndexPatternValue`, `INestApplication`, `interfaces.Container`, `IEditorStore`, `ICSVInConfig`, `ServerRequestHandler`, `Recursion`, `GetDomainDeliverabilityCampaignCommandInput`, `NavigationContainer`, `CoverageFlatFragment`, `FlatList`, `PDFAcroPushButton`, `UINavigationController`, `TelemetrySavedObject`, `IRequire`, `ActionCreatorWithNonInferrablePayload`, `TripleObject`, `Organizations`, `CmsEntry`, `TeamMember`, `HumidityControlSetpointCCGet`, `RegName`, `ExtendedKeyboardEvent`, `btSoftBodyWorldInfo`, `IVec3Term`, `NSV`, `PropertyDefinition`, `ContainerClient`, `CompositeParserException`, `AddressInformation`, `Cubic`, `WindowModule`, `UnitsProvider`, `BindingState`, `InputValue`, `IPricedItem`, `ViewerEventType`, `TraceStep`, `SimpleExprContext`, `FeederPool`, `PlotBandOptions`, `Sinks`, `IPublisher`, `RadixAtom`, `NamedTensor`, `MapSavedObjectAttributes`, `GetCellValueFn`, `TransactionDetails`, `NodeDependency`, `ITestRunnerOptions`, `EvaluatedExprNode`, `Resolvers`, `CategoryData`, `SimpleSelector`, `LastfmArtistShort`, `Split`, `AutocompleteSelectCellEditor`, `MdDialogConfig`, `CanaryAnalysisConfiguration`, `ResolvedPointer`, `WriteTransactionReply`, `APIOrder`, `TileDisplacementMap`, `IAccessor`, `MetaverseService`, `ServerDto`, `PortalProps`, `ServiceModule`, `AvailableSpaceInConsole`, `AbstractField`, `AuditLog`, `TimedVariableValue`, `primitive`, `IAdapter`, `TextureSourceLoader`, `BenefitService`, `ScrollService`, `TagValueParser`, `PluginHooks`, `RegisteredServiceAttributeReleasePolicy`, `ConfirmOptions`, `CeloTokenContract`, `Redlock`, `API.services.IXulService`, `ts.LiteralTypeNode`, `QueryParser.QueryNode`, `ListenerFunction`, `SharedStreetsReference`, `AdadeltaOptimizer`, `Space`, `UmlNotation`, `DescribeEventsCommandInput`, `WatcherFolderMap`, `DbSmartContract`, `FeatureAst`, `Focusable`, `MarkdownPostProcessorContext`, `TestDoc`, `Out`, `SubscriberType`, `IChannelState`, `InputLayer`, `TreeNodeValue`, `AiPrivateEndpointSummary`, `AvailabilityZone`, `Claims`, `CGAPIResourceHandle`, `MediaFileId`, `VaultActivity`, `ContractCallOverrides`, `TextElementGroupState`, `IncrementalNodeArray`, `DeviceConnection`, `TheCloudwatchDashboardStack`, 
`FileOutput`, `TreeSeriesNodeItemOption`, `HttpLink`, `RouterSource`, `SubPredArg`, `aws.s3.Bucket`, `KubectlContext`, `TEBopType`, `WholeHalfNone`, `J3DFrameCtrl`, `Koa.Next`, `VideoDescription`, `MemoryHistory`, `ApiRx`, `BucketMetadata`, `X12QueryEngine`, `ISelectHandlerReturn`, `Async`, `ParsedItem`, `AllocationUpdatedArg`, `Some`, `Patch`, `requests.ListClusterNetworkInstancesRequest`, `ServiceImplementations`, `NSNotification`, `ModItem`, `VariationInfo`, `DAL.DEVICE_ID_BUTTON_B`, `SingletonList`, `PromiseOrValue`, `TJSONObject`, `SourcemapPathTransformer`, `IoLog`, `JsonfyDatasource`, `Certificate`, `AppData`, `PublicAccessBlockConfiguration`, `DescribeStreamCommandInput`, `Rest`, `IndexerManagementResolverContext`, `CombinedItemPriceInfo`, `UIResource`, `DetailedCertificate`, `VerifierConfig`, `BuildEvents`, `ExecutionLogSlicer`, `ModifyClusterCommandInput`, `Sha256`, `RepositorySettingsValidation`, `IndicatorAggregateArithmetic`, `MessageStatusService`, `MDNav`, `VirtualNetworkTap`, `WalletI`, `StorageHelper`, `AppStatusStore`, `WeakMap`, `PartitionSmallMultiplesModel`, `HeaderStreamManipulator`, `FormatWrap`, `ADialog`, `FieldNode`, `TestingConfig`, `IPos`, `GetModelCommandInput`, `DeleteDBClusterCommandInput`, `PerQuadrant`, `PackageModuleRef`, `Diagonal`, `CdkTableDataSourceInput`, `IEntityType`, `ReverseIndex`, `MatchingOptions`, `CreateDomainRequest`, `Droppable`, `Canceler`, `ApplyBuffEvent`, `WorkflowStepInput`, `FeederData`, `FormatMetadata`, `ViewerConfiguration`, `BTCAccountPath`, `BaseTelemetryProperties`, `GfxMegaStateDescriptor`, `PaymentService`, `AutoScalingGroup`, `IViewModel`, `FcNode`, `EntityDeserializer`, `CloudFrontRequest`, `MouseEventToPrevent`, `MockStyleElement`, `JDevice`, `IndexPatternLayer`, `ProductVariant`, `LoadedVertexLayout`, `WebKitEntry`, `DefaultOptions`, `CreateGroup`, `HTMLDataListElement`, `LineChartLineMesh`, `EventEmitter`, `CopySink`, `CoapResponse`, `THREE.BufferAttribute`, `IEntityRef`, `AbstractClass`, `VisitOptions`, `GeoUnits`, `MinimalFS`, `FieldDef`, `EthUnlockRecord`, `Adapt.AdaptElement`, `LogAnalyticsParserField`, `AttributeParser`, `MetricSourceIntegration`, `HashKeyType`, `FilterEntity`, `RefType`, `SvelteElement`, `SignerWithAddress`, `GLTF.AccessorComponentType`, `ClipPlaneContainment`, `PDFOptions`, `CriteriaFilter`, `TransformMessage`, `LayoutItem`, `NodeClient`, `HammerInput`, `ICommandBarItemProps`, `PersonStatusType`, `NestedRoutes`, `EnumHelper`, `stream.Writable`, `ImageMimeType`, `NamespaceMember`, `TemplateParameter`, `IocContext`, `Fixer`, `IWatchCallback`, `Phase`, `HdPrivateNodeValid`, `IMetricAggConfig`, `PropEnhancerValueType`, `CollaborationWindow`, `ModeledMarker`, `BasicPoint`, `TypeCacheEntry`, `ConfigInterface`, `IExpense`, `ContactDocument`, `TabProps`, `UserDataCombined`, `ReactiveControllerHost`, `SupportedFormat`, `ValueOrPromise`, `CompositeFilterDescriptorCollection`, `KudosPollService`, `CurveChain`, `HomebridgePlugin`, `ESTree.AssignmentExpression`, `StoredNetwork`, `MapRewardNode`, `FaunaString`, `SocketClient`, `MappedStates`, `NextFn`, `ShuftiproKycResult`, `ISettingStorageModel`, `M2ORelation`, `peerconnection.Data`, `KeyBindingCommandFunction`, `BackupFile`, `StackLineData`, `Rectangle`, `CanActivate`, `Members`, `ScheduledEventRetryConfig`, `d.PackageJsonData`, `SnsMetricChange`, `LabelValue`, `TextType`, `sentry.SentryEvent`, `EventInput`, `PlayerLadder`, `ParsedRequestParams`, `IOdspTokenManagerCacheKey`, `SubtleButton`, `requests.ListTaggingWorkRequestLogsRequest`, 
`UserFunctionNamespaceDefinition`, `StateWrapper`, `DeleteOrganizationCommandInput`, `ChannelMessageList`, `IBaseProps`, `HdStellarPaymentsConfig`, `LoadBalancer`, `PairSide`, `SettingService`, `ODataEnumType`, `CreateGatewayCommandInput`, `NETWORK_NAME`, `ComponentFixture`, `textFieldModule.TextField`, `cc.Vec2`, `DataFileType`, `VAStepData`, `ProjectTemplate`, `TinaCMS`, `ScriptParsedEvent`, `RtpHeader`, `requests.ListInstancePoolInstancesRequest`, `Readonly`, `HubServer`, `EntityBuilder`, `CliCommand`, `PokemonService`, `CanvasTexture`, `ErrorLike`, `SlashingProtectionBlock`, `jdspec.SMap`, `InteractionReplyOptions`, `ReactIntl.InjectedIntl`, `KEXFailType`, `AstNodeWithLanguage`, `SemanticMeaning`, `Hit`, `FrameControlFactory`, `DietForm`, `RollupConfig`, `NumberConstructor`, `GlossyMaterial`, `ParserType`, `FilterOptionOption`, `FormActionType`, `DeviceManagerState`, `SpacesServiceStart`, `PositionGrid`, `AppiumDriver`, `NamedImport`, `GunScopePromise`, `EncryptionType`, `SpatialCategory`, `UpdateContent`, `FileObject`, `QueryFilterType`, `__String`, `requests.ListIdentityProviderGroupsRequest`, `iI18nConf`, `RTCSctpCapabilities`, `WechatQRCodeEntity`, `TokenCategory`, `Db`, `MyObject`, `RenameParams`, `TestSettings`, `TEChild`, `Wrapap`, `ColumnObjectType`, `IQueryInput`, `TestFunction`, `RunData`, `JsxAttributeLike`, `CreditCardView`, `EntityBuilderType`, `SubscriptionDiagnosticsDataTypePriv`, `Http3HeaderFrame`, `ResolvedElement`, `ListField`, `SqliteValue`, `ExtHostCommentThread`, `TestsManifest`, `OctreeNode`, `Vector2`, `EditorChange`, `PatternMatch`, `CSS.Properties`, `CephLine`, `CheckpointsOrCheckpointsId`, `Divider`, `Serializable.GraphSnapshot`, `SchemaRefContext`, `OpenAPI.HttpOperation`, `requests.ListImageShapeCompatibilityEntriesRequest`, `QuantumMove`, `Knex.TableBuilder`, `XHRBackend`, `mm.INativeTagDict`, `bitcoinish.BitcoinishPaymentTx`, `StdFunc`, `TooltipModel`, `chrome.tabs.Tab`, `XmlRecording`, `FabricEnvironmentTreeItem`, `CSSService`, `Fence`, `Dot`, `DashboardTableData`, `CoinbaseKey`, `IntPairSet`, `DetectionResult`, `Cloud`, `SelectableValue`, `IAssociationParams`, `OnEvent`, `ReportingCsvPanelAction`, `DecodedAttribute`, `SObject`, `Prefix`, `ActionSheetController`, `AbiItem`, `GetUserInfoQuery`, `TerraNetwork`, `NETWORK`, `VulnerabilityReport`, `PSIInteger`, `ObservableTitleTopBar`, `WalkResult`, `HipiePipeline`, `InlinableCode`, `PuppetClassInfo`, `PoiBatch`, `CrawlContext`, `firebase.firestore.QueryDocumentSnapshot`, `AnyShape`, `RenegotiationIndication`, `React.DragEventHandler`, `Toolbar`, `_ISchema`, `DOMAPI`, `Coder`, `ForestNode`, `NewPackagePolicyInput`, `PrimitiveBundle`, `ArrayBufferWalker`, `MessageType`, `FormError`, `FileSource`, `Declaration`, `AndroidBinding`, `P8`, `WindowInfo`, `MessageData`, `ThemeInterface`, `Command.Command`, `TypeScriptDeclarationBlock`, `Tsoa.ReferenceType`, `Email`, `SuperAgentRequest`, `MethodAbi`, `napa.zone.Zone`, `TestResolverDTO`, `IExpressionLoader`, `Cpu`, `ConfigureLogsCommandInput`, `ListFilesResult`, `HubConnection`, `BotAction`, `CancelQueryCommandInput`, `ISolutionExplorerService`, `GetPredicateParams`, `CheckConflictsParams`, `LoginUriData`, `IChannelFactory`, `ts.LanguageService`, `PromiseState`, `SimpleStatementContext`, `SpectatorFactory`, `MerkleTreeNode`, `IQuickeyOptions`, `MetricRegistry`, `DVector3d`, `UnitFormService`, `V1Deployment`, `DownloadJob`, `ThumbnailProps`, `TScope`, `SnapshotOptions`, `LoaderFn`, `TxOutput`, `IMatchingCriterions`, `ListApplicationsCommandOutput`, `SolcOutput`, 
`ComposedChartProps`, `IntTerm`, `ShortcutObject`, `StateInvocationParams`, `IOpenAPI`, `DeveloperExamplesPlugin`, `CameraState`, `Accessory`, `JKRCompressionType`, `CCResponsePredicate`, `ProjectStatsChartDataItem`, `ComputedEnum`, `ConstraintTiming`, `Revision`, `SFSchemaEnum`, `HttpResult`, `IRichTextObjectItem`, `PDFName`, `SessionInfo`, `PropertyMatcher`, `Nth`, `ExtenderHandler`, `GetRequest`, `RedisCommandArgument`, `ChannelEthContract`, `ModulePath`, `VerifyOptions`, `OpenApi`, `ObjectLayer`, `TaskDefinitionRegistry`, `TestMethod`, `MapLayer`, `React.PropsWithChildren`, `CommandLinePart`, `ClassBuffer`, `ImportInterface`, `CompilerError`, `BuildSourceGraph`, `QueryFunction`, `IMergeBlock`, `LRUCache`, `FileSystemCommandContext`, `ColorHelper`, `PipeDef`, `CanvasIcon`, `RelayerUnderTest`, `TimelineProps`, `Ulimit`, `ApplicationInfo`, `LocaleNode`, `BeInspireTreeNodes`, `GetReadinessCheckResourceStatusCommandInput`, `FileType`, `TestTreeHierarchyNode`, `ShouldShow`, `StoredState`, `ButtonComponent`, `SpecHelperConnection`, `AutoCompleteContext`, `vscode.TreeView`, `messages.TestStep`, `TilemapProject`, `BroadcastTxResponse`, `Int8`, `TreeNodeService`, `CircleBullet`, `EmailConfirmationHandler`, `FileReader`, `DomainName`, `BedrockServiceConfig`, `SavedObjectReference`, `Lease`, `IDebugProvider`, `OpenObject`, `ExtendOptions`, `MarketsAccount`, `NodeFetchHttpClient`, `ThermostatSetpointType`, `GoAction`, `StagePanelsManager`, `fabric.Image`, `TimePrecision`, `DirectiveHarvest`, `IORedisInstrumentationConfig`, `LazyMedia`, `Z`, `ObjectStore`, `___JSE_XLSX___Node`, `DomNode`, `OrbitControl`, `Injection`, `connection`, `PropertyKey`, `ImageryMapExtentPolygon`, `AsyncStateNavigator`, `WebhookPayload`, `PrimaryTableCol`, `JobQueue`, `ODataResource`, `AssetBindings`, `DataManager`, `MessagePacket`, `_SelectExplanation`, `TileCoordinate`, `cormo.Connection`, `UserFilter`, `ListCustomPluginsCommandInput`, `ClientType`, `DeepLink`, `DeprecationsRegistry`, `KeyValue`, `BlobModel`, `HTMLRewriter`, `XPCOM.nsXPCComponents_Results`, `MODEL`, `GraphGroup`, `UrlMapping`, `HasInfrastructure`, `$p_Predicate`, `TaskClient`, `QueryServiceClient`, `fromUserActions.GetReviewersStatisticsCollection`, `TypeName`, `ObservableConverter`, `EditRepositoryCommand`, `MigrationData`, `RibbonEmitterWrapper`, `WhereBuilder`, `basic.Token`, `GDQBreakBidManyOptionElement`, `AppComponent`, `IExecutableContext`, `ISpace`, `Crypto`, `EllipseProps`, `AutoScalingConfigurationSummary`, `IHydrateResult`, `RenderValue`, `vec3`, `Conv3D`, `StatelessOscillator`, `Json.ParseResult`, `FileSystemAdapter`, `TaskService`, `RestRequest`, `IRenderable`, `SongBundle`, `THREE.Vector3`, `SdkIndexFrame`, `ControlFlowGraph`, `Register64`, `MDCMenuSurfaceFoundation`, `RBNFRule`, `VisType`, `OrganizationalUnitResource`, `PostComment`, `ApiService`, `SchemaEntry`, `HKTFrom`, `Colord`, `IStats`, `LinksList`, `ListType`, `CreateQueueCommandInput`, `GoogleAuthProvider`, `ESMessage`, `CurrencyService`, `CPU6502`, `CheckNodeResult`, `DocumentContext`, `UnicornInfo`, `IpAddress`, `KeyValueChangeRecord_`, `ExtensionInfo`, `OptimisticLockError`, `CppSettings`, `IAzureQuickPickItem`, `HoverInsertActions`, `TextureType`, `ChoicesEntity`, `S2GeometryProvider`, `LineUpJSType`, `EventTouch`, `Align1`, `cPhs__Status`, `IGroupCellRenderer`, `PersistencyBlockModel`, `TileProps`, `SidebarButtonProps`, `ViewSlot`, `DeployState`, `VariablesManager`, `RTCRtpSendParameters`, `FaunaNumber`, `SharePlugin`, `DataModel.CellRegion`, 
`ConnectionHealthPolicyConfiguration`, `UseTransactionQueryState`, `GraphRequest`, `CompressorOptions`, `ListManagementAgentImagesRequest`, `ConfigAccumulator`, `CategoryState`, `Interface2`, `ClipPreRenderContext`, `PortalWorldObject`, `styleProps`, `EnvelopeGenerator`, `SpringSequenceStep`, `StackGroupPath`, `PaginatedTiles`, `React.ErrorInfo`, `KeywordErrorCxt`, `Test.TestLogType`, `ImportLookupResult`, `TaskProvider`, `INestApplicationContext`, `StatusFollow`, `gPartial`, `ProposedPosition`, `HumidityControlSetpointType`, `RenderPlugins`, `MessageProps`, `RequestQueryParser`, `CameraContext`, `ContactsProvider`, `IDanmaTrackInfo`, `RxTranslation`, `CalderaElement`, `MyContext`, `RenderNodeAction`, `PatternLayout`, `DateTimeFormatOptions`, `iDataTypes`, `ParsedFileContent`, `IOAuth2Options`, `PQueue`, `browsing.FilesView`, `TopicOrNew`, `CategoriaProps`, `IndexUUID`, `AudioStreamFormatImpl`, `ColumnType`, `ContextMenuParams`, `ResponseGenerator`, `Conv2DProgram`, `ParseConfig`, `TypedAction`, `EditorInterface`, `TestcaseType`, `StringAtom`, `WorkbenchPageObject`, `ExtendedAppMainProcess`, `TransactionDescription`, `JSONSourceData`, `MoneyAmount`, `BoolTerm`, `DynamicInputModel`, `OutputParametricSelector`, `XYAndZ`, `IgApiClient`, `Agenda`, `CollapsedFormatField`, `ScopedObjectContextDef`, `WaitOptions`, `CliProfileManager`, `TransferOptions`, `AggregationType`, `PrismScope`, `cytoscape.NodeSingular`, `DerivedAtomReader`, `CreatePostInput`, `DataSourceOptions`, `DAVObject`, `Types.GenerateOptions`, `SchemaEnum`, `IParameter`, `TSTypeLiteral`, `EventFragment`, `TaskTypeDictionary`, `GraphicsGroup`, `RenderSprite`, `TPluginsSetup`, `MemberLikeExpression`, `CraftDOMEvent`, `ContextualIdentity`, `HistoryRecord`, `TileData`, `Insets`, `NSArray`, `NineZoneNestedStagePanelsManager`, `ValidatorProxy`, `OutputCache`, `MooaApp`, `SupportedExchange`, `RequestPausedEvent`, `LineSelection`, `NgxFileDropEntry`, `CompactProtocol`, `ISmartContract`, `requests.ListAutonomousContainerDatabasesRequest`, `FrontendLocaleData`, `HTMLIonItemElement`, `ITree`, `AppRegistryInfo`, `BigNumber.Value`, `OAuth`, `LibraryInfo`, `ModalHelperOptions`, `TileMapAssetPub`, `StyleBuilder`, `MarkdownSimpleProps`, `WebGLShaderPrecisionFormat`, `displayCtrl.IShowConfig`, `SearchParameters`, `EnrichedAccount`, `RefLineMeta`, `HierarchyFacts`, `AnimationPosition`, `PublicTransition`, `Q.Deferred`, `PrimitiveAtom`, `WritableOptions`, `FirebaseUserModel`, `TestInterface`, `WithReservedWord`, `CreateProjectResponse`, `IAggConfigs`, `TaroNode`, `SystemVerilogSymbolJSON`, `WalletInitializationBuilder`, `BuildPageRangeConfig`, `OptionalVersionedTextDocumentIdentifier`, `TestEmbeddable`, `CommandType`, `Length`, `TaskRunner`, `LinuxDistribution`, `CommanderStatic`, `ResolverRelation`, `IErrorInfo`, `ExploreResult`, `DecimalArg`, `ObserverResponse`, `IModule`, `UnionMember`, `CreateViewNode`, `MDL0ModelInstance`, `UsageInfoHoverInfo`, `ListVaultReplicasRequest`, `XTransferSource`, `NetworkTraceData`, `TransactionSignature`, `NativeStackScreenProps`, `ISearchQuery`, `EventSubscriptionCallback`, `ListServiceQuotasCommandInput`, `BasicEnumerable`, `DiscoveredClass`, `MockEventListener`, `ProviderObservedParams`, `d.LoggerTimeSpan`, `Schema$RowData`, `BooksState`, `tmrm.TaskMockRunner`, `ManagementApp`, `RollupResult`, `PropertyPair`, `StagePanelManagerProps`, `SpreadStorableMap`, `StackUtils`, `GfxVertexAttributeDescriptor`, `IDBPCursorWithValue`, `RequestContract`, `GfxBufferBinding`, `ImageAndTrailImage`, `OutputDataSource`, 
`MoveEvent`, `SoFetch`, `BotMiddleware`, `Documentation`, `i18n.IcuPlaceholder`, `BulkApplyResourceAction`, `PublishDiagnosticsParams`, `D3Link`, `ts.AnyObject`, `Preview`, `Behavior`, `GUI`, `ResponsiveValue`, `MarkerData`, `KontentItemInput`, `BufferType`, `Language`, `PoiManager`, `RelayerRequestSignatureValidator`, `Village`, `Verdaccio`, `EvaluatedScript`, `GetExperimentCommandInput`, `Tick`, `ToRunType`, `MultiMap`, `AgentQq`, `ITaskContainer`, `GX.TevScale`, `BigNum`, `ConfigFileExistenceInfo`, `SecurityPermission`, `loader.Loader`, `ColumnInstance`, `CpeDeviceConfigAnswer`, `HTTPAuthorizationHeader`, `IndexFileInfo`, `SearchInWorkspaceResult`, `Highlighter`, `AzureFileHandler`, `IContextLogger`, `IDifferences2`, `DealCriteria`, `ParserErrorListener`, `IMineMeldAPIService`, `ContractOperationCallback`, `AlainSTConfig`, `ResourceStatus`, `TPLTextureHolder`, `TimeOpStatementContext`, `DriverModel`, `EventHandler`, `PrivateEndpoint`, `DiscordUser`, `StopTransformsRequestSchema`, `ParamWithTypeMetadata`, `W7`, `OnChildElementIdArg`, `InferableComponentEnhancerWithProps`, `CdkRowDef`, `SetupDeps`, `FunctionFiber`, `IUIAggregation`, `CoinSelectOptions`, `RelayerTypes.PayloadEvent`, `INodeParameters`, `TreeNode2`, `SyntaxCheck`, `ts.ParseConfigHost`, `PaymentOpts`, `AccessKeyStorageJson`, `FieldEntity`, `ReviewComment`, `NonEmptyArray`, `PSIReal`, `CSSOpts`, `ErrorEvent`, `MarkerClustererOptions`, `NetworkgraphLayout`, `ParsedNode`, `CustomHelpers`, `MigrateResolve`, `Polynomial`, `Mailbox`, `PathVal`, `ARMRomItem`, `Raw`, `RowVM`, `IGenericField`, `BoundingSphere`, `IUserRole`, `MultiSegmentArena`, `CardContentProps`, `IEventData`, `DataGridRow`, `KnownMediaType`, `FieldVisitor`, `FabRequestResponder`, `item`, `RawError`, `IDerivation`, `Requester`, `IVirtualDeviceValidatorResultItem`, `Tx`, `DependencyItem`, `UpdateResponderRecipeResponderRule`, `Truffle.Contract`, `HighContrastModeDetector`, `IRCMessage`, `DatasourcePublicAPI`, `MsgSignProviderAttributes`, `TileState`, `DialogItemValue`, `IResolveWebpackConfigOptions`, `Markup`, `FilterRequest`, `AdagradOptimizer`, `MappedDataRow`, `ParamModel`, `EvictReasonType`, `AbstractRunner`, `QualifiedOTRRecipients`, `RowLayoutProps`, `PathDefinition`, `DiagnosticAndArguments`, `PluginMetrics`, `ContainerInfo`, `Flanger`, `UnitMusteringRule`, `TsInterfaceInfo`, `BasicKeyframedTrack`, `InterfaceNamespaceTest`, `MapSubLayerProps`, `AnimationArgs`, `Fermata`, `IPickState`, `AzureAccount`, `SslConfig`, `DescribeReplicationTaskAssessmentRunsCommandInput`, `MerchantGamePrizeEntity`, `Comp`, `InputNodeExpr`, `LwaServiceClient`, `CollisionStartEvent`, `ExpressionStepDefinition`, `GXMaterialHelperGfx`, `FiltersActions`, `AccountFacebookInstantGame_VarsEntry`, `WebrtcConn`, `SplinePoint`, `BarChartOptions`, `SwiftVirtualNetwork`, `A`, `IPFSFile`, `S1`, `PositionalArgument`, `SlpTransactionDetails`, `TypingIndicatorStrings`, `ListJobsCommand`, `GetUpdateConfigParams`, `theia.WebviewPanelShowOptions`, `SRule`, `ProtocolType`, `DomainBounds`, `IBaseImageryPluginConstructor`, `APIResponse`, `SearchFilterConfig`, `requests.CreateCertificateRequest`, `LogAnalyticsSourceEntityType`, `AsyncActionCreators`, `ResourceManager`, `protos.common.IApplicationPolicy`, `NoiseServer`, `InstanceKey`, `ObjectIdentifier`, `RenderService`, `DescribeOrganizationCommandInput`, `ComponentCompilerMeta`, `APIResource`, `IExistenceDescriptor`, `CoreConfig`, `ActorRenderModeEnum`, `HostString`, `PlanService`, `DraggedWidgetManagerProps`, `Ship`, `SeriesPoint`, `X12QueryResult`, 
`sdk.SpeechRecognitionResult`, `MqttClient`, `NotDeciderInput`, `AppDispatch`, `SteamTree`, `EsErrors.ElasticsearchClientError`, `OpenYoloProxyLoginResponse`, `BaseFullNodeDeploymentConfig`, `OutboundTransport`, `MockedResponseData`, `Glue`, `Numeric`, `PythonShellError`, `SVErrorLevel`, `RemoveOptions`, `IHomebridgeUiFormHelper`, `ColumnBuilder`, `VendorType`, `ISlideRelMedia`, `FormatToken`, `PanelConfig`, `Timeslice`, `FakeShadowsocksServer`, `EventProxy`, `CapsuleColliderShape`, `Configurations`, `Joint`, `IViewRegionsVisitor`, `Pojo`, `OpenSearchDashboardsServices`, `ExecutionStatus`, `MethodParam`, `HarnessAPI`, `Polymorphic`, `HookNextFunction`, `CommonTableExpressionNode`, `EntityAdapter`, `ContextWithMedia`, `SmartBuffer`, `ServiceProxy`, `WeatherService`, `UseCaseFunction`, `LinearRegressionResult`, `JOverlap`, `Taint`, `Gadget`, `ICliCommandOptions`, `ENDProgram`, `DisclosureInitialState`, `Current`, `UpdateRuleGroupCommandInput`, `ParseArgument`, `Try`, `ResourceLimitExceededException`, `MessageEnvelope`, `util.StringEncoding`, `ast.WhileNode`, `NLClassifierOptions`, `PostProcessingFactory`, `HiFiCommunicator`, `KibanaFeature`, `SystemLayout`, `TinyCallContext`, `long`, `HSD_TExpList`, `SplitStructureAction`, `EngineDefaults`, `VpcConfiguration`, `AcceptInvitationCommandInput`, `ForgotPasswordEntity`, `WebSocket.ErrorEvent`, `WebhookSettings`, `GfxQueryPoolP_GL`, `AuxPartition`, `CheckerOption`, `MockSetup`, `IPartialLocaleValues`, `Exercise.Question`, `AnimationEntryMetadata`, `PromiseQueue`, `BroadcastEvent`, `SearchInputProps`, `AnimationKeyframesSequenceMetadata`, `EmbeddableInput`, `VideoPreference`, `DeleteImageCommandInput`, `IndexThresholdAlertParams`, `DataTableService`, `EventModel`, `FormConnectionString`, `_DeepPartialObject`, `SavedObjectsBulkUpdateOptions`, `ListAssetsCommandInput`, `CommonLayoutParams`, `DebugProtocol.Request`, `ThroughStream`, `py.ScopeDef`, `GrainPlayer`, `DocumentInput`, `ContainerModel`, `LoadingIndicatorProps`, `AnimationTransform3D`, `ServerOptions`, `UIToast`, `PreprocessCollector`, `DataFrameAnalyticsConfig`, `StreamPipeOptions`, `server.AccessKeyId`, `VideoLayer`, `PublicRelayerConfig`, `GenerateAsyncIterable`, `Cypress.PluginEvents`, `MintAssetRecord`, `MoveSeq`, `PeerRequestOptions`, `ClassMetadata`, `Selector`, `MyAccountPage`, `IBaseTransaction`, `ICondition`, `AddressHashMode`, `ToneEvent`, `StorageLocation`, `FilterCreator`, `RenderContext`, `VulnerabilityAssessmentPolicyBaselineName`, `HTTPProvider`, `CardImage`, `CredentialManager`, `AppWindow`, `SuiteThemeColors`, `ColorData`, `BitcoinCashAddressFormat`, `SrtcpSSRCState`, `UpdateContext`, `Mapping`, `ScaleContinuous`, `ChannelStoreEntry`, `AndroidProjectConfig`, `DiagnosticSeverity`, `fhir.DocumentReference`, `AthenaClient`, `DOMExplorerClient`, `TypeChange`, `ProofRecord`, `KDF`, `CasesClientArgs`, `RawModule`, `LABEL_VISIBILITY`, `TQuestionFull`, `requests.ListComputeCapacityReservationsRequest`, `IToast`, `NoteItemDummy`, `MetadataRecord`, `IPCMessagePackage`, `StreamEmbedConfig`, `DescribeProjectCommandInput`, `tf.Scalar`, `IMiddlewareEvent`, `PoolClientState`, `Resizable`, `Telegraf`, `TypeOrmModuleOptions`, `PhysicsBody`, `AddToQueryLogDependencies`, `ShipBlock`, `RequestHeaders`, `ITreeNode`, `StynPlugin`, `HighlightedType`, `NumericF`, `ProductInformation`, `CommunityDataService`, `CompatibleDate`, `Tenancy`, `ConnectionStore`, `FindOneOptions`, `IconDefinition`, `Shrewd.IDecoratorOptions`, `PublishParams`, `Observed`, `RequestResult`, `NameType`, `OperationModel`, 
`PiLanguage`, `SecurityManager`, `DescribeChannelModeratorCommandInput`, `AudioData`, `Computer`, `LastError`, `MyPromise`, `ProjectRiskViewEntry`, `InputOperation`, `NavigationViews`, `FetchArgs`, `ISlickRange`, `MockSegmentStore`, `MultipleInterfaceDeclaration`, `events.Args`, `LocaleService`, `FlowNarrowForPattern`, `ts.ImportClause`, `OES_vertex_array_object`, `BreadcrumbsNavProps`, `HotswappableChangeCandidate`, `IParameterValuesSource`, `MsgDeleteProvider`, `TSBreak`, `MyCombatLogParser`, `CreateDataAssetDetails`, `ActorContext`, `QueryFilter`, `MatDateFormats`, `CoreSetup`, `FunctionToMemoize`, `ContractCall`, `TaskOption`, `ListTagsForStreamCommandInput`, `QueryMnemonic`, `SlideElement`, `SDPCandidateType`, `ICombinedRefCheck`, `TransactionExplanation`, `OutOfProcessStringReader`, `IBasePickerSuggestionsProps`, `EntityProperty`, `AllowedParameterValue`, `EnvironmentConfig`, `LiteralMap`, `MyClassWithReturnExpression`, `Jimp.Jimp`, `SignupDTO`, `GeneratedCodeInfo_Annotation`, `ColorPickerEventListener`, `MenuData`, `DeleteChannelCommandInput`, `Mdast.Root`, `ToolbarUsage`, `TemplateTermDecl`, `SVAddr`, `EffectAction`, `StepFunctions`, `UserInfo`, `GraphNodeID`, `BungieService`, `BIP85Child`, `CGOptions`, `NetworkType`, `CreateGroupCommandInput`, `HdPrivateKey`, `SerializedPrimaryKeyOptions`, `GovernorOptions`, `Models.OrderStatusUpdate`, `VillainService`, `SendData`, `InternalCoreSetup`, `BullBoardQueues`, `MutationCallback`, `SerializationService`, `IKeyObject`, `React.FocusEvent`, `MonitoringGroupContext`, `TypescriptParser`, `ComboBoxGroupedOptions`, `AuthActionTypes`, `IndexType`, `CpuRegister`, `ERC1155Mock`, `CreateDiagnostic`, `t.VariableDeclaration`, `StateCreator`, `Index`, `msRest.ServiceCallback`, `LookupDescriptor`, `Star`, `PageOptions`, `DebugProtocol.ContinueArguments`, `PaymentResource`, `SettingsSpec`, `PreQuestInstance`, `Recorded`, `QueryResult`, `SelectBase`, `JsonRpc`, `MangoAccount`, `DestinationOptions`, `SymbolAccessibilityDiagnostic`, `CrochetTrait`, `TokenTransferPayload`, `ModdedDex`, `FeedbackState`, `IGetPaymentInput`, `QueryDetails`, `WebSiteManagementModels.AppServicePlan`, `BoundMethodCreator`, `LocalizeRouterSettings`, `SceneGraphNodeInternal`, `MockedOptions`, `ClampedValue`, `GitQuickPickItem`, `WebDNNCPUContext`, `RE`, `HdRipplePaymentsConfig`, `ThisTypeNode`, `IGenericDeclaration`, `PlantProps`, `ListRulesRequest`, `EventReporter`, `StaticSiteARMResource`, `String`, `Mocked`, `RollupError`, `HelmetData`, `SitecorePageProps`, `monaco.languages.ProviderResult`, `LeafonlyBinaryTree`, `ContractParameter`, `BaseCoin`, `CompileTarget`, `StackHeaderInterpolatedStyle`, `CachePage`, `CapnpVersion`, `PacketParams`, `AggParamEditorProps`, `Peer.DataConnection`, `ActionData`, `FileIdentifier`, `ResolveStore`, `ChildProcessWithoutNullStreams`, `UserMusicDifficultyStatus`, `FourSlashFile`, `Additions`, `UseMediaState`, `AggregateColumnModel`, `RouteComponentProps`, `NbDialogRef`, `IAuthenticationService`, `multiPropDiff`, `providers.BlockTag`, `P2PRequestPacketBufferData`, `GetParseNodes`, `UpdateProjectCommandInput`, `Memory`, `ir.Expr`, `IOctreeObject`, `Dirigibles`, `Spread`, `Backend`, `DecodedLog`, `Knex.QueryBuilder`, `ValueParserParams`, `DependenceGroup`, `GroupState`, `ContextMessageUpdate`, `MessageContext`, `OnExistingFileConflict`, `ResetDBClusterParameterGroupCommandInput`, `JsonaProperty`, `VocabularyModel`, `PopupProps`, `IMatchWarriorResult`, `PrismaClientClass`, `NuclearMeta`, `SemanticTokens`, `worker.IWorkerContext`, `ICtrl`, 
`FieldValidation`, `DynamicIndentation`, `LoadedExtension`, `ParsedImport`, `CompatConfig`, `StrokeCountMap`, `Light_t`, `Filterer`, `ScannedElementReference`, `ExportJob`, `WheelmapFeature`, `AnimationState`, `FastifyTypeBoxRouteOptions`, `DaprManager`, `PackageTreeItem`, `INodeWithGlTFExtensions`, `Reconciler`, `PortingLocation`, `ChangePasswordInput`, `Cancellable`, `GatewayTreeItem`, `AdaptMountedPrimitiveElement`, `vscode.StatusBarItem`, `ToastRequest`, `PaymentChannel`, `GLBuffer`, `IMetric`, `esbuild.Metafile`, `SurveyConfig`, `ModalWrapperProps`, `ElUploadRequest`, `BitStream`, `BuildComparator`, `Restriction`, `MlJobWithTimeRange`, `GenerateConfig`, `Hsva`, `Modules`, `Topology`, `LiteralLikeNode`, `parser.Node`, `SignedBy`, `DevToolsExtension`, `IRawDiff`, `ReshapeLayerArgs`, `JPADynamicsBlock`, `MutationRecord`, `OtherInterface`, `Blockly.Workspace`, `CellConfig`, `FileSystemError`, `TEDirective`, `AESCipher`, `PutBucketLifecycleConfigurationCommandInput`, `NumbersImpl`, `CustomerContact`, `GetJobCommandInput`, `RegExpMatcher`, `OutChatPacket`, `AsyncOrderedIterable`, `HTMLScLegendRowElement`, `OrderPair`, `ResourceConflictException`, `GradleVersionInfo`, `polymer.Base`, `BodyPixInput`, `BBoxObject`, `StartTransformsRequestSchema`, `ResolvedType`, `WesterosCard`, `IProductTranslatable`, `Iterator`, `JiraColumn`, `RouterHistory`, `PathTargetLink`, `IfNode`, `IResolver`, `MonitorRuleDef`, `CustomDomComponent`, `IRegisterNode`, `SendCustomVerificationEmailCommandInput`, `ts.ParseConfigFileHost`, `NavBarProps`, `DominoElement`, `CalcAnimType`, `ExpoConfig`, `NzCascaderOption`, `EntityName`, `ManyApiResponse`, `TdDataTableService`, `ResolvedModule`, `GeoJsonObject`, `LegacyTxData`, `ModuleName`, `VFC`, `ScopeHook`, `WebContents`, `IFileStat`, `UpdateClusterCommandInput`, `T4`, `BuildFile`, `TimeBucketsInterval`, `DataDown`, `ExtractResponse`, `RegionService`, `ListTypesCommandInput`, `DaffCategoryFilterRangePair`, `PointItem`, `MatMulPackedProgram`, `FormErrors`, `MatOptionSelectionChange`, `... 
28 more ...`, `WebAssemblyInstantiatedSource`, `ExampleRecord`, `HasJSDoc`, `FetchHandle`, `IntervalTree`, `CheckState`, `LogSeriesFragment`, `ArenaCursor`, `ImportPath`, `HTMLVmPlayerElement`, `MatDrawer`, `LuxonDateTime`, `ISensorProps`, `CreateDatasetImportJobCommandInput`, `PolyfaceAuxData`, `Hashable`, `TreeEnvironmentContextProps`, `OptionType`, `MarkdownProps`, `ICrop`, `RowBox`, `IdentityProviderMetadata`, `SqrlInstance`, `StateByProps`, `PluginFunction`, `Motion`, `ValidResourceInstance`, `IPhase`, `Uint8Array`, `protos.common.SignaturePolicyEnvelope`, `requests.ListAvailableWindowsUpdatesForManagedInstanceRequest`, `MediaTrackSupportedConstraints`, `UpdateParameterGroupCommandInput`, `StopHandle`, `DisposableSet`, `Pbkdf2Digests`, `InferredProject`, `ICacheItem`, `NormalizedDiagnostic`, `WebGLUniformLocation`, `ViewPort`, `ValueMetadataDuration`, `StreamingClient`, `Branched`, `AllocationDoc`, `IntegratedSpiral3d`, `DropData`, `PrintStackResult`, `ApifySettings`, `RedisAdapter`, `SelectableListService`, `ITransferItem`, `ImgType`, `ValueQuery`, `KeyedTemplate`, `TermType`, `JsonPayload`, `InternalErrorException`, `AssertTrue`, `BinarySwitchCCReport`, `IComboBoxOption`, `IconItem`, `FindProsemirrorNodeResult`, `LabelPropertyDataFilterer`, `TidalExpression`, `MessagingServiceInterface`, `NodeModel`, `CallHierarchyPrepareParams`, `VFSRef`, `WrappedValue`, `DataConverter`, `GainNode`, `VirtualHub`, `On`, `HLTVConfig`, `FontVersion`, `MyAppProps`, `StripePaymentSession`, `AnnotationCollection`, `InternalHandler`, `IAmazonServerGroupCommandResult`, `GradientObject`, `HookRegistry`, `PossiblyAsyncHierarchyIterable`, `FastFormContainerComponent`, `Sorting`, `PreparedFn`, `HorizontalTable`, `ObjectFlags`, `MIRAssembly`, `VirtualItem`, `IHomebridgeAccessory`, `IViewbox`, `IVueAuthOptions`, `IOpenFileItemsState`, `MilestoneActivity`, `LoginScript`, `PiValidatorDef`, `OptionDefinition`, `SeriesParam`, `IStatusFile`, `UnwrappedObject`, `ResolvedInfo`, `IListenerDescription`, `AssemblyExpressionContext`, `DeleteMembersCommandInput`, `ExpBoolSymbol`, `ObjectLiteralExpression`, `StringifiedType`, `textChanges.ChangeTracker`, `UnorderedQueryFlow`, `ToastrService`, `msRest.RequestOptionsBase`, `ErrorBarStrings`, `ParseErrorCode`, `VerticalRenderRange`, `IDiagnosticsResults`, `FullIconCustomisations`, `TPackageJson`, `ShapeTreeNode`, `SortKeyParams`, `NavigatableWidget`, `ITransactionRequestConfig`, `Positions`, `CommandClassDeserializationOptions`, `PlanetApplicationRef`, `CodeMirror.EditorConfiguration`, `SimpleClass`, `CreateProjectDto`, `py.AST`, `LayerDescriptor`, `IParticleSystem`, `GX.IndTexScale`, `TestDeployRetrieve`, `ItemRenderer`, `RegisteredActionRunner`, `IAmazonLoadBalancer`, `FragmentSpreadNode`, `MarkerInstanceType`, `IWorkflowExecuteAdditionalData`, `V3SubgraphPool`, `PythonPreviewConfiguration`, `XDomain`, `Proto.FileLocationRequestArgs`, `SeriesSpecs`, `TasksPluginReminderModel`, `RenderRow`, `WsKey`, `GetOperationCommandInput`, `DataModels.Kpi.ActiveTokenList`, `MatchersObject`, `FnN5`, `TransistorEpisodeData`, `AbortSignalLike`, `VirtualDeviceScript`, `IndexPatternPrivateState`, `MdcSnackbarContainer`, `MappingTreeObject`, `RequesterAuthorizerWithAirnode`, `SHA384`, `ModuleMap`, `SemanticTokensBuilder`, `SpecQueryModel`, `InternalConfig`, `AlgoliaClient`, `CardRequirements`, `GfxSwapChain`, `FileSystemEntry`, `DebugBreakpoint`, `UpdatedLazyBuildCtx`, `UpdateStateValueFunc`, `dataType`, `LoginCommand`, `IInventoryItem`, `XMLElement`, `Arc3d`, `B13`, 
`requests.ListDbSystemPatchesRequest`, `MdcListItem`, `CartItem`, `DeviceAccess`, `RefactorAction`, `GraphQLFieldConfigArgumentMap`, `LocationChangeListener`, `ISelectOption`, `TokenizerOutput`, `SearchQueryBuilder`, `TElement`, `TReferences`, `Requireable`, `BreadCrumb`, `EllipticPair`, `TrackerEvent`, `HookProps`, `ConcreteLaunchOptions`, `InterfaceWithDictionary`, `AthleteModel`, `PresentationRpcResponse`, `CheckerResult`, `RenderSchedule.ScriptProps`, `TestRenderTag`, `Articles`, `UseLazyQueryState`, `JavaDownloadRelease`, `TouchList`, `pw.Frame`, `CW20Addr`, `Exception_Type`, `ReportingCore`, `ParseContext`, `GeoAngle`, `CommandInteraction`, `NodeState`, `ConsumerExtInfo`, `HandleEvent`, `CommandInstance`, `DeleteParameterGroupCommandInput`, `TMetricAggConfig`, `Distance`, `Comparator`, `ConfigChoice`, `DsnComponents`, `Part`, `ImportType`, `requests.ListPrivateIpsRequest`, `SessionStore`, `UpdateEndpointCommandInput`, `requests.ListComputeCapacityReservationInstanceShapesRequest`, `CollectorState`, `CommandLineConfiguration`, `Ec2MetricChange`, `ThrottlerHelper`, `IEditorProps`, `IPushable`, `NgrxJsonApiStoreResources`, `SensorGroup`, `Immutable`, `requests.ListWaasPoliciesRequest`, `GroupByPipe`, `TranslateAnswerConfig`, `AdaptFuncT`, `EmptyParametersGatherer`, `DashboardAppLocatorDefinition`, `DOMPointInit`, `PluginNamingConfiguration`, `Indexer`, `NodeEvent`, `NavTree`, `CreatePageReq`, `StorageInfo`, `EntityCacheQuerySet`, `WorkRequestStatus`, `DraftEditorCommand`, `RouterRes`, `ImportSavedObjectsOptions`, `Locale`, `ThemeColor`, `TimedParagraphItem`, `ZoneDefinitionModel`, `PartyDataSend`, `PopupComponent`, `ICreateFormDialogState`, `CommitDetails`, `ex.PostUpdateEvent`, `IView`, `FilterValue`, `TimeUnit`, `PackageService`, `CredentialsService`, `Ohm.Node`, `apid.DropLogFileId`, `Deno.Process`, `Tracklist`, `LogItemProps`, `BaseAdapter`, `GraphQLQueryBuilder`, `DebugConsole`, `TypeLiteral`, `common.ClientConfiguration`, `TreeListComponent`, `SummaryPart`, `MessageInterface`, `RowAccessor`, `Parjser`, `QueryEngineBatchRequest`, `CustomConfig`, `JoinPredicate`, `ContextStore`, `AddOutputRequest`, `GoogleAnalyticsService`, `IProvisionContext`, `CouncilProposal`, `IEcsServerGroupCommandResult`, `SubjectDataSetJoin`, `QueueNode`, `NotFoundError`, `GetAccessorDeclaration`, `Postprocessor`, `QRCodeScheme`, `NormalizationHandler`, `t_3b6b23ae`, `RelationMeta`, `DirectedScore`, `PresetInfo`, `IndexerError`, `GraphQLScalarType`, `CommonLanguageClient`, `AppController`, `MarkdownNode`, `SapphireDbService`, `TokenConfig`, `LineDataSet`, `ConfigSchema`, `AuthorModel`, `ts.MapLike`, `ApiResult`, `TweetMediaState`, `TPropertyTypeNames`, `DefaultConfig`, `MyTypeDeclarative`, `requests.GetWorkRequestRequest`, `MdcSnackbarRef`, `AuxChannelData`, `VerifiedParticipant`, `ExpirationDateVerification`, `FractalisService`, `CandleGranularity`, `CVLanguageManager`, `THREE.Texture`, `GetDomainRecordsResponse`, `TextElementBuilder`, `OAuthScope`, `FuncMode`, `PDFObjectStream`, `AreaField`, `LooseValidator`, `ExpNumNumel`, `ResolvedConfigFileName`, `ISelectedEmployee`, `TabDataType`, `Export.DefaultInterface`, `DeleteJobRequest`, `RSTPreviewManager`, `IModels`, `CustomSetting`, `QName`, `TryNode`, `DynamicCstr`, `ArrayServiceArrToTreeOptions`, `MagickOutputFile`, `Slot`, `ParserInfo`, `FasterqQueueModel`, `GetRecordsCommandInput`, `MerchantGameActivityEntity`, `SVGCircleElement`, `TypeBinding`, `SnakePlayer`, `NPMContext`, `SafeUrl`, `Layout`, `VerifyConditionsContext`, `d.WorkerMainController`, 
`firebase.firestore.Timestamp`, `PickerOptions`, `MutationTuple`, `MeasureMethod`, `TCase`, `ToastItem`, `ProjectConfigData`, `DirectoryTree`, `DestinyCacheService`, `SecurityCCCommandEncapsulation`, `Http3Header`, `MacroInfo`, `VpcData`, `FsWriteResults`, `requests.ListAppCatalogSubscriptionsRequest`, `GeneratorProcess`, `ContractReceipt`, `ParameterPath`, `Height`, `SavedQueryService`, `SummaryObject`, `MidiInstrument`, `WsHttpService`, `CreateManyInputType`, `AnimDesc`, `Obj`, `ChatStoreState`, `ProDOSVolume`, `RemoteFilter`, `PolicyType`, `VectorOptions`, `ActionFunction`, `SKLayer`, `Epsg`, `IssueLocation`, `ITableProps`, `FixedDomPosition`, `SwitchLayerAction`, `T13`, `d.TypesMemberNameData`, `RippleBalanceMonitor`, `X12Parser`, `WebView`, `TerminalVersion`, `InputListConfig`, `ScanGameFile`, `BufferId`, `Coords`, `PropertyDeclarationStructure`, `DescribeConnectionsCommandInput`, `TreeServiceTest`, `LiteColliderShape`, `RepositoryCommonSettingDataType`, `HumidityControlMode`, `AuthenticationInterface`, `Arg`, `CanvasPattern`, `DismissedError`, `NotificationType0`, `WindowLocation`, `ITexture`, `BoundCurves`, `WebPhoneSIPTransport`, `StaticCollider`, `ChainService`, `XAnnotation`, `Joi.ObjectSchema`, `FunctionCallArgumentCollectionStub`, `AbiInput`, `RepositoryService`, `PullState`, `GeoPolygon`, `QueryCallbacksFor`, `SeriesCompareFn`, `TransactionQueryPayload`, `ButtonDefinition`, `StoreEnhancerStoreCreator`, `SpinnerService`, `TSlice`, `EnumerateVisualObjectInstancesOptions`, `SymbolKind`, `WorkspaceInfo`, `d.OutputTargetCopy`, `IQueryBuilder`, `TKeyboardShortcutsMapReadOnly`, `DocumentRangeFormattingParams`, `ExchangePriceService`, `d.OutputTargetDocsCustom`, `protocol.Location`, `IJavaProjectWizardContext`, `Protocol.Input.DragData`, `VersionOperatorContext`, `tcl.Tag`, `SerializedDatatable`, `IntegrationMapService`, `SlashingProtectionAttestation`, `CloudFormationResource`, `ReportingDescriptor`, `TsChart`, `ParameterGroup`, `RunShellResult`, `TT.Step`, `AuthenticationClient`, `RootCompiler`, `PathProps`, `ParsedGenerator`, `DebugProtocol.ScopesArguments`, `TypeRef`, `AmqpConnectionManager`, `LockOptions`, `DesignerNode`, `Tsoa.Method`, `TestFunctionImportEntityReturnTypeParameters`, `STPAPIClient`, `ReadableBYOBStreamOptions`, `Testing`, `GitHubPullRequest`, `ExpressionLoader`, `NextApiRes`, `MultisigBitcoinPaymentsConfig`, `QueryObjOpts`, `BackupDestinationDetails`, `NzAutocompleteOptionComponent`, `CommitChangeService`, `ImageResult`, `DocParagraph`, `d.ComponentCompilerListener`, `NotificationCCReport`, `PushContextData`, `StackEvent`, `WrapLocals`, `XsuaaServiceCredentials`, `PrivateEndpointConnection`, `IDialogContext`, `TestResponse`, `EventCategory`, `ICommandPalette`, `BarGeometry`, `LoginComponent`, `SavedObjectOpenSearchDashboardsServicesWithVisualizations`, `DataSourceState`, `UpdateCustomVerificationEmailTemplateCommandInput`, `AnimationTrackComponent`, `LocalBlockExport`, `RemoteRequest`, `d.NodeMap`, `ListNodegroupsCommandInput`, `Pbf`, `ForInStatement`, `CbExecutionContext`, `ElementInlineStyle`, `PaginationComponentOptions`, `InspectorOptions`, `core.Connection`, `TargetGraphQLType`, `ProdutoDTO`, `IInternalActionContext`, `ITestResult`, `ClipboardWatcher`, `Conv1D`, `ExampleProps`, `ResizeObserverCallback`, `BranchFlagStm`, `P`, `UserStatsState`, `Session.ISession`, `DiscoverServices`, `ExecutedQuery`, `PackageDependency`, `QueryFieldMap`, `CurriedGetDefaultMiddleware`, `FileList`, `WizardComponent`, `PDFHexString`, `PddlExtensionContext`, 
`CreateAttendeeRequestItem`, `BlockPointer`, `Instance`, `IGraphicOption`, `ParticipantResult`, `localVarRequest.Options`, `RefCallback`, `Cipher`, `DirectoryDiffResults`, `AuthActions`, `NodeWallet`, `IFilterOptions`, `SourceTarget`, `FormatCodeOptions`, `ErrorContinuation`, `XMessageBoxAction`, `ServerEntry`, `PutReportDefinitionCommandInput`, `QueryOne`, `PR`, `StatedBeanMeta`, `FeedId`, `InstantiationNode`, `Meal`, `Group`, `Md.List`, `SupportedFiletypes`, `GfxRenderInstList`, `SpecificEventListener`, `FlexPlacement`, `DirectionConstant`, `IterationDirection`, `SrvRecord`, `IPass`, `SecretKey`, `SmartHomeHandler`, `SearchQueryUpdate`, `ResolvedProjectReference`, `PartyMatchmakerAdd_NumericPropertiesEntry`, `ShorthandProperty`, `FlatVector`, `IDataFilterValueInternal`, `android.view.ViewGroup`, `PLIItem`, `CollisionTree`, `OptionsStruct`, `SlashArgRecord`, `ColorDataObj`, `BodyPartConstant`, `EntityData`, `TuplePage`, `PipelineProject`, `ClusterNode`, `EventLocation`, `EventFnSuccess`, `GraphicsShowOptions`, `d.PrerenderUrlResults`, `BIP85`, `TextDocumentEdit`, `angular.ITimeoutService`, `React.ElementType`, `StandardFontEmbedder`, `T.MachineContext`, `SelectCard`, `TDataProvider`, `DsDynamicInputModel`, `SignOptions`, `BadgeStyleProps`, `sdk.SpeechRecognitionCanceledEventArgs`, `DescribeValidDBInstanceModificationsCommandInput`, `Evaluator`, `IRolesMap`, `ExportProps`, `AutocompleteFieldState`, `State.FetchStatus`, `ISpecModel`, `LastColumnPadCalculator`, `PromiseExecutor`, `Tensor`, `CustomUser`, `AbstractView`, `ProcessService`, `HostInstructionsQueue`, `By`, `SyncResultModel`, `FunctionDef`, `SubsetPackage`, `AuthPipe`, `StopItem`, `GraphQLType`, `RefsDetails`, `LeaveAction`, `ExtendedSocket`, `PluginFunctions`, `SuperTest.SuperTest`, `Scene`, `TimeOffPolicy`, `PolicyContext`, `SkillGaussian`, `d.ConfigBundle`, `NavigationState`, `HeroSelectors$`, `ParseIconsOpts`, `ExternalRouteDeps`, `LoansService`, `MultiSigHashMode`, `LogLevels`, `RlpSerializable`, `TranslationSettings`, `ConfigValue`, `DependencyInfo`, `ApplySchemaAttributes`, `RSSItem`, `StackInfo`, `UserId`, `Flattened`, `RestoreResults`, `IntelChannel`, `Tally`, `GameName`, `TransmartStudy`, `SocketChannelServer`, `Ports`, `StorageLocationModel`, `ContextMenuFormProps`, `SqliteStatement`, `FragmentableArray`, `ReplaySubject`, `GLsizei2`, `ParsedUrlQueryInput`, `HashValue`, `MapPartsShadowType`, `SeparationInfo`, `SolutionBuilderHost`, `postcss.Node`, `KernelMessage.IExecuteRequest`, `GlobalJSONContainerStorage`, `AxeResultsList`, `PluginOrPackage`, `ArangoDB.Collection`, `Mode`, `Operations`, `AddConfigDeprecation`, `ExpressionResult`, `requests.ListProtocolsRequest`, `CheckSimple`, `SFCStyleBlock`, `WriteStorageObjectsRequest`, `StandardTokenMock`, `WcCustomAction`, `MiddlewareNext`, `RuleConfigTuple`, `ConnectionDetails`, `ThyUploadFile`, `PublicIPAddress`, `PerformReadArgs`, `OrganizationRecurringExpenseService`, `QueryOpt`, `ArchTypes`, `RootAction`, `NullAndEmptyHeadersServerCommandInput`, `ClClient`, `Events.pointerdragenter`, `LineSide`, `THREE.PerspectiveCamera`, `BaseTransaction`, `BreadcrumbService`, `LineSeries`, `FloatingPanel`, `ScriptVersionCache`, `TodoAction`, `IteratorCreatorFn`, `DeleteApplicationOutputCommandInput`, `GossipPeer`, `IPascalVOCExportProviderOptions`, `VdmFunctionImport`, `IClaimData`, `MockRequestInit`, `ObjectExpression`, `Moment`, `StringToNumberSyntax`, `CueAndLoop`, `StatusService`, `Type_Interface`, `PluginAPI`, `CreateJobCommandOutput`, `ComboConfig`, `ForeignInterface`, 
`ExportDefaultDeclaration`, `CallbackStore`, `DropdownListItem`, `AssertionExpression`, `ConfigOptions`, `OaiToOai3FileInput`, `WithdrawalMonitorObject`, `CSSOutput`, `Shader`, `InstantRun`, `CellInfo`, `GetWebACLCommandInput`, `ErrorCacheDelta`, `FooState`, `PackageAccess`, `CustomWorld`, `ListExperimentTemplatesCommandInput`, `DefinitionInfo`, `TabInfo`, `ActiveDescendantKeyManager`, `SonarQubeApiComponent`, `CssNodeType`, `SeriesTypeOptions`, `NavigableMap`, `SelectSeriesInfo`, `CompilerFsStats`, `INote`, `IPathsObject`, `WebpackRule`, `RedHeaderField`, `OperationHandlerPayload`, `LinkedSearchProps`, `RuleTester`, `RuntimeConfiguration`, `DeleteProjectResponse`, `BinaryStream`, `TableConfiguration`, `FieldSchema`, `IPostMessage`, `ResolvedConfig`, `GitUser`, `DaffCategoryIdRequest`, `ToastConfig`, `CertificateRule`, `ResetPasswordAccountsValidationResult`, `AnimationReference`, `ParsingExtension`, `LastfmTrack`, `WalkerArgs`, `IColorV`, `TypeBase`, `Car`, `NotifyQueueState`, `GfxRenderPipelineP_WebGPU`, `MDCFloatingLabelAdapter`, `RoomReadyStatus`, `Val`, `ExtractOptions`, `ShadowGenerator`, `OP_PUSHDATA`, `RawData`, `ControllerValidateResult`, `ActivitySourceDataModel`, `BranchDataCollection`, `TResolver`, `CryptoService`, `FbBuilderFieldPlugin`, `TTag`, `DirectiveHook`, `IColorableSequence`, `types.Transport`, `CardInGame`, `IHistory`, `EventContext`, `IStaticMetadata`, `React.ForwardedRef`, `RippleRef`, `GRUCell`, `GraphQLField`, `NotificationActions`, `CompositeOperator`, `WebContext`, `ConfigurableFocusTrapConfig`, `ClozeRange`, `AugmentedAssignmentNode`, `DeployView`, `MigrationParams`, `FaunaUDFunctionOptions`, `IndicatorCCSet`, `TypeData`, `SocialSharing`, `GetSampledRequestsCommandInput`, `NavigationContext`, `ResponsiveProp`, `JsxClosingElement`, `Drawing`, `IDBValidKey`, `Attempt`, `WorkspaceProject`, `RoutesService`, `DaffCategoryFilterEqualFactory`, `SubStmt`, `SyncArgs`, `IValidationSchema`, `PvsFile`, `VisualConstructorOptions`, `PhysicalKeyboardKey`, `ICalendarEvent`, `RRI`, `RoutesMeta`, `ActionDefinition`, `ControlDirective`, `ResolveFn`, `CallMemberLikeExpression`, `TID`, `LinterConfig`, `ListEventsRequest`, `AzureCommunicationTokenCredential`, `GX_VtxDesc`, `GrpcConnection`, `FuseConfigService`, `DoClass`, `TextureDescriptor`, `InjectableDependency`, `GestureConfigReference`, `J3DModelInstance`, `GfxBuffer`, `SetOptions`, `InitObject`, `JsonDiffNode`, `ExtensionDefinition`, `IEncoderModel`, `RangeImpl`, `BUTTON_SIZE`, `JsExpr`, `MlCommonUI`, `JsonDocsDependencyGraph`, `PatchType`, `Extract`, `KeyValueDiffers`, `IModuleStore`, `RebaseConflictState`, `TokenContext`, `EthersProvider`, `NativeContractStorageContext`, `CategoryRendererItem`, `ChromeStart`, `RushConfiguration`, `MsgPieces`, `TimeBucketsConfig`, `ts.Printer`, `AppModel`, `ServiceTreeItem`, `FailureDetails`, `PackageData`, `SWRKeyInterface`, `ScriptKind`, `SelectablePath`, `CollectionResult`, `PredicateType`, `Valve`, `PublicApi`, `ReBond`, `ReplayContext`, `CanvasBreakpoints`, `IVoicemail`, `ITsconfig`, `SortPayload`, `TargetTypeMetadata`, `$DFS.DFS_Config`, `OfflineContext`, `NgForageOptions`, `Lane`, `HitSensorInfo`, `ANTLRBackend`, `DescribeDBClusterParametersCommandInput`, `IUserState`, `TypeOrTypeArray`, `StructuredType`, `MissionSetupObjectSpawn`, `StrokeProtocol`, `Feeder`, `FormLabelProps`, `TensorOrArrayOrMap`, `GraphicsComponent`, `FsItem`, `Milestone`, `TimelineDragEvent`, `IPackagesService`, `IDeferredPromise`, `GraphQLInputObjectType`, `OpenChannel`, `IGitProgressInfo`, `ToneMapping`, 
`Memento`, `RegisterDeprecationsConfig`, `HistoryManager`, `ComponentArgTypes`, `QueryParamsAsStringListMapCommandInput`, `IntervalTimelineEvent`, `PadplusRoomPayload`, `ITaskWorker`, `WatchCompilerHostOfFilesAndCompilerOptions`, `TundraBot`, `sinon.SinonFakeTimers`, `OrderBook`, `ModelManager`, `SelectQueryBuilder`, `Quadratic`, `ControlPanelConfig`, `LevelDocument`, `IP`, `Activator`, `RepositoryCommonSettingValueDataType`, `MessageInfo`, `NavNode`, `Builder`, `OnModifyForeignAction`, `FriendList`, `TransliterationFlashcardFieldName`, `FormatCompFlags`, `DeleteTokenCommandInput`, `Arc`, `DeviceSize`, `SubscriptionTracker`, `ArgsMap`, `AztecCode`, `IExecutionFlattedDb`, `Handlebars.HelperOptions`, `IAsyncEnumerable`, `UserUI`, `InjectableType`, `Urls`, `TKeyArgs`, `ParamsOf`, `GlobalTime`, `OnCancelFunc`, `ILanguageState`, `PermissionsData`, `KdfType`, `ClientChangeList`, `ClusterOptions`, `TSource`, `TileCorners`, `AssessmentData`, `ScheduleItem`, `IEventHubWizardContext`, `ServiceEntitlementRegistrationStatus`, `DebugGeometry`, `Macro`, `com.google.ar.sceneform.Node`, `RedioPipe`, `MdcChipAction`, `ITBConfig`, `configuration.LaunchConfiguration`, `ExistsFilter`, `ApexExecutionOverlayAction`, `ValueJSON`, `WebFontMeta`, `ServerSideEncryptionConfiguration`, `MarkSpecOverride`, `VocabularyEntryDetail`, `ts.TaggedTemplateExpression`, `CoinPretty`, `IBalance`, `ResponseData`, `IDeployedApplicationHealthStateChunk`, `AppClientConfig`, `LineLeaf`, `IndentNode`, `DayStressModel`, `SelectionSetToObject`, `S3Object`, `TransformOutput`, `BitcoinBalanceMonitor`, `INameDomainObject`, `solG2`, `UseLiveRegionConfig`, `IdentityMetadataWrapper`, `FailoverGroup`, `ServerDevice`, `ReferenceType`, `MaybeVal`, `JointTreeNode`, `EncryptedShipCredentials`, `CloudDevice`, `ContainerRuntime`, `CSTeamNum`, `AsyncWaterfall`, `FeedItem`, `InitialOptionsTsJest`, `OutputConfig`, `MapType`, `ListenerHandler`, `ParsedResult`, `SpeedDialItem`, `IProtocolConstructor`, `ILayoutContextProps`, `FullChat`, `MdcSnackbar`, `vscode.Webview`, `FrameworkEnum`, `QueryPointer`, `TestImageProps`, `IGlTFExtension`, `ParameterListContext`, `NewPackagePolicyInputStream`, `MDCChipAction`, `STAT`, `IVarAD`, `FieldHook`, `IPersonaProps`, `d.CollectionCompilerVersion`, `ConsumedCapacity`, `DeploymentEnvironment`, `EvalResponse`, `SummaryNode`, `AccountRipplePaymentsConfig`, `ExpressionFunctionParameter`, `IntersectionObserverInit`, `ITask`, `ContainerFlags`, `ValidationProblem`, `requests.ListDataSafePrivateEndpointsRequest`, `VertexEntry`, `ISqlEditorTabState`, `files.FullFilePath`, `FaastModule`, `ITokenRequestOptions`, `TextChar`, `AuthCredential`, `UpdateUserProfileCommandInput`, `CyclicDependencyGraph`, `Transcript`, `messages.Duration`, `AFSQuery`, `TIO`, `StatedFieldMeta`, `SingleObjectWritableStream`, `theia.Range`, `EntityCollectionCreator`, `StreamHandler`, `IDatabaseConfigOptions`, `NewDeviceDTO`, `d.Screenshot`, `TodoRepository`, `CreatorBase`, `ILoggerService`, `Line2`, `IAppContext`, `MyTabItem`, `GnosisExecTx`, `GfxTopology`, `ExclamationToken`, `pxtc.ApisInfo`, `Highlight`, `Datatypes`, `ListComprehensionIfNode`, `MockCacheService`, `AuthTokenResult`, `PopulatedContent`, `RequestPresigningArguments`, `TObject`, `XmlNodeNop`, `CompletionEntryData`, `DaffStateError`, `ts.ExpressionStatement`, `ExternalWriter`, `DebugProtocol.InitializeResponse`, `BufferComposer`, `DataCenterResource`, `ProductControlState`, `ResourceSpans`, `ViewNode`, `IClusterHealthChunkQueryDescription`, `CERc20`, `ITable`, `HoldSettings`, 
`EmbeddableActionStorage`, `ExportedData`, `MeiliSearch`, `ts.TranspileOptions`, `CryptoEffects`, `StorageProvider`, `ICheckOut`, `fse.Stats`, `DiskAccess`, `TransitionOptions`, `MetricsService`, `ESLintProgram`, `IMetricAlarmDimension`, `Transformation`, `SourceStream`, `IDeployContext`, `VerifierOptions`, `TSpy`, `OperationTypeNode`, `PinOverrideMode`, `AvailabilitySlot`, `PricePretty`, `MatSort`, `DefItem`, `CephLandmark`, `Hunspell`, `IClassicmenuRuleSpec`, `NoteResouce`, `ServiceItem`, `PageContainer`, `ToolbarChildrenProps`, `ComponentLocale`, `Rollup`, `MessengerTypes.Message`, `InputRule`, `Webhook`, `ContentConfigurator`, `OpenSearchDashboardsLegacyPlugin`, `MediaTrackConstraints`, `ITrackSequence`, `Coding`, `ClubEvent`, `ClientDetails`, `DocumentInfo`, `StateSnapshot`, `BemSelector`, `SimpleTypeMemberNamed`, `HeaderType`, `CommerceLayerClient`, `Node.JSON`, `ActionObservations`, `PanRecognizer`, `SourceFileInfo`, `NormalizeStyles`, `AccountData`, `IClientConfig`, `StorageHeader`, `XPathData`, `AutocompleteProps`, `UserTie`, `DisplayValuePropertyDataFilterer`, `SimpleSavedObject`, `BScrollConstructor`, `IAddGroupUsersResult`, `RouterStateData`, `SMTVar`, `ChildData`, `NgxPermissionsService`, `RenderTreeFrameReader`, `Rule.RuleModule`, `TimelineState`, `TerminalState`, `EventSink`, `ClassLexicalEnvironment`, `createStore.MockStore`, `RoleProps`, `Notifier`, `ICamera`, `FFT`, `React.Route`, `JSZipObject`, `FeatureStabilityRule`, `HashCounter`, `CopyButtonProps`, `FormattingContext`, `EntityStore`, `PackageJsonWithTsdConfig`, `A1`, `DestructuringAssignment`, `InitiatingWindowProps`, `InstallForgeOptions`, `MigrateFunction`, `ScriptStub`, `OrganizationContext`, `TranslationsType`, `Operation`, `ConfigHandlerAndPropertyModel`, `NotifyService`, `MessengerClient`, `ITransactionData`, `CommentProps`, `ValueConstraint`, `ToastType`, `DepositAppState`, `PerspectiveDetails`, `LevelGlobals`, `TransferTransition`, `Paint`, `Junction`, `ActivityTypes`, `LoginResponse`, `FileTypeEnum`, `SaleorThemeColors`, `NSObject`, `MockContainerRuntimeFactory`, `Node`, `MetaRewritePolicy`, `TableModelInterface`, `ForegroundContexts`, `SaveManager`, `CachedUpdate`, `ConflictType`, `PointInTimeFinder`, `ICoordinate`, `Enzyme.ReactWrapper`, `ChannelMessageUpdate`, `AnnotationTooltipState`, `RenderStatus`, `EventQueue`, `AsyncResultCallback`, `PreimageField`, `DID`, `ISeedPhraseStore`, `BindingPattern`, `ActionButton`, `SocketMessage`, `ChannelId`, `TopicStatus`, `LavalinkNode`, `I18nService`, `ArrayMultimap`, `ModelMapping`, `CreateApplicationCommandOutput`, `ExpNumSymbol`, `HashMap`, `IMYukkuriVoice`, `PutConfigurationSetDeliveryOptionsCommandInput`, `IGameUnit`, `FeaturePrivilegeAlertingBuilder`, `MDBModalRef`, `IInterval`, `ViewSize`, `IconComponentProps`, `XmlListsCommandInput`, `position`, `DisplayInfo`, `TextureConfig`, `HttpProbeMethod`, `OptionName`, `ShareAdditionContent`, `StorageMigrationToWebExtension`, `PostMessageStub`, `UsageCounter`, `WorkflowEntity`, `DashboardPlugin`, `ReactFCNoChildren`, `puppeteer.JSHandle`, `TableColumns`, `IPresentationTreeDataProvider`, `DataGroup`, `DeploymentNetwork`, `InternalNode`, `OgmaService`, `ExecutionScopeNode`, `InitialState`, `UICollectionViewLayoutAttributes`, `CommBroker`, `TAggregateCommit`, `ICompositionBody`, `GridsterComponentInterface`, `DateFnsConfigurationService`, `IVocabularyItem`, `IObject3d`, `IParseResult`, `ComputeManagementClient`, `ResolveProvider`, `RetryAction`, `DaLayoutService`, `OpDef`, `ICalendarEventBase`, `RuleFunctionMeta`, 
`AccessLog`, `StructName`, `postcss.AtRule`, `StateOperator`, `ContentRecord`, `DeclarationKind`, `DeleteDBClusterEndpointCommandInput`, `DynamoDB.DeleteItemInput`, `OpenSearchResponse`, `DAL.KEY_ESC`, `QuickCommand`, `ThyListOptionComponent`, `IconShapeTuple`, `SerializedNode`, `Ending`, `OsmConnection`, `RouteProps`, `ILink`, `MarkEncoding`, `TestStream`, `WordcloudUtils.PolygonObject`, `AxisStyle`, `ITrace`, `TabbedAggResponseWriter`, `StartInstanceCommandInput`, `NgWidget`, `ProcessMainAdapter`, `Budget`, `RefreshTokenService`, `InterfaceWithConstructSignatureReturn`, `Separated`, `CommandConstructorContract`, `TNode`, `InMemoryFileSystem`, `ApplicationService`, `SetOptional`, `SliceState`, `EvaluatedNode`, `ItemShape`, `ReadOnlyIterator`, `ICreateTableOptions`, `BuiltinFunctionMetadata`, `Bits`, `Reservation`, `Recording`, `YamlMap`, `GetStaticPropsContext`, `ApiRequest`, `RequestNode`, `IRequestApprovalCreateInput`, `DeleteDatasetGroupCommandInput`, `DocumentRegistry`, `FeatureCollection`, `EntityField`, `EmulateConfig`, `FormProperty`, `NativePath`, `SearchView`, `IHeaderExtensionObject`, `VariableRegistry`, `TestStore`, `ContextContributor`, `MessageImage`, `FetchVideosActions`, `DescribeDBEngineVersionsCommandInput`, `IUnitStoryChapter`, `MDCTabAdapter`, `IInternalParticipant`, `AccountServiceProxy`, `HTMLInputElement`, `PainlessCompletionResult`, `ExplorerState`, `Event`, `MockProviders`, `Newable`, `DeployStatusExt`, `ResolvedVersion`, `PLAYER`, `MarkerSnap`, `ListTagsForResourceInput`, `Apps`, `Exclude`, `UiMetricService`, `MultiPickerOption`, `TypedGraph`, `DryRunPackagePolicy`, `ProgressStep`, `ParamAssignmentInfo`, `LedgerService`, `Rep`, `S3URI`, `Expiration`, `DescribeOrganizationConfigurationCommandInput`, `ApiPromise`, `CohortComposition`, `SortedReadonlyArray`, `UnaryContext`, `Bytes`, `PersistenceManager`, `MyTargetProps`, `AgChartOptions`, `ServiceStatus`, `SourceFile`, `FaunaRef`, `TabPanelProps`, `ProgramProvider`, `MlContextValue`, `VizChartPanel`, `HandshakePacket`, `UserRepresentation`, `ListTagsForResourceResponse`, `Types.KafkaConsumerMessageInterface`, `IEBayApiRequest`, `WalletKeys`, `ILinePoint`, `UserRoleService`, `ListRecommendationsResponse`, `ts.PropertyDeclaration`, `Validate`, `OrderBookOrderDTO`, `StyleCompiler`, `LoadingEvent`, `ParserRule`, `DialogItem`, `DTO`, `MosaicNode`, `DefaultAzureCredential`, `ComponentHost`, `ISharePointSearchQuery`, `DetectionResultRowIndicatorColumn`, `Classifications`, `IceState`, `StyleResults`, `TextEditorDecorationType`, `SBDraft2CommandOutputParameterModel`, `TypingsData`, `InitCmdContext`, `TimePickerState`, `CameraConfig`, `JobCommand`, `ParticipantSubscriber`, `ECSEntity`, `OnePageDataInternal`, `FindResult`, `ListResourceTypesRequest`, `StyleSanitizeFn`, `CloudFrontResponseEvent`, `DescribeCommunicationsCommandInput`, `region`, `RefreshToken`, `fieldType`, `NamespacedWireCommit`, `UpdateError`, `ITests`, `PullFromStorageInfo`, `EventHandlerType`, `IndexConfig`, `NodeJS.Timeout`, `LocalStorageAppender`, `PublicPolynomial`, `PatternSlot`, `Activation`, `INvModule`, `Radian`, `ValidateDeviceOwnershipQuery`, `ReplacePanelActionContext`, `D2rStash`, `WebsocketData`, `AccountImplement`, `Verification`, `SGraph`, `NumberConfig`, `PersistStorage`, `IconPosition`, `Pen`, `UserDevice`, `OutputOptions`, `InjectionMap`, `AsyncReaderWriterLockWriter`, `AuthenticateGoogleRequest`, `PointCloudOctree`, `Messaging.IPublish`, `HTMLFormatConfiguration`, `DomainEventClass`, `ScriptCache`, `CSReturn`, `Checker`, 
`DeploymentTargetConfig`, `NotebookCellOutputItem`, `AccessToken`, `ClusterMetadata`, `mixed`, `StringLookup`, `GeneratePrivateKey`, `CreateConfig`, `ApplicationLoadBalancer`, `HelmManager`, `TryStatement`, `FocusMonitor`, `TestFunctionImportSharedEntityReturnTypeCollectionParameters`, `CompiledRuleDefinition`, `ApplicateOptions`, `LanguageServiceExtension`, `ConditionalArg`, `Routing`, `FormGroupField`, `CreateDatasetGroupCommandInput`, `BaseUnit`, `QuantifierResult`, `ApplicationVersion`, `AccountEmail_VarsEntry`, `CompositionEvent`, `MockDeviceManager`, `ITemplatizedLayout`, `EventEmit`, `ast.Name`, `Invocation`, `SubmitHandler`, `Plugin_2`, `AttendanceStatusType`, `RouterStore`, `com.mapbox.pb.Tile.ILayer`, `PluginPass`, `Eyes`, `trm.ToolRunner`, `CkbTxGenerator`, `LogStatement`, `RefreshOptions`, `StatusParams`, `r`, `UserSimple`, `Bucket`, `serialization.ConfigDictValue`, `PlacementType`, `VsCodeApi`, `HttpServiceBuilderWithMetas`, `OutputBinaryStream`, `DsProcessorService`, `Package.Package`, `TestImage`, `ComplexSchema`, `DoStatement`, `StackActionType`, `ParseResult`, `SlotData`, `I18n`, `UserLogin`, `MetadataStorage`, `Injector`, `hapi.Request`, `LayerConfig`, `GeographicCRSProps`, `ClientCapabilities`, `IMyDateRange`, `DeleteBackupCommandInput`, `CoverageCollection`, `MDCChipAnimation`, `HttpServer`, `QueryObject`, `DecodedAddress`, `... 12 more ...`, `CarouselProperties`, `KeysData`, `TranslatorType`, `SceneManager`, `DeliveryDetails`, `ConfirmHandler`, `EntityOperators`, `AuthUtilsService`, `CustomVariant`, `DAO`, `Liquidator`, `Progress.ITicks`, `AxisOptions`, `SurveyElementEditorContentModel`, `TestERC20Token`, `SettingsRootState`, `IQueryParam`, `CheckboxValue`, `ICAL_ATTENDEE_STATUS`, `ResponseValue`, `KeywordPrefix`, `XColorsTheme`, `PermissionsCheckOptions`, `SwipeActionsEventData`, `IModifierRange`, `OperationContext`, `ICommandMapping`, `RegExpCompat`, `ExternalDMMF.Document`, `PDFTextField`, `moneyMarket.overseer.CollateralsResponse`, `CreateExceptionListItemSchema`, `ChipDirective`, `SyncedActivityModel`, `IRealtimeSelect`, `TrainingConfig`, `monaco.CancellationToken`, `ExpressAdapter`, `ImageObject`, `PendingFileType`, `DropTarget`, `Shape.Base`, `DebtKernelContract`, `DnsRecord`, `SuggestionWithDetails`, `GenericTagId`, `TreeNodeInfo`, `WebpackError`, `SpectatorDirective`, `BTCMarkets.currencies`, `IMeshPrimitive`, `PlasmicLoaderConfig`, `AsyncCPUBackend`, `GX.BlendFactor`, `CancellationReason`, `IDataIO`, `Interpolator`, `ts.ClassElement`, `HTMLProps`, `GroupModel`, `GlobalizeConfig`, `ModelLayer`, `requests.DeleteConnectionRequest`, `DemoMeta`, `ApolloRequest`, `EncodeOptions`, `requests.ListComputeGlobalImageCapabilitySchemaVersionsRequest`, `TokenResult`, `AadHttpClient`, `AccountService`, `Error`, `MakeSchemaFrom`, `DecoratorArg`, `PaginationPayload`, `AnswerType`, `DateFnsHelper`, `PitchShifter`, `SensorObject`, `IQueuedMessage`, `TestRequestResponse`, `Incident`, `WiiSportsRenderer`, `ColumnSeriesDataItem`, `RequestAction`, `ChangedData`, `ComboFilterSettings`, `KeyType.rho`, `IInvoice`, `AddressHashMode.SerializeP2PKH`, `ConfigSetter`, `FrameTree`, `BlockOptions`, `ATN`, `OutputEntry`, `MerchantIdentity`, `OverviewTestBed`, `Base16Theme`, `InfuraProvider`, `IHealthCheckResult`, `P4`, `FunctionData`, `AppCurrency`, `IOrganizationVendor`, `SecondLayerHandlerProcessor`, `DirectoryWatcherCallback`, `OceanSpherePoint`, `ts.ImportDeclaration`, `GfxSamplerBinding`, `BookData`, `AgentMessage`, `ChatConverseState`, `TasksActionTypes`, `UpdateData`, 
`ListChannelBansCommandInput`, `ICategoricalLikeColumn`, `FaceletT`, `ResponseMetadata`, `DownloadResponse`, `AnyRawModel`, `CreatePackageCommandInput`, `Identifiable`, `d.Module`, `IComponentName`, `CubeArea`, `UpdateFilterCommandInput`, `MPPointF`, `MockPlatform`, `DMMF.OutputType`, `TestParams`, `CityRouteProps`, `ClientHttp2Session`, `MpUIConfig`, `InterfaceBuilder`, `DateFormattingContext`, `ITopDownGraphNode`, `ByteVector`, `ContentBuilder`, `OverrideContext`, `TwilioServerlessApiClient`, `DataStoreService`, `Dishes`, `IDatabaseDataModel`, `Character`, `TextAlignment`, `LoggerLevelAware`, `IntrospectionType`, `InputDefinitionBlock`, `MIRConstructableInternalEntityTypeDecl`, `TableServer`, `ByteStr`, `IGrid`, `IStyle`, `FormControlName`, `MDCTextFieldInputAdapter`, `TelemetryContext`, `LibraryItem`, `AssignStatementContext`, `ImportedData`, `DateRawFormatOptions`, `VideoStreamRenderer`, `ListStacksRequest`, `PluginModel`, `AccountDevice`, `OmitInternalProps`, `ResponseInterceptor`, `DocumentFilter`, `PrivateThreadAndExtras`, `RawRestResponse`, `BackupRequest`, `FileLoader`, `Statement`, `IStageConfigProps`, `TextPlacement`, `Condition`, `t.IfStatement`, `SendCommandResult`, `QuerySubState`, `MatCheckboxChange`, `GroupUserList_GroupUser`, `FetchDependencyGraph`, `ProductJson`, `HistoryState`, `ApplicationStart`, `CategoryThread`, `InStream`, `ModelBuilder`, `CanonicalOrder`, `MappedNameValue`, `Ray3`, `i18n.Node`, `FirebaseTools`, `ScreenOptions`, `HintContext`, `OrderTemplatesDetailsPage`, `KeyInput`, `CacheInstance`, `MagickGeometry`, `TelemetryServiceConstructor`, `IRECProduct`, `FeaturesDataSource`, `IWarriorInstance`, `DeclarationMapper`, `CreateViewOptions`, `PluginType`, `vec2`, `Focus`, `IMergeViewDiffChunk`, `WeakRef`, `JobDatabase`, `TextPossibilities`, `ARAddBoxOptions`, `Parser.Infallible`, `Env`, `DeployBundle`, `MDCListAdapter`, `ListParameters`, `AccountingService`, `com.nativescript.material.bottomsheet.BottomSheetDialogFragment`, `Dispatcher`, `ChangeProjectCompartmentDetails`, `SendEmailOptions`, `SyncSubject`, `skate.Component`, `CoreEnvironment`, `IViewer`, `ConstraintMember`, `DataLimit`, `TaskCallback`, `AllDecorators`, `GenericDeviceClass`, `DateType`, `ViewUpdate`, `WarpPod`, `WebsocketMessage`, `HierarchyNode`, `TypeTemplate`, `AuthorizationRules`, `ResponsiveQueryContextType`, `CanvasBorderRadius`, `MockCloudExecutable`, `DomElement`, `RoomSettings`, `DataModels.Correlations.Correlation`, `SyntaxType`, `TopAggregateParamEditorProps`, `IFixture`, `ProgramArgs`, `LaunchTemplate`, `AzExtTreeItem`, `ReviewerRepository`, `EngineWindow`, `PortalPoller`, `RectangleConstruction`, `ConfirmChannel`, `TemplateParser`, `CampaignTimelineChanelsModel`, `IWorldObject`, `RPiComponent`, `PageComponent`, `InstallTypingHost`, `ExecutionEnvironment`, `DirectoryUpdate`, `LightInfo`, `DeleteDomainResponse`, `ElasticsearchBoolQueryConfig`, `Keyframes`, `CombinedDataTransformer`, `ChartHighlightedElements`, `IParsedError`, `DeleteUserResponse`, `RendererFactory3`, `IdentifierContext`, `VaultVersion`, `RefetchOptions`, `CompoundMeasurement`, `LinkComponent`, `IContentItem`, `Algorithm`, `StellarSignatory`, `RenderCallback`, `RadioComponent`, `IOrder`, `IDataContextProvider`, `StepComponent`, `AllureRuntime`, `UI5XMLViewCompletion`, `AbstractSession`, `PositionObjOrNot`, `ts.server.Project`, `OptimizationPass`, `TransitionAnimation`, `CannedMarket`, `UnionTypeNode`, `TweetTextToken`, `TransferedRegisterCommand`, `SshSession`, `IndexPatternDeps`, `NavOptions`, 
`ObjectBindingOrAssignmentPattern`, `FormFieldErrorComponent`, `HttpRes`, `ListPlaceIndexesCommandInput`, `KeyRingService`, `DragactLayoutItem`, `FakeConfiguration`, `AdditionEdit`, `IPlug`, `StyleManagerService`, `Connector`, `PythonCommandLine`, `ListRange`, `SignedTx`, `BoosterConfig`, `DeletePolicyCommandInput`, `BinarySearchTreeNode`, `MediaType`, `Box2Abs`, `PlanNode`, `CharacterInfo`, `MpElement`, `RestoreDBClusterFromSnapshotCommandInput`, `VLC`, `F1TelemetryClient`, `FilterResult`, `u32`, `CompilerSystemRealpathResults`, `GetProjectCommandInput`, `PlasmicASTNode`, `HandleType`, `sdk.VoiceProfileClient`, `StructureSpawn`, `RtpTrack`, `TradingPair`, `PlanetComponentRef`, `CallEndReasons`, `BackblazeB2Bucket`, `ToggledFiltersState`, `Currency`, `ClassMemberLookupFlags`, `TT.Tutorial`, `VaultItemID`, `FrameNodePort`, `BarcodeScannerOperationParams`, `VisualizationsSetupDeps`, `IncludeMap`, `MOscPulse`, `BindingMetadata`, `LogLayout`, `requests.ListHealthChecksVantagePointsRequest`, `PseudoElementSelector`, `UserSubscriptionsInfo`, `Simple`, `InferredFormState`, `AlertContentProps`, `KeyboardListenerAPI`, `EditorFromTextArea`, `StellarRawTransaction`, `ITextDiffData`, `ProcessEvent`, `DataModels.UserTasks.UserTaskResult`, `AzureAccessOpts`, `WheelDeltaMode`, `WindowsLayout`, `DescribeDatasetCommand`, `CFMLEngine`, `EntityId`, `ConstructorParameters`, `ZoneChangeOrderModel`, `IWarningCollector`, `CtrNot`, `DbMicroblock`, `LayoutType`, `PropertyDrivenAnimation`, `Subscribable`, `AppConfigType`, `ExtractionResult`, `ExchangeParams`, `LoadCache`, `OperationDescriptor`, `AuthRouteHandlerOptions`, `HammerManager`, `ApolloReactHooks.LazyQueryHookOptions`, `BudgetSummary`, `LitecoinAddressFormat`, `Benchmark`, `IBasePath`, `CredentialProvider`, `MsgToWorker`, `GitAuthor`, `ethersProviders.Provider`, `CssToEsmImportData`, `CompilerEventBuildFinish`, `NdjsonToMessageStream`, `Movimiento`, `TapoDeviceKey`, `Renderable`, `NumberOperands`, `VdmNavigationProperty`, `TemplateChildNode`, `Page`, `ttm.MockTestRunner`, `JassPlayer`, `MonitoringMessage`, `ServiceResponse`, `FieldFormatParams`, `ReadOnlyFunctionResponse`, `HIRNode`, `TransactionProto.Req`, `PromiseExtended`, `LocalRepositoryService`, `SNSNoAuthorizationFault`, `OmvFeatureFilterDescriptionBuilder.MultiFeatureOption`, `SignerFetchRpc`, `PipelineNode`, `RoutingState`, `ModbusConnection`, `POCJson`, `HSD_TObj_Instance`, `LinkObject`, `ODataParameterParser`, `MessageThreadStrings`, `ThemeProperty`, `yauzl.Entry`, `TypeAttributeKind`, `BitmapDrawable`, `SpotMarketConfig`, `RTCRtpHeaderExtensionParameters`, `BehaviorTreeBuilder`, `GitHubRef`, `AnalyticSegment`, `StringTokenFlags`, `TevStage`, `ResStatus`, `Matrix3`, `ReferenceIdentifier`, `null`, `ParserOutput`, `LocationFeature`, `TextContentBuilder`, `Poker`, `SentryUser`, `KEYS`, `requests.ListIPSecConnectionTunnelRoutesRequest`, `Task`, `AggHistoryEntry`, `EventHandlerFn`, `SimulatedTransactionResponse`, `NextcloudClientInterface`, `TableColumnDirective`, `GithubRepo`, `TaggedLiteral`, `BINModelInstance`, `MethodDeclaration`, `ProfileProvider`, `QLabel`, `LodashDecorator`, `PuzzleState`, `ResourceArguments`, `Events.predebugdraw`, `OptimizeJsInput`, `Ornaments`, `Highcharts.VMLDOMElement`, `CheckRun`, `EosioTransaction`, `MockBackend`, `IConnectionFormSubmitData`, `InterfaceWithThis`, `cp.SpawnOptions`, `DebounceOptions`, `SP`, `PermissionData`, `DeleteScalingPolicyCommandInput`, `ValidatorStore`, `EvaluationConfig`, `FunctionReturnTypeCallback`, `DirectiveDef`, `CreateAssetDTO`, 
`ContinuousDomain`, `QueryRenderData`, `ONodeSet`, `LocalFilter`, `UnsupportedSyntax`, `AbstractGraph`, `TransportResponse`, `ParticleEmitter`, `CodeProps`, `SAPNode`, `BMP24`, `NormalizedProblem`, `TagValidation`, `TableRow`, `DBUser`, `AsyncProcessingQueue`, `Sources`, `TimeChangeEvent`, `DateMarker`, `JSXElement`, `CandidateFeedbacksService`, `ListOptions`, `ParsedIdToken`, `DataId`, `WalletGroupTreeItem`, `LazyResult`, `ScriptLikeTypes`, `Ad`, `TinaSchema`, `TreeResult`, `PluginBuilderLens`, `IModelHostConfiguration`, `PlatformLocation`, `ITabInfo`, `PositionProps`, `PlotLineOptions`, `PieDataSet`, `PropsWithAs`, `ProjectLanguage`, `LiveExample`, `PostContentDocumentRequest`, `ConfigParser`, `WindowWrapper`, `IVehicle`, `BenchmarkResult`, `ICircuit`, `types.Span`, `ITooltipProperty`, `RandomUniformArgs`, `ReportManager`, `NugetPackage`, `LightingFudgeParams`, `PublishOptions`, `PDFStream`, `NodeStat`, `DeserializerContext`, `IApplicableSchema`, `IUserAchievement`, `UriCommandHandler`, `CanvasMethod`, `FirebaseServiceNamespace`, `ISymbol`, `SimpleMap`, `ChainNodeFactory`, `AppHelperService`, `DatasetManagerImpl`, `KubernetesObject`, `ITagNode`, `YBasicSeriesSpec`, `ImageLike`, `ZipLocalFileHeader`, `Distribution`, `IStackFrame`, `PopoutComponentEvent`, `CompositeMetric`, `GfxrPass`, `Highcharts.RangeSelectorButtonsOptions`, `BTI`, `DispatcherLocals`, `PersistedEvent`, `NodePosition`, `CheckoutAction`, `d.BundleModule`, `yubo.IRecordMessage`, `AzureSubscription`, `sdk.SpeechConfig`, `TPT1`, `Dim`, `CloudKeyStorage`, `TypeMatcher`, `XAxisProps`, `GetPerspectiveOptions`, `ResourceDayHeaderWrapper`, `EnvoyHttpRequestInit`, `effectOptionsI`, `ColorRgb`, `PluginDependency`, `FastifyRequest`, `CallEndedListener`, `IntNumber`, `Vector3Keyframe`, `SetNode`, `MapDispatchToPropsFunction`, `IObservable`, `UseCaseExecutorImpl`, `ToolbarItemProps`, `TabLayoutNode`, `AccountHasFundsFn`, `ForwardingSpec`, `OpenOrCloseListener`, `AuthorizationService`, `LogFormatter`, `TNSDOMMatrixBase`, `ValidatedBatchConfig`, `ConstructorParams`, `ResponseParams`, `UpdateConnectivityInfoCommandInput`, `ImmutableStyleMap`, `ChildComponent`, `PageAPIs`, `ClickParam`, `TransferOffchainTx`, `IGitService`, `HelloService`, `ILineInfo`, `IAggregationDataRow`, `RandomSource`, `ResizerMouseEvent`, `MatSnackBar`, `WrappedLiteralValue`, `ThreeSceneService`, `GetStorageSuccessCallbackResult`, `ObjectMapper`, `GetTemplateCommandInput`, `ITrackInfo`, `FSFile`, `TextRenderStyle`, `IStringDictionary`, `LengthPrefixedString`, `IBindingTemplate`, `DeletePermissionPolicyCommandInput`, `CreateBucketRequest`, `N1`, `TalkSession`, `HttpContentType`, `DeploymentSummary`, `ParsedDid`, `DashboardContainer`, `BuilderContext`, `LongTermRetentionPolicy`, `RepoSnapshot`, `EarlyStopping`, `SankeyDiagramNode`, `TFile`, `FlatQueryOrderMap`, `Bracket`, `IProjectType`, `GX.CullMode`, `AbstractModel`, `IngameGameState`, `IInboxMessage`, `TestMessagingService`, `BoxShadow`, `Tape`, `LoaderAction`, `DescribeConfiguration`, `GenerationNum`, `Fig.Spec`, `SendMessage`, `ISourceOptions`, `SymbolSet`, `MEvent`, `Documentable`, `HashCode`, `ArrowCallableNode`, `CacheKeys`, `EnhancedItem`, `TimePoint`, `ChildReference`, `IButtonProps`, `MspDataView`, `JobExecutionState`, `ClassField`, `MessageSeverity`, `MangolState`, `AsyncStorageHandler`, `Studio`, `fetch.Response`, `SymbolTable`, `Thread`, `IGitRemoteURL`, `RoomVisual`, `SonarQubeApiScm`, `IUserRepo`, `BlokContainer`, `Defaults`, `RouteItem`, `LicenseInfo`, `IRGB`, `PostEntity`, `BaseCursor`, `BSPFile`, 
`CodegenContext`, `Servers`, `ModuleSystemKind`, `EntityDocumentResult`, `IValue`, `TileType`, `ShelfFieldDef`, `MongoRepository`, `CPoolSwap`, `ContainerNode`, `RuleMetadata`, `ng.IScope`, `WalletTreeItem`, `TreeExtNode`, `LoggerSink`, `SaveFileWriter`, `ts.ScriptKind`, `DenoExtensionContext`, `vscode.DocumentSelector`, `t.TypeOf`, `MockCall`, `Fact`, `LengthUnit`, `TSInstance`, `AxiosResponse`, `TableRowState`, `ColExpression`, `ForgotPasswordVerifyAccountsRequestMessage`, `ProcessOptions`, `MenuOptions`, `Milliseconds`, `PublicModelRouter`, `UnionC`, `NumberDataType`, `PredefinedGeneratorResolvers`, `StyleErrors`, `PhysicalLayout`, `http.OutgoingHttpHeaders`, `ToastComponent`, `LSAndTSDocResolver`, `IRenderer`, `Transition`, `CredDef`, `SerializedPolicy`, `MerkleTreeInclusionProof`, `ILectureModel`, `ActivityDefinition`, `NoteService`, `GitHubAPI`, `LocalMicroEnvironment`, `d.OutputTargetDocsReadme`, `PredicateNode`, `WholeStoreState`, `VersionArray`, `TELibCall`, `TraceConstraint`, `EndpointType`, `IntrospectionEngineOptions`, `GQLEventSearchResultSet`, `SingleSigHashMode`, `MatrixArray`, `SharedMetricsPublisher`, `AnimVectorType`, `VMLRenderer`, `IEpisode`, `AccountKey`, `RunService`, `ChatMessageWithStatus`, `FunctionJSON`, `MinMax`, `NoopExtSupportingReactNative`, `FilterFunc`, `VertoMethod`, `TextDrawer`, `IResolvedQuery`, `LabStorageService`, `ServerProvider`, `VFile`, `LookupStrategy`, `NestExpressApplication`, `ObjectContaining`, `NamespaceImport`, `MinMaxSurroundAttestation`, `IConnection`, `ElasticsearchConfig`, `AtemConfiguration`, `OrderedMap`, `TimefilterService`, `SankeyNode`, `LngLat`, `NativeImage`, `IComponent`, `SqlVals`, `BITBOXCli`, `ClientAuthCode`, `BitSource`, `OAuthExtension`, `NewOrganizationDTO`, `SVGPolygonElement`, `InstancePoolPlacementSecondaryVnicSubnet`, `Payload`, `DOMProxy`, `TionDeviceBase`, `ExpressionRenderHandler`, `ListDomainsCommandOutput`, `MessagingService`, `CountArguments`, `PointerOverEvent`, `SimpleRNN`, `MinorEvent`, `TileDoc`, `TestingRuntime`, `SetWindowProps`, `EDerivativeQuality`, `HomeProps`, `TransactionData`, `MIRTupleType`, `WordArray`, `BinaryReader`, `tsdoc.DocComment`, `SocketChannelClient`, `ClassExpression`, `ExtraFieldDetail`, `RPC.IWatchResponse`, `Ng2SmartTableComponent`, `CeramicApi`, `AsyncEvent`, `Splice`, `SectionType`, `AppModule`, `cdk.Stack`, `MomentData`, `ServiceDecorator`, `TemplateFunction`, `PostData`, `ApiClientRequest`, `RenderableElement`, `Shake`, `PIXI.Renderer`, `cxapi.Environment`, `ProtocolResponse`, `MetadataValue`, `CompatibleValue`, `CliHttpClientOptions`, `GridApi`, `HyperModelingDecorator`, `KeyboardLabelLang`, `PrivateUserView`, `QueryProvidersRequest`, `JSONSchema7`, `TBookAuthorMapping`, `IdentifierInfo`, `LinkTransport`, `Theme`, `monaco.editor.ITextModel`, `DynamicFormArrayModel`, `SourceMapper`, `FixedPointNumber`, `FileExplorerState`, `FormSubmissionErrors`, `SettingDictionary`, `VM`, `Specifier`, `VerifyRes`, `AreaState`, `ChemicalDoseState`, `AccessPolicy`, `IID3v2header`, `SortedMap`, `TableEntityResultPage`, `NormalizedField`, `Directus`, `Assert`, `RendererType`, `UpdateUser`, `ArrayContext`, `ProxyAccessor`, `RangeEntry`, `GuideData`, `CodeLens`, `DisplayValueSpec`, `DebugProtocol.SetBreakpointsArguments`, `UInt256`, `ToolchainName`, `ResourceFetcher`, `OrthogonalDirection`, `HTMLIonMenuElement`, `IFileDescription`, `RecognizerConfig`, `ILatLng`, `DictionaryModel`, `WebLayer3DBase`, `Transport`, `Predicate`, `THREE.TextureDataType`, `CompilerFileWatcher`, `MalMap`, 
`ConstructSignatureDeclaration`, `ModifyGlobalClusterCommandInput`, `TAuditReport`, `ChipCollection`, `IHooksGetter`, `AudioResource`, `DomManipulation`, `CommandLineParser`, `EditFn`, `Encoding`, `NextApiHandler`, `Fruit`, `SyncValidation`, `Factor`, `Animated.Adaptable`, `ModelShape`, `SecurityHealth`, `OAuthEvent`, `IFormField`, `SpaceQuery`, `PragmaValueContext`, `LayerWizard`, `UploadState`, `NextAuthOptions`, `KoaContextWithOIDC`, `i32`, `AppointmentMoment`, `MethodDetails`, `TokenItem`, `IRead`, `BindingElement`, `EVCb`, `Network`, `IWholeSummaryPayload`, `HydrateAnchorElement`, `tinycolor.Instance`, `UIBezierPath`, `TodoTxtTask`, `BaseLanguageClient`, `IosTargetName`, `UrlSegmentGroup`, `ITempDirectory`, `SignaturePubkeyPair`, `DataTexture`, `QueuedResponse`, `S1Node`, `IPriceDataSource`, `ColumnsSortState`, `StaticFunctor`, `ScreenshotBuildResults`, `HistogramBucketAggDependencies`, `ContainerProps`, `TransactionConfig`, `CsvParserStream`, `FaunaDBClient`, `CompilerProvider`, `MutateInSpec`, `fhir.Patient`, `DeviceChangeObserver`, `NativeInsertUpdateManyOptions`, `L.LatLngExpression`, `GetStaticProps`, `MarketTicker`, `XmlEnumsCommandInput`, `AzureFirewall`, `Customizable`, `ResponseHandler`, `EbmlElement`, `TimelineItemProps`, `AndroidChannelGroup`, `SuccessfulParsedMessage`, `DeleteApplicationRequest`, `Candle`, `TestController`, `Actors.Actor`, `CollectionFactory`, `AnyParameterTypeDescriptor`, `ColorT`, `VuexModuleConstructor`, `JPAChildShapeBlock`, `ShaderRegisterElement`, `Equipment`, `StateTaskEither`, `AnyIterable`, `PseudoClassSelector`, `PluginPageContext`, `FunctionDocumentation`, `TreeViewNode`, `AN`, `IKeyboardEvent`, `WalkContext`, `TTree`, `ModuleManager`, `iDraw`, `SqlTuningTaskCredentialTypes`, `PathNodeData`, `ts.BuilderProgram`, `RenderBuff`, `BaseEnvironment`, `MerkleIntervalTreeNode`, `ParseOptions`, `DejaViewPortComponent`, `EventResult`, `RollupStateMachine`, `HeaderMapManipulator`, `requests.ListDrgRouteTablesRequest`, `IComment`, `UpdateExpression`, `DocEntry`, `Registration`, `GroupIdentifier`, `AssetService`, `StackStyleProps`, `ObservableInput`, `ChartType`, `ReadableSignal`, `DeleteBackupResponse`, `WorldmapPointInfo`, `StandardContracts`, `AzureTreeItem`, `RequestId`, `ParquetCodecOptions`, `ARAddOptions`, `RangeSet`, `DocumentRecord`, `FileCommitDetails`, `RtcpPacket`, `BlockType`, `SCServerSocket`, `MessageModel`, `ChunkContentCallbackArgs`, `FileId`, `GlobalEnv`, `scriptfiles.ASModule`, `RepositoryIssue`, `TreeNodeViewModel`, `EncounterState`, `StandardProps`, `PreviewSize`, `IotRequestsService`, `ReportingUser`, `SQSEvent`, `TypedHash`, `ITooltipProps`, `DistrictsGeoJSON`, `TemplateProviderBase`, `EntryControlCCNotification`, `PureTransitions`, `BFBBProgramDef`, `Emotion`, `RelationModel`, `ArrayBindingOrAssignmentPattern`, `NgZone`, `StmtDiff`, `StackScreenProps`, `ILocalDeltaConnectionServer`, `SelectionSetNode`, `StandardContentToolsProvider`, `ForwardRefExoticComponent`, `HealerStatWeightEvents`, `ParametricRegExp`, `IGetLanguagesResponse`, `DrawerNavigationState`, `AndroidConfig.Manifest.AndroidManifest`, `RawProcess`, `ServerWrapper`, `Types.KeyValue`, `HttpBackend`, `HTMLLIElement`, `InitConfiguration`, `DType`, `EllipseEditOptions`, `EncryptOptions`, `ICitable`, `StoredItem`, `DebugProtocol.NextResponse`, `ButtonBaseProps`, `SqlObject`, `ClientTag`, `KintoClient`, `BabelPresetChain`, `ISimpleAction`, `IFormatterParserFn`, `ActionHandlerWithMetaData`, `EventTracker`, `DefaultGeneratorOptions`, `Mute`, `ISourceMapPathOverrides`, 
`JPAResourceRaw`, `Client.ProposalResponse`, `IViewInstance`, `PackageListItem`, `CausalTree`, `ChartActionContext`, `Authenticate`, `MenuSection`, `Applicative3`, `AnyRect`, `ViewFactory`, `VoyagerConfig`, `Suggest`, `ParameterValues`, `GetPrismaClientConfig`, `ICodeGenerationOutput`, `ISuggestionsCollector`, `AppDeepLink`, `RealtimeChannelInfo`, `grpc.ServiceError`, `Path6`, `ModuleConfig`, `ProductCategoryService`, `TagsService`, `ConvertedToObjectType`, `JStep`, `SaplingNativePlugin`, `apiClient.APIClient`, `TestSpec`, `CloudFunction`, `ProtocolFile`, `SidePanelRanking`, `AttachmentOptions`, `BarycentricTriangle`, `SolutionStackProps`, `ReportData`, `SwUpdate`, `GitHubEventModel`, `AsApiContract`, `InvalidateMask`, `IESAggField`, `ComponentSlotStylesPrepared`, `TrackedDocument`, `WriteBufferToItemsOptions`, `ListChannelsCommandOutput`, `Common.ISuite`, `EntityElements`, `ICommitAuthor`, `NameMap`, `CamelElement`, `SessionModel`, `Scope`, `ExpNumUop`, `ANodeExprLValueVar`, `InMemoryCache`, `CommonVersionsConfiguration`, `GenericAsyncFunc`, `BuildTask`, `SupEngine.Actor`, `ActiveComponent`, `TableNode`, `ReferenceDescription`, `EntityCollectionService`, `SessionRefreshRequest`, `ConnectionConfig`, `CurveType`, `ExpressionTypeDefinition`, `TimeRangeLimit`, `PureComputed`, `winston.Logger`, `PathlessInputOperation`, `peerconnection.DataChannel`, `ScopeState`, `OptionsWithMeta`, `PermissionOverwriteResolvable`, `MatButton`, `ResourceUnavailableException`, `ImportData`, `MonthOrYearComponents`, `responseInterface`, `restify.Response`, `FfmpegCommand`, `ValidatorOptions`, `MinimalCancelToken`, `files.FullLink`, `CreateChannelMembershipCommandInput`, `KeyRingSelectablesStore`, `Highcharts.AnnotationChart`, `ClarityAbiFunction`, `Entity.Status`, `FormatterFn`, `GLTFLoaderExtension`, `ValueKey`, `EventForDotNet`, `AnimationKeyframeHermite`, `SwankConn`, `UpdateAccountSettingsCommandInput`, `uint8`, `VcalVeventComponent`, `MessageConnection`, `Private.PaintRegion`, `GetIn`, `cytoscape.Core`, `DocumentColorParams`, `TaroEvent`, `GPattern`, `DisplacementRange`, `QueryProviderAttributesRequest`, `NotificationDataFilled`, `ChannelJoin`, `MockContract`, `TransposeAttrs`, `AsyncThunkPayloadCreator`, `SceneBinObj`, `SubscriberEntity`, `Telemetry`, `GfxRenderPipelineDescriptor`, `DummyNode`, `LocalDataProvider`, `LPStat`, `ERC20Value`, `CrochetPackage`, `DatatableColumn`, `CreateStackCommandInput`, `InsertContext`, `cc.Node`, `HookBase`, `SystemVerilogParser.SystemVerilogContainerInfo`, `SubmissionStatus`, `Extension`, `EngineOpt`, `Pooling1DLayerArgs`, `CodeGenOptions`, `SavedObjectsService`, `Rule.RuleContext`, `FilePropertyProps`, `IArtTextCacheData`, `UserStore`, `ModalFlavor`, `ManagedItem`, `CreateChannelParams`, `ListrRendererValue`, `FormatProps`, `UserMatched`, `PreloadedQuery`, `Events.postframe`, `ITagHandler`, `PeerInfo`, `ViewsWithCommits`, `Boss`, `i8`, `row`, `ReplFs`, `Timer`, `UI5Aggregation`, `ScanSegmentVectorItem`, `SendMessageCommandInput`, `GrafanaTheme`, `paper.PathItem`, `ActionSheetProps`, `IndexTreeItem`, `IArrayType`, `EndpointInfo`, `ICommandParsed`, `KubeConfiguration`, `fhir.Composition`, `MotionInstanceBindings`, `ABN`, `Tied`, `ArgumentsHost`, `Parameterizer`, `DBDoc`, `EntriesArray`, `ServicesState`, `UAProxyManager`, `EditorsService`, `SavedObjectsFindResponse`, `Animatable`, `ScreenshotConnectorOptions`, `AppTheme`, `IFluidDataStoreRuntime`, `Purse`, `IXingInfoTag`, `IProfileModel`, `ArcRotateCamera`, `NzNoAnimationDirective`, `MockDataGenerator`, `DecodedJwt`, 
`IBlockData`, `CreateContactCommandInput`, `PickerComponent`, `FilterDescriptor`, `ComponentList`, `UserResponse`, `IResultTab`, `ExportDataType`, `QuickReplyItemProps`, `ParseStream`, `SMTExp`, `Linker`, `TypeShape`, `sdk.SpeechTranslationConfig`, `ParserArgs`, `TooltipAndHighlightedGeoms`, `TimeStamp`, `IResultSelection`, `BatchNormalizationLayerArgs`, `JSZip`, `SortDirection`, `GfxBindingsDescriptor`, `execa.ExecaReturnValue`, `CoinPrimitive`, `StaticdeployClient`, `CommandEntityBuilder`, `AdaptElement`, `CSSState`, `IOperation`, `ExitStatus`, `SearchInterceptorDeps`, `SwaggerLambdas`, `ModernRoute`, `Category`, `FieldValuePair`, `TestMarker`, `AfterCaseCallback`, `Nameable`, `Bitrise`, `LikeEntity`, `CourseActions`, `EnhancementCache`, `BuildConfiguration`, `strtok3.ITokenizer`, `GEvent`, `TypedNavigator`, `PyJsonValue`, `TimeSeriesMetricDataPoint`, `IMenuProps`, `BotFrameworkAdapter`, `EmbedOptions`, `MergeTree.PropertySet`, `PortalInjector`, `MatcherCreator`, `AggsSetup`, `DataConfig`, `This`, `ResponseBody`, `MatIconRegistry`, `T12`, `GlobalNameFormatter`, `AreaNode`, `BlockIndex`, `TradingPosition`, `FiltersCreationContext`, `FileSearchCriteria`, `GBDialogStep`, `CoordinateType`, `VTF`, `K5`, `MutableCategorizedArrayProperty`, `UploadService`, `BatchCertificateClaim`, `TabNavigationState`, `Math2D.Box`, `LocalMarker`, `LogAnalyticsSourceMetadataField`, `AnimationKey`, `Connections`, `MediaQueryListEvent`, `FeedPost`, `StorageValue`, `OpenYoloCredential`, `ICardEpisode`, `HumidityControlSetpointCCReport`, `PageLayout`, `SettingsValue`, `Models.User`, `ImportGroup`, `GradientBlock`, `OpenSearchRawResponseExpressionTypeDefinition`, `ChannelsSet`, `TestContract`, `Fixtures`, `FadingParameters`, `PolyfaceBuilder`, `MemoryManager`, `MUserAccountId`, `HSD_TETev`, `IDecodePackage`, `ResourceItemXML`, `IndexRangeCandidate`, `CommandOutputBinding`, `IfStatement`, `ImportOptions`, `Sketch`, `IndexedPolyface`, `EnclosureShape`, `TestEnv`, `MileStoneName`, `DragDataStore`, `TabBar`, `PopulatedFolderDoc`, `WinState`, `next.Artboard`, `ResizeObserverMock`, `DatabaseSession`, `WorkspaceSymbolCallback`, `typeOfRow`, `IChangeHandler`, `HsConfig`, `DefaultRollupBlock`, `WssRoom`, `CardBrand`, `TimeScale`, `MergeTreeChunkV1`, `CallEndReason`, `IndicatorForInspection`, `DirectThreadEntity`, `MockImportRegistry`, `ListWorkflowsCommandInput`, `ViewController`, `VcsAccount`, `Nodelist`, `ExecutionWorker`, `RequestTracingConfig`, `protos.google.iam.v1.IGetIamPolicyRequest`, `ShoppingCart`, `NBTPrototype`, `IRootState`, `ExecutionResult`, `IAutocompleteSelectCellEditorParameters`, `RxFormBuilder`, `d.OptimizeCssInput`, `GunMsgCb`, `IListener`, `ElementEvent`, `PostgresAdapter`, `TextSelection`, `DisabledRequest`, `AuditAssertion`, `PvsTheory`, `SchemaResult`, `BuildingTree`, `IShape`, `NodeAndType`, `RawMetricReport`, `GroupType`, `TagListMessage`, `RouteDeps`, `RewriteRequestCase`, `RafCallback`, `AllocatedNode`, `IndexedAccessTypeNode`, `UserSession`, `ParserFactory`, `ShadeCoverOptions`, `ShHeap`, `CompilerContext`, `Pseudo`, `AndroidPermissionResponse`, `serviceDefinition`, `OutgoingStateType`, `Snippet`, `EventList`, `TweetResolvable`, `TemplateDeserialized`, `ComponentDef`, `XjointInfo`, `MessengerData`, `EventListenerOptions`, `FeedFilterFunction`, `ATTRIBUTE`, `IAuthCredential`, `GraphQLNonNull`, `ChartSonify.SonifyableChart`, `IDeltaManager`, `sst.App`, `ReducerNode`, `PluginDeleteAction`, `NotificationEntity`, `IInsertInput`, `SuggestionItem`, `Meeting`, `d.DevServerConfig`, `AlertAction`, 
`PackageDefinition`, `MeshComponent`, `MidiDevice`, `TupleData`, `WebTreeMapNode`, `KanbanList`, `CompareLookup`, `TaggedTemplateExpression`, `ApiQueryOptions`, `TemplRef`, `ResDicEntry`, `StyledLinkProps`, `FileChunkIteratorOptions`, `CreepSetup`, `mergeFunc`, `VariableDefinition`, `AbstractShaderNode`, `IMovable`, `NugetPackageTableFields`, `Pager`, `DaffPaypalTokenResponse`, `ResolvedRecordAtomType`, `MockedResponse`, `I18nUpdateOpCodes`, `ModuleID`, `FungibleConditionCode`, `AnyXModule`, `PanelPlacementMethod`, `SpawnOptions`, `ListModelsRequest`, `ElementDefinitionContext`, `S3Resource`, `ValueResolver`, `ExtraData`, `GLint`, `PageRoute`, `NumberSymbols`, `SyntaxError`, `ASTWithSource`, `SweetAlertOptions`, `ITelemetryProperties`, `V1WorkflowStepInputModel`, `ITenantService`, `FreeBalanceClass`, `OrbitTransformation`, `JSONSchema4`, `Achievement`, `JssState`, `TileAttrs`, `DndEvent`, `MerchantStaffEntity`, `AccountActions`, `LobbyMember`, `JsonRpcParams`, `ITechnology`, `ConditionalBlock`, `ModifyEventSubscriptionCommandInput`, `PluginCtx`, `ProjectEntity`, `UpdateUserSettingsCommandInput`, `WithNode`, `UserOptions`, `SDKVersion`, `FILTERS.CUSTOM`, `RatioMetric`, `IRoute.IParameter`, `CosmosOperationResponse`, `IPodFile`, `WithPromise`, `Cypress.Actions`, `DragAction`, `GraphQLNamedType`, `Oas3Rule`, `CharUnion`, `SocketHandler`, `DataFrameAnalyticsStats`, `ServiceSetup`, `FireClient`, `DefaultDataService`, `BoardEvent`, `AmmFakeInstance`, `HTMLIonContentElement`, `SubShader`, `ExtensionData`, `JCorner`, `FocusTrap`, `TransposedArray`, `EventMessage`, `IPlayerActionCallback`, `ResponderExecutionStatus`, `KeywordCxt`, `OperationGroup`, `ODataQueryOptions`, `GAMEOBJECT_SIGN`, `HookTypes`, `ComponentEmitInfo`, `HttpPrefixHeadersCommandInput`, `DescribeSecurityProfileCommandInput`, `TableOffsetMap`, `BoundFrustum`, `MemoryInfo`, `DeprovisionByoipCidrCommandInput`, `INetwork`, `StaticProvider`, `FilterStatusValues`, `IKeyState`, `CartState`, `Tsa.SourceFile`, `RollupBuild`, `ISetCombination`, `DesignerLibrary`, `CoreTypes.TextTransformType`, `keyboardJS.KeyEvent`, `d.Workbox`, `JsxAttribute`, `OrderableEdmTypeField`, `FullRequestParams`, `StateMachineTargets`, `DenomHelper`, `CompiledSchemasArray`, `ScullyRoute`, `QuerySubmitContext`, `TagTree`, `FleetConfigType`, `PackageSummary`, `OnNumberCommitFunc`, `TelegramClient`, `DefaultFilterEnum`, `SinonSandbox`, `CheckFn`, `SafeSignature`, `QueryParserListener`, `IConnectionPageProps`, `RedspotArguments`, `HttpPayloadTraitsWithMediaTypeCommandInput`, `IExportOptions`, `CacheValue`, `MiddlewareFunction`, `ISolutionEntry`, `CmafEncryption`, `TextureInfo`, `OutlineSharedMetricsPublisher`, `LayerNormalizationLayerArgs`, `BindingFilter`, `RestEndpoint`, `TagExpr`, `OpenSearchUtilsPlugin`, `ModuleInfo`, `ts.TryStatement`, `DriveItemData`, `EndRecordingRequest`, `PaginateConfig`, `LayerArrays`, `OptionsService`, `ComparisonNode`, `DryContext`, `IndexPatternsService`, `CodeBuild`, `GetAuthorizerCommandInput`, `MeterCCSupportedReport`, `ParamMetadataArgs`, `GetCertificateAuthorityCsrCommandInput`, `ISparqlBinding`, `PagedRequestDto`, `RnPromise`, `InspectPropertyReport`, `requests.ListManagementAgentImagesRequest`, `ScreenConfigWithParent`, `FinalEventData`, `PreviewProps`, `Contents.IModel`, `UserRegister`, `protocol.Message`, `FailedAttemptError`, `WalletContext`, `ILabel`, `OverridedSlateBuilders`, `ProgramIds`, `NumberArray`, `FormItemProps`, `requests.ListFindingsRequest`, `QueryAuditorAttributesRequest`, `PagedParamsInput`, `CommandFlag`, 
`SizedBox`, `RobotHost`, `Favorite`, `SFUISchemaItemRun`, `SubFeaturePrivilege`, `Header`, `ContextMenuProps`, `ModalType`, `WebdriverIOConfig`, `CloudWatchMetricChange`, `SignatureTypes`, `DynamicStyleSheet`, `AuxVM`, `ListenerRemoveCallback`, `CustomRule`, `ParsedIcons`, `TextRenderer`, `Options.Publish`, `MeshVertex`, `WyvernAsset`, `VectorOrList`, `IFBXRelationShip`, `DemandDTO`, `ValidationType`, `TabApi`, `ChildAppFinalConfig`, `NullLiteralExpr`, `NSSet`, `OnConflictUpdateBuilder`, `HookCallback`, `FastifyAdapter`, `OptionsReceived`, `AsyncIterator`, `EmailPayload`, `GfxTextureP_WebGPU`, `PlaybackParticipant`, `MySQLParserListener`, `SuccessfulMatchReport`, `StageStore`, `ResponsiveStorage`, `DatosService`, `ScriptCompiler`, `IRawLoadMetricReport`, `RoundingModesType`, `ValidatorSet`, `AutoFeeLevels`, `BackgroundAnalysisBase`, `ISettingsContext`, `d3Geo.GeoRawProjection`, `ProviderProxy`, `PackageJsonOptions`, `PagerCell`, `DisplayErrorPipe`, `ParameterDeclaration`, `DescribeEventsRequest`, `GameResult`, `ExportsAnalyzerResult`, `GraphQLResponse`, `ODataConfiguration`, `CardId`, `JessParser`, `CreateServerCommandInput`, `ASModule`, `NameIdentifierNode`, `AxisSpec`, `Configuration`, `U`, `SerializedEntity`, `V1ExpressionModel`, `TaskLabel`, `CreateTestConfigOptions`, `BNString`, `IVideoService`, `UnsupportedTypeLog`, `IntersectionObserver`, `UISliceState`, `PackagerInfo`, `TypeDBTransaction.Extended`, `vscode.Location`, `AzureFunction`, `TSchema`, `CreateAppointmentService`, `GestureStateChangeEvent`, `PyrightJsonResults`, `GraphRewriteBuilder`, `IOrderResult`, `AuxBotVisualizer`, `FibaroVenetianBlindCCSet`, `Pluggable`, `BlockFile`, `TimelineSpaceState`, `ApolloCache`, `AuthenticateDeviceRequest`, `NetworkTargetGroup`, `SpatialControls`, `UiSyncEventArgs`, `IRoom`, `V1APIService`, `AB`, `TestContracts`, `SettingsStateType`, `BudgetItem`, `SQL`, `IStepDefinition`, `FileMap`, `ICharacterData`, `SimpleToastCreator`, `EasingFunction`, `DescribeUserRequest`, `ProcessInstanceTableEntry`, `FlattenedXmlMapWithXmlNameCommandInput`, `IDelta`, `Expense`, `ValueMapper`, `GlitzClient`, `IContainerProps`, `Breakpoints`, `ActionBase`, `Disk`, `ProcessHandler`, `ContainerDefinition`, `ControlElement`, `MaterialButton`, `globalThis.MouseEvent`, `request.CoreOptions`, `Side`, `Cdt`, `PickResult`, `AgreementData`, `UnaryOpNode`, `DataGatewayService`, `ViewModelReducerState`, `Counter__factory`, `GainEntry`, `ImageFormatTypes`, `ExtensionProvider`, `MDL0_NodeEntry`, `MALEntry`, `EventSourceMap`, `StudioBase`, `GradientSize`, `BaseTable`, `TimeConstraint`, `EThree`, `InstancedBufferAttribute`, `Usage`, `TypeResult`, `AsyncActionProcessingOptions`, `PredictableSupportCode`, `Regex`, `WorkspaceHost`, `SiteListItem`, `CtrTextureHolder`, `CommonState`, `DeleteApplicationCommandInput`, `PElementHandle`, `ICoordinateData`, `OrigamiControlValueAccessor`, `QueryLeaseResponse`, `MIDIAccess`, `ControllerSpec`, `inquirerTypes.PromptModule`, `GetMeetingCommandInput`, `requests.ListAutonomousExadataInfrastructureShapesRequest`, `ConnectionRequest`, `ClusterEvent`, `LoadRange`, `GLTF2.GLTF`, `TagValue`, `KaizenToken`, `Prose2Mdast_NodeMap_Presets`, `ReaderPage`, `Stage`, `ClientItemViewModel`, `InternalQueryHandler`, `Notified`, `ColorString`, `VisEventToTrigger`, `LoggerInterface`, `AngularPackageLoggerMessageType`, `requests.ListResolversRequest`, `RespondersThemeType`, `SMTPServer`, `LoadedVertexDraw`, `ConfigState`, `DependencyDescriptor`, `Vector2Arrow`, `InspectFormat`, `RestManagerRequestData`, 
`IDinoProperties`, `Traversal`, `OAuthToken`, `clientSocket`, `ElTableStoreStates`, `THREE.WebGLRenderTarget`, `FnCall`, `JSDefinitionNode`, `AParentInterface`, `HsEventBusService`, `HandlerStep`, `ResponseStatus`, `TSTNode`, `Func`, `Entry`, `SlotStatus`, `SharedControlConfig`, `SharedPropertyTree`, `BaseModule`, `Preset`, `CipherCCM`, `GetRepositoryCommandInput`, `grpc.Request`, `ImmutableCollection`, `PropsFromRedux`, `Range`, `Express`, `DynamicFormArrayGroupModel`, `IsometricGraphic`, `CategoryLookupTables`, `UserApollo`, `JSystemFileReaderHelper`, `FormatMessage`, `ISize`, `LogEntry`, `IRootAction`, `Pod`, `ChatService`, `IVectorLayer`, `ArweaveAddress`, `AppearanceService`, `td.Action1`, `TestERC20`, `AnyFn`, `DaffCategoryReducerState`, `MachineParseResult`, `InjectorClient`, `ResourceService`, `MAL`, `VRMDebugOptions`, `ContentBlockNode`, `t.File`, `ICalculatePagingOutputs`, `tf.Tensor4D`, `Vec4`, `UpdateConfigurationDetails`, `Union`, `LoopAction`, `IE`, `NetworkDiagnosticChangedEventArgs`, `L1L2Args`, `MachineData`, `AdminState`, `SagaEnv`, `IntersectionC`, `GetRepositoryStatisticsPayload`, `ProgramOptions`, `AngularHttpError`, `BrokerConfig`, `OutRoomPacket`, `MsgUpdateProvider`, `HsLayerUtilsService`, `ConnectionPool`, `CompositeCollider`, `WU`, `TypeConverter`, `AdminActions`, `ISerializedInterval`, `FacetValue`, `CollisionKeeperCategory`, `TransferType`, `GitBlameLine`, `EventConfig`, `UploaderInputs`, `ImageRef`, `DataCardsI18nType`, `OutputSelector`, `ExceptionsBuilderExceptionItem`, `PanInfo`, `DateRangeShortcut`, `RequestLimitConfig`, `KVNamespace`, `EChartsOption`, `ConnectionRecord`, `CustomFont`, `OsdServer`, `KeySequence`, `CmdType`, `SimpleTreeDataProviderHierarchy`, `ContractManifest`, `UpdateDeploymentCommandInput`, `Simulate`, `ClientCardIded`, `IDataPerList`, `Sexp`, `ReducerList`, `GlobalParametersService`, `GetPolicyCommandInput`, `numericRootOfPolynomial`, `NgWalkerConfig`, `DetachedSequenceId`, `ConcreteTestSettings`, `LabelPosition`, `RegSuitCore`, `ErrorReport`, `MeshLambertMaterial`, `ParsedStringPattern`, `XStyled`, `ResetPasswordDto`, `SkeletonTextProps`, `StateLeaf`, `ListParams`, `ListAssetsRequest`, `XSelectNode`, `DiagnosticWithFix`, `ParsedDirectiveArgumentAndInputFieldMappings`, `MsgCloseBid`, `QExtension`, `Elements.RichTextElement`, `BridgeableGuildChannel`, `AccountGoogle`, `NavigationLocation`, `UIPageViewControllerImpl`, `IKeyQueryOptions`, `CtrFail`, `UnionTypeDefinitionNode`, `_MessageConfig`, `GPUPipelineLayout`, `SavedObjectsClientProvider`, `TopLevel`, `ts.SyntaxKind`, `PluginPositionFn`, `StorageConfig`, `BytecodeWithLinkReferences`, `TableBatchOperation`, `ITaskItem`, `IQueryFeaturesOptions`, `BlobBeginCopyFromURLResponse`, `throttle`, `Rotation`, `AnyCardInGame`, `OAuthError`, `ODataEntitySet`, `TwoFactorProviderType`, `AlertInstanceState`, `InjectorService`, `InstructionData`, `AggsCommonSetupDependencies`, `EventInitDict`, `PreviewVer`, `MemberRepository`, `ReactTestRenderer`, `BrowserHeaders`, `StageRuntimeContext`, `Trie`, `TokenManager`, `AuthenticationDataState`, `RGBColor`, `CharCategoryMap`, `LanguageClientOptions`, `MIRFieldDecl`, `IAreaData`, `BumpType`, `SwapOptions`, `ConnectionWorkflow`, `LookupKey`, `Squiss`, `GlobalStore`, `LowAndHighXY`, `CheerioElement`, `DMChannel`, `SimpleAuthenticationDetailsProvider`, `ObjectCacheService`, `FundingStrategy`, `Foxx.Request`, `LifecycleRule`, `ANDGate`, `ModelObject`, `ScoreDoc`, `IResourceRow`, `ParsedUtil`, `Tarefa`, `ITiledLayer`, `EdiElement`, `PluginEvents`, `Gamepad`, 
`ParsedRequest`, `MapsVM`, `DataToExport`, `Yendor.Console`, `Persona`, `KmsClientSupplier`, `StructureTypeRaw`, `ContractAddressOrInstance`, `NativeCallback`, `ToolkitInfo`, `SocketIoChannel`, `ClientRenderOptions`, `FormFieldPreviousValueObject`, `TargetLanguage`, `CreateJobDetails`, `ProviderOverride`, `CreateEventSubscriptionCommandInput`, `FileRelativeUrl`, `SpaceBonus`, `OmniOscillator`, `Json.Value`, `Eula`, `RegistryConfig`, `ObjectLiteralElement`, `IHubContent`, `ErrorMiddleware`, `TransformListRow`, `CatalogLayoutState`, `UseContextStore`, `Yendor.BSPNode`, `BrowseProductsFacade`, `Gui.VPanel`, `SearchModeDescription`, `QueryCreator`, `DestinationAuthToken`, `ActivationIdentifier`, `BellSchedule`, `AmmContractWrapper`, `BotCursorType`, `IAzureNamingRules`, `NamespaceScope`, `NodePort`, `ModelCheckResult`, `VpcSubnetType`, `OnScroll`, `VnodeDOM`, `IDestination`, `HostState`, `DataMap`, `TypedBinOp`, `CLM.EntityBase`, `BrowserError`, `PortRecordMap`, `ProjectionType`, `LabelBullet`, `InterceptorContext`, `IFeatureOrganizationUpdateInput`, `AbstractMessageParser`, `LoginUri`, `DateValue`, `ChartElementSizes`, `AvailableMirror`, `QueuePeekMessagesResponse`, `UInt32`, `SymbolScope`, `IAresData`, `EntityType`, `CreateThemeCommandInput`, `CANNON.Vec3`, `Dialog`, `ListSchemasResponse`, `TaskChecker`, `TestTerminal`, `testing.ApplicationEnv`, `d.Config`, `ToolGroup`, `MgtFlyout`, `DialogContextOptions`, `HmrContext`, `DataPublicPluginSetup`, `RealtimeAttendeePositionInFrame`, `IVirtualDeviceConfiguration`, `Parsed`, `ProgressInfo`, `NodeCollection`, `GeolocationPositionError`, `DimensionRecord`, `ApiConfig`, `AccountTransfersService`, `AutoTranslateResult`, `IRemoteTargetJson`, `TClass`, `SVString`, `IStaticWebAppWizardContext`, `MobileService`, `IVorbisPicture`, `CustomersService`, `xyTYpe`, `StageData`, `FP`, `DAL.DEVICE_ID_BUTTON_RESET`, `CDJStatus.State`, `appleTransactions.Table`, `CubicBezier`, `MutationHandler`, `LineMessageType`, `FullscreenOptions`, `LogPanelLayout`, `View`, `ResourcePermission`, `StreamableRowPromise`, `WorkItemTypeUI`, `RadioButton`, `WorldService`, `SheetObject`, `ServerLock`, `CompType`, `TAccessor`, `PixelImage`, `Noise`, `IStatusResult`, `reflect.TypeReference`, `ReactionHandleOptions`, `Inheritance`, `WexBimShapeMultiInstance`, `IdentifierInput`, `IZipEntry`, `ImageStore`, `MigrationStates`, `SupCore.PluginsInfo`, `DynamicFormLayoutService`, `ExternalMaster`, `NextConfig`, `UIClass`, `StringLookupMap`, `$p_Expression`, `HttpClientConfiguration`, `RestService`, `TableSuggestionColumn`, `UpdateCallback`, `CoralContext`, `AppStateModel`, `VLIEOffset`, `NavigateOptions`, `PIXI.Text`, `VideoModel`, `BackstageItemsManager`, `LLink`, `DiscordMessage`, `NotebookInfo`, `MessageTarget`, `AssertionResult`, `PublicEndpointDetails`, `ConnectionCloseFrame`, `MaterialGroup`, `ErrorBoundaryProps`, `momentNs.Moment`, `Int`, `EngineConfig`, `DeploymentParams`, `RenderHookResult`, `CmsEditorFieldRendererPlugin`, `ResourceSource`, `YConfig`, `BinaryDownloadConfiguration`, `Composable`, `vscode.Terminal`, `IsString`, `WorkItemUI`, `ConfigContent`, `NonNullExpression`, `VNodeQueue`, `UnitStateMachine`, `ServerItem`, `Consumer`, `SceneRenderContext`, `NgbActiveModal`, `requests.ListDatabaseSoftwareImagesRequest`, `QueryTopicForHolder`, `NodeSDK`, `CounterProps`, `ConditionalBooleanValue`, `p5ex.ShapeColor`, `GetByKeyRequestBuilder`, `NoneAction`, `NoShrinkArray`, `HttpStart`, `ControllerData`, `OperationStack`, `EffectOptions`, `VectorLike`, `ErrorMark`, `i18next.TFunction`, 
`RoutedPoint`, `FadingFeature`, `ContinueResponse`, `CombinedJobWithStats`, `ChainId`, `HttpClient`, `ListHealthChecksVantagePointsRequest`, `requests.ListDbServersRequest`, `UnionShape`, `GetColumnWidthFn`, `RequestInput`, `DynamicModule`, `IConnextClient`, `ServerStyleSheets`, `PageModel`, `SagaGeneratorWithReturn`, `ProcessStatus`, `IXElementResult`, `ButtonColor`, `JestProcessRequest`, `RecordEdge`, `Html5QrcodeSupportedFormats`, `ThemeSettings`, `BinOp`, `Persister.IPersist`, `YesNoLimitedUnknown`, `EncryptionAtRest`, `ThreadItem`, `QueryConstraint`, `MagickFormat`, `env`, `VorlonMessage`, `schema.Specification`, `WhenCause`, `FactoryOptions`, `DomainBudget`, `SilxStyle`, `DelegateBuilder`, `Flo.ElementMetadata`, `IImageFile`, `SecurityCCNonceReport`, `ODataNavigationPropertyResource`, `TreemapPoint`, `TileGrid`, `DynamoDBStreamEvent`, `EventsClientConfiguration`, `EnumValue`, `IDateUtils`, `GraphProps`, `SuspenseContextType`, `NVNode`, `GherkinQuery`, `DataArrayTypes`, `ScaleMap`, `INormalEventAction`, `CallReceiverMock`, `LobbyController`, `Cohort`, `WritableComputedRef`, `ColorChannelControl`, `boolean`, `HitTestResult`, `SignalingConn`, `IApi`, `GetApplicationResponse`, `VimState`, `Submesh`, `DOMRect`, `CancellationReceiverStrategy`, `ErrorRequestHandler`, `DirtyDiff`, `GetContactCommandInput`, `TaskSchema`, `CallHierarchyItem`, `TxBuilder`, `ShareCallbackFunction`, `TransactionSegWit`, `S`, `Sound`, `CtrBroad`, `GeneralName`, `ObjectFetcher`, `Neo4jConfig`, `GetServerSideProps`, `DynamicRepository`, `Cell`, `CallbackT`, `TestRunnerAdapter`, `AccessListEIP2930Transaction`, `CssBlockAst`, `TransformOptions`, `_Column`, `MonitoringOutput`, `UpdateDestinationCommandInput`, `EqualityMap`, `GasTarget`, `RollupWatcher`, `WatcherFactory`, `ReferencesIdDictionary`, `TestHookArgs`, `OnSuccess`, `GanttService`, `ToastsManager`, `ChangeLanguage`, `IDoc`, `MongoClientConfig`, `DaffAccountRegistrationFactory`, `ISPUser`, `DebugCurve`, `PropertyASTNode`, `CausalRepoStore`, `InternalCorePreboot`, `DrawOptions`, `FetchFn`, `AutoTranslateGoogleService`, `BrushScope`, `GetInvitationsCountCommandInput`, `CrudFeatures`, `DefineMap`, `HeaderViewProps`, `DomainItems`, `SnailfishNumber`, `AxisBuilder`, `TreeItem`, `IVpc`, `AccountAssetDTO`, `WidgetDef`, `Animated.AnimatedInterpolation`, `IOrganization`, `ViewportCallback`, `PossiblyAsyncOrderedHierarchyIterable`, `ParameterInformation`, `ShuftiproInitResult`, `ts.NavigationTree`, `NoelEvent`, `BitBucketCloudAPI`, `IServiceIdentifier`, `ObjectLike`, `BindingSetting`, `DiffFile`, `LogAnalyticsSourceLabelCondition`, `ESTree.Class`, `ObservableDbRef`, `ESLintExtendedProgram`, `playwright.Page`, `IBenefitsSearchResult`, `android.os.Parcelable`, `ServiceMonitoringServiceClient`, `ParsedAccountBase`, `AssetsList`, `ParallelWorkflow`, `ByteMatrix`, `S3Configuration`, `TLE.TleParseResult`, `CurrencyValue`, `InterceptorManager`, `MotorcycleDomSource`, `Threshold`, `TestConsumer`, `PatchDocument`, `CommitOrderCalculator`, `ApiHttpService`, `HumanAddr`, `CheckPrivilegesOptions`, `MutableGridCategory`, `vfs.FileSet`, `TaskName`, `DynamoDbDataSource`, `NestedCSSProperties`, `CoapForm`, `UrlService`, `jsmap`, `LoaderService`, `SpendingCondition`, `EffectCallback`, `ShadowAtlas_t`, `tabBrowser`, `TestFolder`, `IntersectionInfo`, `EventSubscription`, `TableRowPosition`, `ts.ClassDeclaration`, `Electron.MenuItemConstructorOptions`, `EdgeDisplayData`, `IUi`, `MeshData`, `IParseInstruction`, `Semaphore`, `PluginData`, `FileOpenFlags`, `ResourceManagementClient`, 
`ProductTypeService`, `ECSqlValue`, `BeforeInstallPromptEvent`, `GridIndex`, `ExportSpecifier`, `ConsoleInterface`, `DynamicTextStyle`, `PlaceIndex`, `fabric.Object`, `Fzf`, `LuaFiledCompletionInfo`, `Events`, `PlaywrightTestConfig`, `FormatId`, `LineBatch`, `ImageDataLike`, `SpectatorHost`, `t.Comment`, `DialogResult`, `NetworkInterface`, `DamageTypeData`, `firestore.GetOptions`, `LeftObstacleSide`, `PlatformBrowser`, `TypeVariable`, `Curried`, `ExchangeOptions`, `XYZSizeModeValues`, `IValidator`, `OneHotVector`, `WordMap`, `Linters`, `MachineInfo`, `PartitionKeyParams`, `FetchInit`, `ComponentName`, `MappedDataSource`, `ModState`, `TypeProto`, `WebhookOptions`, `CreateRouteCommandInput`, `IFeatureComment`, `Exit`, `SelectItemValue`, `CandyDate`, `SuspenseListRegistryItem`, `SinglesigAddressType`, `SymInt`, `DiagnosticAddendum`, `sharp.Sharp`, `Azure.TableBatch`, `IConfigurationComponent`, `UpdateClusterResponse`, `PackageJsonData`, `OotOnlineStorage`, `SoFetchResponse`, `DeleteConfigurationSetCommandInput`, `Caching`, `MaterialEntry`, `CheerioAPI`, `ArrayPropertyValueRenderer`, `Packet`, `Benchee.Benchmark`, `AST.ArrayAST`, `TreeGridTick`, `JSONSchema7Definition`, `ClientJournalEntryIded`, `VerdaccioError`, `IApplyJobPostInput`, `PIXI.Container`, `ItemKeyboardNavigator`, `FlexElementProps`, `AsyncTestBedConfig`, `StoredChannel`, `MessagingDevicesResponse`, `ApplyPendingMaintenanceActionCommandInput`, `DeploymentType`, `CkbBurn`, `SVGImageElement`, `OpenGraph`, `ModelDefinition`, `DecoratorNode`, `WebGLExtensionEnum`, `meow.Result`, `ContinuationData`, `RouteHealthCheckResult`, `Parallelogram`, `INetEventHandler`, `GradientDataNumber`, `PromptOptions`, `WorkspaceFolderSetting`, `ListViewProps`, `Panels`, `GeneratedPoint2D`, `Robot`, `ContactInterface`, `HttpApi`, `LayerRecord`, `SampleUser`, `GX_Material.GXMaterialHacks`, `iField`, `CSSResolve`, `NameSpaceInterfaceImport.Interface`, `server.Diagnostic`, `BgState`, `AcceptPaymentRequest`, `MenuController`, `AugmentedDiagnostic`, `AuthorizationDataService`, `AuditResult`, `TsLinter`, `Xml`, `EditableTextStyle`, `ForkStatus`, `BundleItem`, `EdiDocumentConfiguration`, `IShapeBase`, `DeleteFleetCommandInput`, `IOsdUrlControls`, `SxParserConfig`, `MockResponseInit`, `EthereumTransactionOptions`, `TaskTreeItem`, `Charset`, `Breadcrumbs`, `GitUri`, `ListInstanceProfilesCommandInput`, `RegisterParams`, `INetworkInfoFeature`, `PluginInstaller`, `ScaleGamma`, `ClientGoalState`, `ISPRequestOptions`, `IRestResponse`, `ThingMetaRecord`, `ethers.Wallet`, `UsedNames`, `MetricModalProps`, `ConfigBundle`, `estypes.ErrorCause`, `IPacketHeader`, `IDynamicValues`, `OpType`, `StoryArchive`, `IntelliCenterConfigRequest`, `CausalRepoBranch`, `IndexingRuleAttributes`, `ViewMode`, `MapPoint`, `GetServerSidePropsContext`, `Variation`, `BenzeneGraphQLArgs`, `ChatBaseSelectorProps`, `apid.RecordedId`, `Nonce`, `TagSpecification`, `EditorStore`, `ID`, `GameId`, `requests.ListRemotePeeringConnectionsRequest`, `AddonActions`, `Point3D`, `unchanged.Unchangeable`, `MultiRingBufferReadableStream`, `WebSiteManagementModels.Site`, `Uint`, `ReaderObservableEither`, `i18n.Placeholder`, `SendTxBody`, `PackageType`, `LocalReference`, `AccessorCache`, `SpriteManager`, `SimpleNode`, `FTPResponse`, `TriggerEvent`, `ThyPopoverContainerComponent`, `SortOrder`, `IAstMaker`, `apid.GetRuleOption`, `UserTypeReference`, `UpSetJSSkeletonPropsImpl`, `ATOM`, `INanoDate`, `IExecutorHandlersCollection`, `IDocumentInfo`, `AggregationData`, `SParentElement`, `TextElementStyle`, `NodeCheckFunc`, 
`LiteralReprAll`, `GraphicUpdateResult`, `visuals.Coord`, `ListRepositoriesCommandInput`, `IterableChanges`, `TreeItemComponent`, `TaskScheduling`, `ConfigurationProps`, `InputObjectType`, `VarSymbol`, `IICUMessageTranslation`, `UseMetaStateOptions`, `GLfloat2`, `ParquetWriterOptions`, `INodeList`, `requests.ListStacksRequest`, `MeshNormalMaterial`, `ArrayMap`, `SmsCookie`, `HassEntities`, `RectilinearEdgeRouter`, `BrandC`, `DiscoverInputSchemaCommandInput`, `THREE.Intersection`, `DeleteValue`, `ProsemirrorNode`, `MethodNames`, `RgbaColor`, `TickItem`, `IContent`, `LocalizedLabels`, `ShaderVariant`, `ControlItem`, `ILocalValue`, `IfExistsContext`, `NormalizedExtension`, `OAuthConfig`, `TaskQueue`, `Snapshot`, `LatLng`, `MonitoringOutputConfig`, `MessageDataType`, `IDSLCodeState`, `ContentProps`, `Synth`, `TableStorageContext`, `MediaStreamAudioDestinationNode`, `DriveItem`, `DownloadTask`, `StaticPathLoader`, `Rx.Observer`, `CoreEventHandlers`, `PointerPosition`, `SortService`, `PreferenceStateModel`, `ReplicatorQueries`, `vile.PluginList`, `GX.CompType`, `LightGallery`, `FolderInfo`, `SettingValue`, `Import`, `Ledger`, `Locator`, `NamedTupleMember`, `APIConfigurationParameters`, `ODataPathSegmentsHandler`, `BaseDocumentView`, `SKShadowItem`, `BrandService`, `ColumnRefContext`, `MonitoredHealth`, `IGenericTagMapper`, `IModdleElement`, `ExtractGroupValue`, `RuleStateData`, `ThemeStore`, `SubscriptionsClient`, `ExprEvaluatorContext`, `PrecalculatedBot`, `NumberRowModel`, `InAppBrowserObject`, `AnyColumn`, `Ban`, `EditPageReq`, `PriceSpec`, `ParserException`, `InstallationQuery`, `StopFlowCommandInput`, `AnyNode`, `StacksTransaction`, `CreateDomainCommandInput`, `DragDropManager`, `PatchObjectMetadata`, `Neuron`, `requests.ListTagDefaultsRequest`, `ProfilerConfig`, `InviteActions`, `DocTableCell`, `VpnGateway`, `CheckResultBuilder`, `InvoiceEstimateHistoryService`, `FlowExhaustedMatch`, `CommandArgs`, `ReConfigChunk`, `DerivedKeys`, `Replacer`, `WorkboxService`, `GetNamespaceResponse`, `KeyboardEvent`, `VoidFunctionComponent`, `WatcherFolder`, `GX.TevOp`, `AsyncLocalStorage`, `picgo`, `WriterType`, `HitSensor`, `CreateRepositoryCommandInput`, `VIS0`, `PermissionOverwrite`, `PageBlockRule`, `AstNodeFactory`, `CharSet`, `INavigationData`, `FeatherProps`, `pino.Logger`, `ClientRect`, `ImageDataBase`, `Pallete`, `Words`, `TestControllerPoint`, `TransationMessageOrObject`, `NeverShape`, `AgeOrForm`, `LexoInteger`, `AuthHeaderProcessor`, `TrueConstraint`, `IDBPObjectStore`, `OhbugMetaData`, `DebugProtocol.StepOutResponse`, `ArgumentMetadata`, `SortedMapStructure`, `ContentManagementService`, `ConfigLogger`, `WebGPUBackend`, `UpdateFilter`, `InstructionParams`, `DigitalObjectSet`, `DiffHunk`, `EntitySchema`, `LoadmoreFlatNode`, `SoftVis3dMesh`, `PartsType`, `LineWithBound`, `Register`, `NodeCore`, `AltStore`, `PluginStorageKind`, `ProxyReducer`, `FabricGatewayRegistryEntry`, `TextureParameterEnum`, `MagentoOrder`, `FontCatalog`, `requests.ListIPSecConnectionsRequest`, `PromoteGroupUsersRequest`, `SoloOptions`, `SchemaDef`, `CrochetActivation`, `OrganizationDocument`, `MatSortHeaderIntl`, `PrettierOptions`, `ExpressionFunctionTheme`, `CurriedFunction1`, `MutableChange`, `Watch`, `GrpcResponseMessageData`, `JoinOptions`, `TupletDot`, `ts.HeritageClause`, `SettingsRepository`, `CreateTaskCommandInput`, `XAndY`, `TestConfigData`, `UpdateJobRequest`, `CategoryResult`, `SyncToolSettingsPropertiesEventArgs`, `GetConfigurationSetCommandInput`, `VisHelpTextProps`, `XYZAnyValues`, `ReportsService`, 
`RollupTransaction`, `PeerTubeServer`, `requests.ListNetworkSecurityGroupVnicsRequest`, `HeaterState`, `AWSSNSRecordItem`, `DelegatorReward`, `ModuleMock`, `SankeyGraph`, `BotTags`, `AutorestNormalizedConfiguration`, `CommonDialogService`, `SkeletonHeaderProps`, `P2PEnhancedPeerInfo`, `PathSolution`, `Settled`, `IArticle`, `Aabb2`, `LOG_LEVEL`, `IMrepoDigestConfigFile`, `RelayRequestAny`, `FunctionFragment`, `Block`, `PropertySet`, `AuthHandler`, `Atom`, `Listenable`, `ExtensiblePayload`, `DocumentDecoration`, `RowId`, `NetworkErrorType`, `CreateBucketCommandInput`, `DashLowerthirdNameInputElement`, `Solver`, `SavedObjectDescriptor`, `IProfileLoaded`, `AError`, `Biquad`, `InstanceStatus`, `PostfixUnaryExpression`, `CancelParameters`, `ReferenceRecord`, `PlaybackSettings`, `SCNMaterial`, `ProgressHandler`, `ThyDialogConfig`, `NoteForActivitySetup`, `STRowSource`, `UpdateExperimentCommandInput`, `Serenity`, `SideMenuState`, `SignalValues`, `InterfaceWithExtends`, `TEntityRecord`, `BindingTemplate`, `UrlEntity`, `express.Application`, `CeloTxReceipt`, `ast.SeqNode`, `ServerClosure`, `BaseConvLayerArgs`, `PolySynth`, `FontAwesomeIconStandalone`, `MessagesPageStateModel`, `DirEntry`, `ObjectExplorerService`, `RecommendationType`, `DrawContext`, `IncrementalQuinTree`, `TxOptions`, `LoaderInstance`, `PostgresConnectionOptions`, `EntityRecord`, `VisualizeEmbeddableFactory`, `JsonRpcResponsePayload`, `TypeNode`, `StatsModule`, `ts.Type`, `StatsNode`, `Pred`, `CertificateSubjectAlternativeName`, `ViewModel`, `Face3`, `AriaLivePoliteness`, `BrowserFetcherRevisionInfo`, `forge.pki.Certificate`, `MigrationDefinition`, `VMLDOMElement`, `IterableActivity`, `AnimalType`, `interfaces.BindingOnSyntax`, `SymString`, `ZWaveController`, `StructuredAssignementPrimitive`, `DeleteBotAliasCommandInput`, `TokenSigner`, `VariableState`, `ENGINE`, `ODataQuery`, `SeekQueryResult`, `OS`, `PingProbeProtocol`, `TypedData`, `ExecutionContainer`, `ReferenceArray`, `RemoteStream`, `Sandbox`, `TClientData`, `ReqWithUser`, `LoadContext`, `SteeemActionsProvider`, `IChapter`, `ItemProps`, `ListFleetsCommandInput`, `PolicyDetails`, `ILocationProvider`, `IBlock`, `FilesService`, `TriggerApexTests`, `ConsolidateArgs`, `RenderBannerConfig`, `CodeModel`, `PriceAxisViewRendererData`, `ValidationQueueItem`, `PrintableType`, `ValidationController`, `WordGroup`, `TargetElement`, `CBPeripheral`, `SubscriptionEnvelope`, `User1524199022084`, `PermuteLayerArgs`, `LengthType`, `HighlighterCellsProps`, `TelemetryPluginSetup`, `functions.storage.ObjectMetadata`, `DurableOrchestrationClient`, `LogFileParsingState`, `GeoBox`, `requests.ListManagementAgentPluginsRequest`, `RgbVisConfig`, `LogAnalyticsParserFunction`, `IMonthAggregatedEmployeeStatisticsFindInput`, `IFoundCursor`, `Previews`, `RouterState`, `ListMultipartUploadsCommandInput`, `DashboardService`, `sdk.IntentRecognizer`, `ParseQueryOutput`, `CKBConfig`, `TaskRecord`, `XcodeProject`, `CreateConfigurationSetEventDestinationCommandInput`, `CurrencyPair`, `ISeed`, `ViewBox`, `SoftmaxLayerArgs`, `LineCounter`, `CognitoIdentityServiceProvider`, `JNICallbackManager`, `XmlParserNode`, `WordInName`, `requests.ListRouteTablesRequest`, `OpenChannelObjective`, `Parameters`, `ThemeInfo`, `StaticDataView`, `DataflowState`, `MyClass`, `Issue`, `DeleteApplicationResponse`, `puppeteer.ClickOptions`, `ProviderRpcError`, `EPPrimitiveDependencies`, `SiteService`, `EnhancedGitHubNotification`, `BlobPart`, `IInjector`, `DeleteRequestBuilder`, `DidConfig`, `NgxMdService`, `requests.ListServicesRequest`, 
`SDKModels`, `CalendarWrapper`, `ErrorPayload`, `Register16`, `FileDescriptorProto`, `QuerySnapshot`, `AddTagsToResourceCommand`, `core.PathDescription`, `IYamlItem`, `OutlineManualServerEntry`, `StaticCardProperties`, `PaletteMode`, `QuicStream`, `ImportRules`, `BehaviorHost`, `VariableModel`, `ModuleSpecifier`, `CommittedFileChange`, `UAVariable`, `IndexImpl`, `TypeArgumentResult`, `DMMF.InputType`, `WetMessage`, `ArmSaveConfigs`, `CoreWeaponMode`, `IGherkinOptions`, `Performance`, `JQueryMouseEventObject`, `RunResult`, `VariantCurveExtendParameter`, `IPFSDir`, `GetRuleCommandInput`, `DataTable.Column`, `SavedVisualizationsLoader`, `MDCDialogAdapter`, `IFileEntry`, `sdk.DialogServiceConnector`, `VariableUiElement`, `RegionTagLocation`, `SqlStatisticsTimeSeries`, `ScenarioEvent`, `ProductService`, `SeriesRef`, `IConnectionFormState`, `IDropboxAuth`, `AWSLambda.Context`, `IOHandlerForTest`, `IsMutedChangedListener`, `TcpPacket`, `NumberInput`, `MyEvent`, `HttpAuthenticatedConnection`, `FileOverwriteOptions`, `SQLFragment`, `ApplicationStatus`, `Mongoose`, `GuildService`, `DeviceManagerImpl`, `FolderDoc`, `StorageReference`, `IsvDebugBootstrapExecutor`, `React.Reducer`, `AttributeInfo`, `SectionModel`, `WebCryptoPartialPbkdf2`, `LabwareDefinition2`, `fs.Dirent`, `IAM`, `requests.ListServiceGatewaysRequest`, `ReindexService`, `AttributeFilter`, `IRasterizedGlyph`, `RequiredParserServices`, `SideBarItem`, `GfxBlendFactor`, `UnboundType`, `ProtocolExecutionFlow`, `RenderableSprite3D`, `ReaderContext`, `DescribeDatasetCommandOutput`, `StreamModel`, `FlowFlags`, `BreadcrumbsListProps`, `BTreeNode`, `UseMutationOptions`, `SnackbarMessage`, `JsxExpression`, `ApplicationEntity`, `MapRendererParameters`, `UnresolvedLogs`, `NormalizedRuleType`, `RSPOutput`, `SortField`, `IAllBondData`, `CacheInfo`, `EvmAccount`, `AgentIdentity`, `configuration.uiType`, `YamlCodeActions`, `sdk.TranslationRecognitionCanceledEventArgs`, `FormatTimeInWordsPipe`, `DeprecationsFactory`, `BlockMarketCategory`, `INodeTypeDescription`, `HeapInfo`, `BinarySensorCCGet`, `CookiesFilterParams`, `AnimationData`, `MultiIndices`, `JSCodeshift`, `StateChannelsJsonRpcMessage`, `IMessageHandler`, `FleetRequestHandler`, `CategorySortType`, `TickSignal`, `Clock`, `IDesk`, `SizeConfig`, `DeepImmutable`, `API.storage.api.ChangeDict`, `nsIDOMNode`, `ClipShape`, `FieldNamePath`, `ReCaptchaInstance`, `HookResult`, `MenuSurface`, `MDCMenuSurfaceAdapter`, `AllowedLanguage`, `ISearchEventDataTemplate`, `TableDiff`, `MessageHeader`, `RulesTestEnvironment`, `SocketAddress`, `FastifyInstance`, `DragDropProviderCore`, `Estimate`, `UAObject`, `InvariantContext`, `ItemElement`, `ClearingHouse`, `React.ReactNode`, `JobResultDTO`, `GetOpts`, `Degree`, `GetSemesterTimetable`, `DirType`, `HandlerContext`, `OutgoingHttpHeaders`, `AccountMongoRepository`, `Notification`, `MessageWorkflowMapping`, `AP`, `UpdateAvailableEvent`, `LoadConfigResults`, `PlayerListPlayer`, `CreateModelResponse`, `NavigationEdgeStatus`, `WorkflowExecuteMode`, `Concat`, `WriteLeaderboardRecordRequest_LeaderboardRecordWrite`, `NetworkSettings`, `ElementWrapper`, `CreateBackupCommandInput`, `O.Compulsory`, `GenesisCommit`, `APIConstructor`, `ExtendedVue`, `ILayerDefinition`, `TestInstance`, `SpawnSyncReturns`, `DaffCartShippingRateFactory`, `SortEvent`, `InMemoryPubSub`, `ExternalAuthenticateModel`, `Webhooks`, `TreeItemModel`, `BLOCK`, `SnackbarContextInterface`, `Percussion`, `StitchesComponentWithAutoCompleteForJSXElements`, `Sym`, `ModalService`, `vscode.ConfigurationTarget`, 
`DSVRowString`, `MatchNode`, `IPluginContext`, `ViewEntityOptions`, `TextureManager`, `TName`, `MetaStaticLoader`, `Deal`, `Numbers`, `ResponseReaction`, `TInstruction`, `StepNode`, `ColorPreviewProps`, `DocumentRequest`, `StringOrNumberOrDate`, `requests.ListVaultReplicasRequest`, `ObjectDescriptor`, `FlipDirection`, `ConfigureOptions`, `KeysType`, `LanguageMode`, `RawDraftContentBlock`, `MaterialConfig`, `ParseCssResults`, `OutlineCreateTag`, `IHttp`, `MatchedSelector`, `ApiErrorReporter`, `JobName`, `VNodeArrayChildren`, `BinaryTree`, `AtomArrowBlockElement`, `ICategoryBin`, `IStoredTransaction`, `JSDocUnionType`, `NgIterable`, `Abbreviation`, `TreeviewItem`, `HashMapState`, `ValueSource`, `dagre.graphlib.Graph`, `ATNConfigSet`, `CdkTree`, `mat3`, `BarGroupValue`, `Hono`, `DeleteNotificationsRequest`, `EnvironmentService`, `NEOONEProvider`, `AlignSelf`, `ISection`, `SelectableListState`, `LikeNotification`, `ContainerContext`, `Inversion`, `FluentIterable`, `MonzoAccountResponse`, `CheckupConfig`, `FormulaOptions`, `Attribute.JSON`, `TsSelectComponent`, `InsertBuilder`, `ReplyShortChannelIdsEndMessage`, `IFeatureCommand`, `ParentNode`, `IAngularEvent`, `HttpMiddlewareEffect`, `CardRequirement`, `IceCandidate`, `FetchStartedAction`, `BrowserExceptionlessClient`, `TabBarToolbarRegistry`, `UIDatePicker`, `RelativePattern`, `RestElement`, `PartialItem`, `GetAuthorizationTokenCommandInput`, `UseMutationReducerAction`, `WorkspaceSeed`, `IdentifierNode`, `Joi.ValidationResult`, `Animated.Node`, `GeneratedReport`, `UpdateThemeDto`, `IFrontendDomChangeEvent`, `GroupService`, `ParameterReflection`, `ColorDirection`, `ImageSourcePropType`, `Config3D`, `ParseFunction`, `monaco.languages.CompletionItem`, `StandardPrincipal`, `ThemeColorDefinition`, `MyOtherObject`, `RedirectResult`, `IPage`, `AutorestConfiguration`, `IAddAccountState`, `CrossProductNode`, `CreateStreamCommandInput`, `CircuitBreaker`, `marked.Renderer`, `PermissionTree`, `DocType`, `TraderConfig`, `IonicModalController`, `SolutionBuilderState`, `SynthBindingName`, `IChoiceGroupOption`, `InternalNamePath`, `IJetURLChunk`, `RequiredValidator`, `ModelInfo`, `VIS0_NodeData`, `HandlerInfo`, `BackblazeB2File`, `CodeMirror.Editor`, `WidgetProps`, `RibbonComponent`, `PublishedStoreItem`, `UpdateNoteRequest`, `Draft`, `JsSignatureProvider`, `GetStateParams`, `TableDefinition`, `NixieEquipment`, `UniswapVersion`, `AuctionViewItem`, `OutputMessage`, `RewardTransactionList`, `ResolutionConfig`, `ReflectedValueType`, `TNSCanvasRenderingContext`, `SystemModule`, `DarwinMenuItemConstructorOptions`, `UiActions`, `SetCombinationType`, `G2TimelineData`, `SubstituteOf`, `WriteableStream`, `ResultAccumulator`, `FileConfig`, `t_63513dcd`, `CommonStatusBarItem`, `INativeTagDict`, `GradConfig`, `ICommandResponse`, `ActionSheet`, `requests.ListNodePoolsRequest`, `ApimService`, `active`, `UnicodeRangeTable`, `ValueObject`, `MatcherGenerator`, `requests.ListInstanceAgentCommandsRequest`, `ImageStyle`, `TextElement`, `CameraComponent`, `SignedOperation`, `GenericError`, `RNNCell`, `LiteralObject`, `web3ReactInterface`, `InstrumentationConfig`, `Exceptions`, `Events.pointerdragleave`, `PropertyKnob`, `GCFBootstrapper`, `PrivateApiImpl`, `OperationPath`, `Outline`, `IComponentDesc`, `ExtraSessionInfoOptions`, `Workspaces`, `HighlightService`, `CredentialOfferTemplate`, `TLabelName`, `StateContext`, `IMockEvent`, `BoundExistsFn`, `ForecastSeriesContext`, `AZDocumentSymbolsLibrary`, `IDocumentMessage`, `ConnectedAccount`, `IWatchOptions`, 
`MerchantMenuOrderGoodsInfo`, `UseFetchReturn`, `ShapeAttrs`, `ReplicationConfigurationReplicatedDisk`, `MIREntityType`, `VariantMatchedResult`, `DependencyIdentifier`, `FloatTerm`, `DpcMgr`, `Sheet`, `ClientInstance`, `MapboxGL.Map`, `AccountState`, `IAuthResponse`, `SizeProps`, `ESLMediaRuleList`, `EditorSettings`, `ICanvasProps`, `Ticket`, `RawTransaction`, `RoxieResult`, `Universe`, `LanguagesEnum`, `Bluebird`, `JSONAPIDocument`, `AmmLiquidityPool`, `RepositionScrollStrategy`, `InversifyExpressServer`, `GenericThemeShape`, `_ISelection`, `FakeNumericDataset`, `IFileRequest`, `UsageExceededErrorInfo`, `SharedFileMetadata`, `EmployeeInfo`, `ListApmDomainsRequest`, `CompletionEntry`, `KudosTokenFactoryService`, `LogAnalyticsLabelView`, `ScriptingDefinitionStub`, `ParameterCondition`, `CeloContract`, `GfxRenderDynamicUniformBuffer`, `XMessageBoxService`, `LocalFileName`, `RemoteDatabase`, `RequestBase`, `FirebaseMachineLearningError`, `ICXOffer`, `Receiver`, `DataViewCategoryColumn`, `IClothingStore`, `FileInfo`, `ConfigTypes.CFWorkers`, `RequestInformationContainer`, `ResponseInit`, `TMigrableEnvelope`, `CdkDragEnter`, `Optimization`, `FullIndex`, `ReadonlyColor`, `L`, `DatabaseOptions`, `AtomicAssetsHandler`, `MDCDialogCloseEvent`, `AV1Obu`, `Paths`, `P2PRequestPacket`, `TrustedSc`, `IContextualMenuItem`, `TestingModule`, `IntegrationTypes`, `WirelessMode`, `Logout`, `CalibrationPanelProps`, `TheMovieDb`, `PortalConfig`, `MpEvent`, `DateSkeleton`, `T.ComponentMap`, `Footer`, `InterfaceAliasExport`, `ClockOptions`, `FormatterOptions`, `DemoFunction`, `ITimer`, `Accept`, `ExtendableBox`, `DependencyWheelPoint`, `ReferenceParams`, `Expand`, `QueryExpressionBodyContext`, `LazyService`, `SModelRoot`, `RectGeometry`, `PathToRegExpOptions`, `api.ITree`, `Meta.Window`, `LoggerInstance`, `DSpaceObjectDataService`, `IFB3DOM`, `ContentObject`, `TValPointer`, `JSONRPCClient`, `ItemInfo`, `FrameData`, `Reserve`, `EventSpy`, `ExchangePositionInput`, `TruthTable`, `CanvasGraphic`, `LogChildItem`, `DtlsClient`, `RequestPrepareOptions`, `ValueFn`, `RepoError`, `DeviceFormPostData`, `PackageToPackageAnalysisResult`, `StoredEncryptedWallet`, `Spec`, `MessageSerializer`, `Fixture`, `Runner`, `DeSerializersT`, `pulumi.InvokeOptions`, `ChildRuleCondition`, `MultiValue`, `FormatFlags`, `HookOptions`, `FooService`, `SourceCode`, `ProblemViewPanel`, `FactorySession`, `ObjectKeyMap`, `KeyValueCollection`, `IFocusedCellCoordinates`, `BastionShareableLinkListRequest`, `SearchResultComponent`, `Shadows`, `SetValue`, `MediaService`, `CarouselState`, `Lut`, `FieldModel`, `LiveEventSession`, `tensorflow.IGraphDef`, `RuleContext`, `AlphaTest`, `IPivotItemProps`, `UpdateFolderCommandInput`, `d.HostConfig`, `ImportService`, `KvPair`, `RaceCancellation`, `FilterOf`, `StakingBuilder`, `PluginsServiceSetupDeps`, `BaselineResult`, `QueryDeepPartialEntity`, `api.State`, `RepoInfo`, `DescribeAccountAttributesCommandInput`, `TaskDoc`, `GetDatabaseCommandInput`, `CSharpDeclarationBlock`, `CatCommonParams`, `IAttentionSeekerAnimationOptions`, `SourceString`, `FatalErrorsSetup`, `HintsConfigObject`, `DeepMapAsyncResult`, `FaceletCubeT`, `BlockEntity`, `NavbarService`, `BisenetV2CelebAMaskConfig`, `TestEmitter`, `RegionHash`, `MemberDef`, `RibbonEmitter`, `CompletionItemData`, `Ptr`, `DeploymentEntry`, `OncoprintModel`, `EdmxMetadata`, `PrimaryButtonProps`, `glTF.glTFNode`, `CommonMiddlewareUnion`, `ModalSize`, `CohortCreationState`, `IMDBVertex`, `ValidatePurchaseGoogleRequest`, `TextBox`, `BinaryWriter`, `CacheContainer`, 
`configuration.Data`, `BuildHelpers`, `HTMLAnchorElement`, `MapOf`, `RosApiCommands`, `x`, `CreateClusterCommandInput`, `FilterDataStatusValues`, `AwsVpcConfiguration`, `ApplicationCommandOptionChoice`, `IBindingWizardContext`, `FaunaTime`, `SceneGroup`, `Run`, `DeleteSessionCommandInput`, `Span_Link`, `BookmarkMetadata`, `enet.IConnectOptions`, `DataImportRootStore`, `OpenDialogReturnValue`, `InternalComputedContext`, `PersonType`, `PutObjectRequest`, `PageInfoListItem`, `FuncArg`, `AttributeData`, `xml.Position`, `ClassMap`, `Watermark`, `DescribeJobsCommandInput`, `RolePermission`, `ThyFullscreenRef`, `ScopeTransform`, `ParamInfo`, `DaffCartAddress`, `IRequestApprovalFindInput`, `PatchListener`, `OpPathTree`, `CategoryService`, `AppContextData`, `ScriptLike`, `PluginRemoteLoadZipOptions`, `ParsedColorValue`, `ILoginState`, `CookieParseOptions`, `vscode.CompletionList`, `CommentResponse`, `MockSocket`, `PiUnitDescription`, `RecordRawData`, `ICredentials`, `ITreeDataNode`, `ChartWidget`, `ActionWithPayload`, `TokenizerState`, `NzTabComponent`, `GesturesObserver`, `AsyncSourceIterator`, `OrderedIterable`, `RawTextGetter`, `MatchOptions`, `IConfigFile`, `PromiseReadable`, `OutputTargetHydrate`, `ICandidateInterview`, `WebHook`, `NovaResources`, `L.LatLng`, `TestInterval`, `IPropertyTypeValueDescriptor`, `ITableColumn`, `ICommandManager`, `PatternPreset`, `vscode.QuickPickOptions`, `FcCoords`, `PortalType`, `HSD_TEArg`, `ResultTree`, `Arity`, `ExtractControlValue`, `InheritedChildInput`, `IHash`, `Master`, `EulerRotation`, `rootState`, `CardRenderEffect`, `ActionType`, `ClippingPlane`, `ResourceAlreadyExistsException`, `TAccum`, `TransactionPool`, `ActionCreator`, `TextEditAction`, `RunContext`, `SaberProvider`, `MikroORMOptions`, `DoorLockCCConfigurationSet`, `DBCoreTable`, `TArg`, `GX_VtxAttrFmt`, `ICassClusterModuleState`, `ControllerClient`, `IpcResponse`, `NotifyOptions`, `IBirthCompositionBody`, `UploadData`, `RestOrderbookRequest`, `SnackbarErrorAction`, `ModuleRpcCommon.EncodedContext`, `SolidityVisitor`, `IpcRendererEvent`, `AnnounceNumberNumber`, `StepOptions`, `Funnel`, `IDynamicGrammar`, `GlobalSearchProviderResult`, `HsSaveMapService`, `FormValueType`, `ActionForRender`, `ProposalData`, `AtomicMarketContext`, `OverlappingPathAnalyzer`, `Fill`, `DevtoolsBackend`, `ArticleFormat`, `binding_grammarVisitor`, `AbstractViewer`, `protos.google.iam.v1.ITestIamPermissionsRequest`, `BarcodeFormat`, `INameAtom`, `EggAppConfig`, `IWarehouse`, `SpriteSheetSpacingDimensions`, `DataStore`, `SnapshotDiff`, `BlobTestServerFactory`, `PgAttribute`, `CommonInterfaces.Plugins.IPlugin`, `SendTransactionOptions`, `CstNode`, `SimpleOption`, `esbuild.OnLoadArgs`, `CustomRenderer`, `AlertService`, `StreamDescription`, `IpcMain`, `TestSourceIO`, `TransactionReceiptsEventInfo`, `IMilestone`, `DiscordMockContext`, `AMap.Map`, `PublishState`, `q.Message`, `Nameserver`, `React.PropsWithoutRef`, `S2ExtensionType`, `StepSelection`, `ICoverageFragment`, `Fr`, `EventActionHandlerCallableState`, `Value2D`, `StoreConstructor`, `HeadConfig`, `KeyboardProps`, `AdtLock`, `BaseTexture`, `VueFilePayload`, `IShadowGenerator`, `ILoggerModel`, `DeleteAppCommandInput`, `RootCID`, `SortablePolygon`, `VehicleEvent`, `GfxSampler`, `IErrorPositionCapable`, `CircularDependency`, `ICSR`, `AndroidChannel`, `UserInputPlugin`, `TFS_Core_Contracts.TeamContext`, `AppThunkAction`, `RenderParams`, `StableToken`, `IRunResult`, `RedisCacheAdapterContext`, `ApiDefinitions`, `GetRotation`, `CSVInput`, `LocationCalculator`, 
`ChannelResource`, `P2PMessagePacket`, `IEmployeeProposalTemplate`, `FontData`, `SelectorParser.Node`, `QueriesStore`, `TrezorTransport`, `CategorizedSettings`, `IGetTimesheetInput`, `Deposit`, `IBlockchainQuickPickItem`, `VirtualData`, `Mocha.MochaOptions`, `CommandInterface`, `MasterKeySecret`, `ReadonlyVec3`, `VideoDialog`, `DatabaseService`, `ServiceBuilder`, `RangeSelectorOptions`, `ReserveData`, `RepoState`, `ImageFov`, `IIconItem`, `MakeRequest`, `ComponentAst`, `QueryObserverResult`, `JsonApiDocument`, `ShapePath`, `ProviderState`, `DefinitionResult`, `TodoItemFlatNode`, `RebaseEditorContext`, `ManagementDashboardForImportExportDetails`, `HeadingSize`, `PartyData`, `Omit`, `SourceEntity`, `ViewerParameters`, `SwitchIdInfo`, `IRenderMime.IMimeModel`, `CollateContext`, `TResponse`, `BtnProps`, `BarService`, `MatchResult`, `ThisExpression`, `BasicGraph`, `AvatarService`, `SpeciesName`, `ServerSideTransactionResult`, `DateTimeFormat`, `DeferredValue`, `ISpawnOptions`, `BackendType`, `Beatmap`, `StructServiceOptions`, `PuppetASTClass`, `Finish`, `FormatFunc`, `MigrateDev`, `ExpressionsCompilerStub`, `LazyScope`, `Constant`, `FieldPlugin`, `ResInfo`, `SNSTopicArnNotFoundFault`, `CheckoutPaymentPage`, `DescribeSchemaCommandInput`, `LogSeriesFragmentPushRequest`, `RulePathEntry`, `FirebaseFirestore.Query`, `InterfaceWithDeclaration`, `ICached`, `PageChangeEvent`, `DiagnosticRelatedInfo`, `LoginForm`, `SwitchContainerProps`, `sdk.Recognizer`, `PipelineValue`, `JSONRPCResponse`, `request.Response`, `BaseCallbackConstructor`, `AttributeKeyAndValue`, `TargetData`, `AssociationLifecycleState`, `UpdateStudioCommandInput`, `IBufferCell`, `ImageEnt`, `dRes_control_c`, `ArcProps`, `GUILocationProperties`, `AuthInterface`, `EmojiService`, `QuickAlgoLibrary`, `PartyMatchmakerAdd_StringPropertiesEntry`, `IPayment`, `VNodeChild`, `DAL.DEVICE_HEAP_ERROR`, `ThyCollapsePanelComponent`, `OrganizationEntity`, `InstallProfile`, `filterInterface`, `AttachVolumeCommandInput`, `LogSplitLayout`, `RLANAnimationTrackType`, `TextPathGeometry`, `requests.ListVmClusterNetworksRequest`, `ResolvedSimpleSavedObject`, `HostWatchFile`, `ConfiguredPluginsClient`, `SQS.Message`, `JsPsych`, `DataGrid.Style`, `TabFragmentImplementation`, `OrganizationProject`, `MultiChannelCCCommandEncapsulation`, `HK`, `ChordNode`, `FileEntry`, `PgdbDataSource`, `DataViewBase`, `MediaRecorder`, `VisualProperties`, `XHRoptions`, `ActionsInTestEnum`, `RefundPayerStore`, `reduxLib.IState`, `NodeEventHandler`, `InteractionSettings`, `PendingWrite`, `TextDelta`, `ObjectMetadata`, `Apollo`, `AStore`, `IStreamPropertiesObject`, `AssetModel`, `TargetProperty`, `LocationMarkModel`, `XrmUiTest`, `ListAction`, `TaggedState`, `ITokensState`, `HTMLBodyElement`, `ReferencePosition`, `SessionEntity`, `MetadataField`, `Plugin.Shared.Definition`, `nodeFunc`, `ToolbarTheme`, `SavedObjectsImportOptions`, `ts.TextSpan`, `AnnotationOptions`, `ExtraDataTypeManager`, `CreateKeyPairCommandInput`, `JSONRPCRequest`, `Coupon`, `LegacyResult`, `ExtensionSettings`, `ReactApolloRawPluginConfig`, `IColorEvent`, `TLE.FunctionSignatureHelp`, `T18`, `PFS_Config`, `ImageClassifierOptions`, `EmitResult`, `InventoryPlug`, `NavigationBarItem`, `IWorkspaceDir`, `ControlledComponentWrapperProps`, `MoveCommand`, `requests.ListResolverEndpointsRequest`, `MenusState`, `EditState`, `AstModule`, `MetadataInfo`, `CfnApi`, `PropItem`, `DialogConfig`, `StyleSheet`, `panel_connector.MessageHandler`, `RequestHandlerEntry`, `FIRAuthDataResult`, `UVFile`, `FolderView`, `CallbackFunc`, 
`IBlockchainProperties`, `ResponseFixtures`, `ManualOracle`, `InjectorContext`, `BaseAction`, `AuthenticationVirtualMachine`, `ColorSwitchCCSet`, `URL_`, `ResultReason`, `EasyPZ`, `WebhookActionConnector`, `ITool`, `XEvent`, `Oid`, `BackendDetails`, `Aliases`, `RedisModules`, `CreateJobCommand`, `WatchedFile`, `RenderCanvas`, `AwsState`, `ConfigSetName`, `NumberInfo`, `VpnServerConfiguration`, `Drop`, `d.SitemapXmpResults`, `WorkspaceNode`, `NullConfiguration`, `GPUProgram`, `BenchmarkData`, `EmitEvent`, `RecentDatum`, `ActionWithError`, `RadixAtomObservation`, `Remote`, `ColProps`, `EqualFunc`, `CategoryItem`, `TFJSBinding`, `ResourceInsightProjectedUtilizationItem`, `DurationInput`, `MouseData`, `TimePanelProps`, `T.Matcher`, `OpRecInterface`, `ContextT`, `TimerHandler`, `IMutableCategorizedPropertyItem`, `TooltipValue`, `PluginSpec`, `ConvLSTM2DArgs`, `Reminders`, `Logging`, `IMonitorPanelAction`, `MailOptions`, `UpdateFunctionCommandInput`, `MemberRef`, `AthenaRequest`, `JsState`, `OptionEditorComponent`, `DataPoint`, `JacobianPoint`, `FileSystemTrap`, `ReLULayerArgs`, `TomcatServer`, `InflectorRule`, `WaiterConfiguration`, `EnrichedPostageBatch`, `ESLScreenBreakpoint`, `R.Morphism`, `HTTPServer`, `ExternalSubtitlesFile`, `LaunchConfiguration`, `BundlingOptions`, `AuditInfo`, `requests.ListExternalContainerDatabasesRequest`, `VirtualCollection`, `Angulartics2Mixpanel`, `EmailTempState`, `LabelOptions`, `CustomSeriesRenderItemParams`, `EquipmentService`, `StateProvider`, `MatrixProfileInfo`, `AddressBookContact`, `MemoryDump`, `LogMethod`, `Stripe.PaymentIntent`, `BifrostRemoteUser`, `FastifyTypeBoxHandlerMethod`, `JRPCMiddleware`, `GroupInput`, `d.OutputTarget`, `LayerPanel`, `OneListing`, `FunctionProps`, `FieldTemplateProps`, `EditCategoryDto`, `Descriptions`, `CreateDatasetCommandOutput`, `Access`, `ParsedLog`, `RequestFn`, `EmailValidatorAdapter`, `GameObject`, `KayentaCredential`, `CurrencySymbolWidthType`, `DocumentedType`, `RawTestResult`, `MockedFunctionDeep`, `CustomBlock`, `tf.Tensor1D`, `QueueModel`, `ISearchResult`, `BlockhashAndFeeCalculator`, `HighlightRepository`, `VpnClientParameters`, `PLSQLCursorInfos`, `CeloTx`, `IStyleAttr`, `StatesOptionsKey`, `ITdDataTableColumn`, `TaskComponentState`, `SelectOption`, `RawDoc`, `PGOrbit`, `DomainSummary`, `MerkleIntervalInclusionProof`, `IdeaTags`, `RuleTypeRegistry`, `DocumentInterface`, `RoomObject`, `ShareButtonsConfig`, `SpriteAssetPub`, `Preimage`, `requests.ListSourcesRequest`, `TestUiItemsProvider`, `UploadTask`, `DaffCartItemInput`, `EmbeddedRegion`, `TCacheKey`, `ClassListing`, `ComponentTypeEnum`, `MdcDialog`, `StartStopContinue`, `UserLoginData`, `BlockExport`, `PluginWrapper`, `emailAuthentication.Table`, `RouteRecord`, `AjaxAppender`, `TextLayoutParameters`, `Anim`, `OrderTemplatesOverviewPage`, `DAL.DEVICE_ID_RADIO_DATA_READY`, `Convolver`, `InvalidatorSubscription`, `Animated.SharedValue`, `Violation`, `TamaguiInternalConfig`, `Apdex`, `MagentoCart`, `ListDashboardsCommandInput`, `DescribeFleetAttributesCommandInput`, `VRMBlendShapeProxy`, `BackgroundFilterSpec`, `ISceneData`, `IModalService`, `CustomMaterial`, `TransactionLog`, `ENDAttributeValue`, `vscode.FormattingOptions`, `btTransform`, `ToolChoice`, `HostItem`, `ReportStoreService`, `MovementItem`, `DoneInvokeEvent`, `CursorContent`, `DecorationSet`, `TRecord`, `HttpRequestWithLabelsAndTimestampFormatCommandInput`, `GraphQLRequest`, `Contributors`, `SpringSequence`, `META`, `CalcValue`, `TSTypeAnnotation`, `TargetConfig`, `PluginDependencies`, `LoginPage`, 
`LocalDirName`, `vscode.Command`, `NodeKeyType`, `LeftHandSideExpression`, `ThyOverlayTrigger`, `CreateCustomVerificationEmailTemplateCommandInput`, `GetQueryStatus`, `ConnectionID`, `StdlibRegistry`, `core.IThrottler`, `CacheObject`, `Reason`, `IBaseRequestAction`, `PlayerStatus`, `ArtifactDownloadTicket`, `ChannelService`, `DirItem`, `Criteria`, `GameEvent`, `ValueGetterFunction`, `MessageSender`, `TagName`, `m.Comp`, `ComponentTest`, `JhiEventManager`, `SearchResultProps`, `FormArrayState`, `GeneralImpl`, `IChainableEvent`, `TagKey`, `StackHeaderInterpolationProps`, `d.FunctionalComponent`, `SceneView`, `BaseTypes`, `OperationResult`, `ImageLocation`, `ITestContainerConfig`, `EvaluationStats`, `CancelRequest`, `GfxRendererLayer`, `IBApi`, `UAParserInstance`, `EnumerateType`, `MatFormField`, `XPCOM.nsISupports`, `GundbMetadataStore`, `ContractAPI`, `BrowserSimulation`, `CurrentUser`, `DataAsset`, `DeleteChannelModeratorCommandInput`, `BodyDatum`, `Codefresh`, `YellowPepperService`, `Args`, `JsonFile`, `SelectorDatastoreService`, `CommonDivProps`, `RebaseEntry`, `CssDimValue`, `AtomOrString`, `UpdateChannelMessageCommandInput`, `SafeElementForMouse`, `ClrDatagridStateInterface`, `DestroyArgv`, `CreateNetworkProfileCommandInput`, `InputSearchExpressionGroup`, `Profiler`, `ErrorToastOptions`, `IAstItem`, `Scroll`, `Getters`, `GX.TexGenSrc`, `DialogDelegate`, `IAnnotation`, `Segno`, `TransformOriginAnchorPosition`, `ICUToken`, `INodeDef`, `ui.Rectangle`, `WikiFile`, `AddedKeywordDefinition`, `OnTabReselectedListener`, `CircleResponderModel`, `Venue`, `IssueProps`, `AccountID`, `WebGLRenderingContextExtension`, `HalOptions`, `types.IAzureQuickPickOptions`, `WorkerService`, `HKDF`, `Node.MethodParams`, `WebProvider`, `InMemorySpanExporter`, `FluentUITypes.IDropdownOption`, `NamedItem`, `DateFilter`, `IBookmarkState`, `TabsProps`, `GoToFormProps`, `Material`, `RequiredStringSchema`, `Texlist`, `TopicsService`, `CipherCreateRequest`, `IMatrix44`, `ParserInput`, `PythonDependency`, `GraphData`, `PackageTypeEnum`, `OfflineSigner`, `ValidationErrors`, `ClusterRole`, `Conf`, `Exchange`, `Money`, `CustomMapCache`, `ImmutableBucket`, `TargetDetectorRecipeDetectorRuleSummary`, `PutAssetPropertyValueEntry`, `ModuleNameNode`, `RootContainer`, `ProjectLabelInfo`, `JobValidationMessage`, `MoveTree`, `DialogflowConversation`, `TimestampInMillis`, `IStashTab`, `EventInteractionState`, `MessageViewProps`, `DescribeUserCommandInput`, `AmongUsSession`, `GetEmailTemplateCommandInput`, `TypeTable`, `HomebridgeConfig`, `Equals`, `BlendOperation`, `PlaceholderReference`, `Presence`, `OAuthException`, `ICeloTransaction`, `RootActionType`, `SegmentedBarItem`, `IItemTree`, `ChatCompositeProps`, `MapContext`, `OutputWriter`, `HydrateStaticData`, `SubmissionDetailEntity`, `Texture2D`, `TreeConfig`, `DescribeChannelMembershipForAppInstanceUserCommandInput`, `Bindings`, `IHeaderItem`, `ApplicationLoadBalancedFargateService`, `CreateBidDTO`, `MnemonicLanguages`, `ActivationFunction`, `StreamDeck`, `LiveAnnouncer`, `ReactNodeArray`, `SizeObject`, `Callback`, `UpdateDependenciesParams`, `ChangeType`, `ProtocolMapperRepresentation`, `Hook`, `FunctionProp`, `FunctionTypeFlags`, `PyTypedInfo`, `ListSortMembersSyntax`, `IDroppableItem`, `d.MinifyJsResult`, `MagickSettings`, `GaugeEvent`, `ListKeysRequest`, `FirstMate.Grammar`, `IClusters`, `IWorkflowExecuteHooks`, `XMLBuilderContext`, `VirgilPrivateKey`, `PutAccountsValidationResult`, `RelatedClassInfoJSON`, `SourceMetadata`, `CardListItemType`, `messages.Tag`, `ItBlock`, 
`IRequestDTO`, `PerformStatArgs`, `DefaultKeys`, `ExternalSourceFactory`, `ProblemEntity`, `PropertyGroup`, `Ping`, `ButtonData`, `SimpleToast`, `WalkNode`, `AssessmentType`, `ScaleString`, `ConnectionTransport`, `UICollectionViewDataSourceImpl`, `ts.FunctionDeclaration`, `CSVOutput`, `SdkProvider`, `TrackCallInfo`, `TlsConfig`, `NamespaceDeclaration`, `ComponentTheme`, `GraphQLHandler`, `CentralTemplateProvider`, `LambdaHandler`, `OpenApiDocument`, `IGroupItem`, `InlineResolved`, `GfxVertexBufferDescriptor`, `DataPointPosition`, `TInjectItem`, `ConcurrentModificationException`, `ComponentHandler`, `SongResult`, `Highcharts.VMLRenderer`, `SutTypes`, `OpenSearchClient`, `NumberSystemType`, `GameEngine`, `FormValue`, `MergeIntersections`, `IndexColumnModelInterface`, `CliqueVote`, `DatabaseSet`, `EmptyEventCreator`, `HashSetStructure`, `DraftEditor`, `RuleExpr`, `ITimeOff`, `M3ModelInstance`, `RawPackages`, `ListElementSize`, `FeatureMap`, `IsNumber`, `UniversalRouter`, `Shall`, `LowpassCombFilter`, `HttpCacheService`, `ImageFilter`, `RendererLike`, `ListServicesResponse`, `UnderlyingAsset`, `MockedDataStore`, `ClarityAbi`, `DateLocale`, `BaseMessage`, `MapConfig`, `IMask`, `CSSVarFunction`, `TimeGridWrapper`, `DaffProductDriverResponse`, `NamespacedWireDispatch`, `ConsoleExpression`, `DatabaseConfiguration`, `Foxx.Response`, `NuxtApp`, `ProjectIdentifier`, `ReleaseDefinitionSchema`, `_TsxComponentV3`, `DevicesButtonProps`, `CreateDBClusterCommandInput`, `WorldBuilder`, `SetInstallPrompt`, `DbLoadCallback`, `VoiceServerUpdate`, `vscode.SymbolKind`, `UpdateInfo`, `MatrixType`, `GitStatus`, `DescribeUserProfileCommandInput`, `Combinator`, `AnyResource`, `IWizard`, `LinearFlowFunction`, `ModelInterface`, `IItemUpdateResult`, `IframeController`, `ClientScopeRepresentation`, `OpUnitType`, `IDragCursorInfos`, `DeleteSettingCommand`, `DataModel.Metadata`, `GeoShape`, `MatchedStory`, `Chart.CallbackFunction`, `TwingLoaderInterface`, `AssociationGroupInfoCCInfoGet`, `HttpServiceSetup`, `EventArg`, `VersionNumbers`, `NotificationPressAction`, `TooltipType`, `RequestSpec`, `TrackType`, `TaskReport`, `CRDPChannel`, `PointerEvent`, `PostsService`, `PNG`, `ConfigurationDTORegions`, `SelectionInfo`, `ILinkProps`, `DateFormatterFn`, `ParsedRule`, `MaterialData`, `ObservableLanguage`, `ReadTransaction`, `OrdererTreeItem`, `MangolLayer`, `ActiveMigrations`, `StartDBClusterCommandInput`, `BuilderProgram`, `PackageId`, `MonoTypeOperatorFunction`, `EthereumPaymentsUtilsConfig`, `requests.ListSessionsRequest`, `DecodedSignature`, `OwnedUpgradeabilityProxyInstance`, `StateMapper`, `EqualityFn`, `MapEnv`, `ScanDb`, `CommandParams`, `IUserUpdateInput`, `ApplicationVersionFile`, `ModuleKind`, `ConvertedType`, `OrganizationUserBulkRequest`, `EventStore`, `RuntimeIndex`, `AllowArray`, `B5`, `TypeDefinitionNode`, `AdvancedSettings`, `LinkingCrossPlatform`, `CdkColumnDef`, `Modifier`, `GeneratorVars`, `EpicMiddleware`, `InjectorType`, `SchemaObjectMetadata`, `ts.ImportSpecifier`, `ICell`, `ErrorMessage`, `ValidatorConfig`, `TileSet`, `BinaryValue`, `SectionComponent`, `ConfigurationCCReport`, `RecordOptions`, `API.storage.PrefObserverFactory`, `DescribeRepositoryAssociationCommandInput`, `FreeCamera`, `TemplateHead`, `EdiSegment`, `WrappedComponentRoot`, `ExpressServer`, `CampaignTimelineBoardViewerChanelsModel`, `CertificateSummary`, `PrerenderUrlRequest`, `ChartCoordinate`, `MeshVertice`, `requests.ListMountTargetsRequest`, `TunnelRequest`, `GridColumnConfig`, `PGTransform`, `TinymathFunction`, `Node_Const`, 
`PartialResults`, `EthArg`, `ReadonlySymbolSet`, `PatternValueNode`, `MatchmakerMatched_MatchmakerUser_NumericPropertiesEntry`, `TestAudioBuffer`, `EncryptionProtectorName`, `PortablePath`, `Merger`, `Types.RawMessage`, `SpriteService`, `TContainerNode`, `Detection`, `JupyterFrontEndPlugin`, `PreferencesStateModel`, `ConnectorProps`, `ConsumeMessage`, `DatatableArgs`, `IAsset`, `WorldgenRegistry`, `HTMLTableHeaderCellElement`, `URL`, `FlatScancode`, `BTree`, `GraphEdges`, `RuleMeta`, `ConfigurableStartEnd`, `SimpleTest`, `NotificationAction`, `UserWhereInput`, `StorageQuotaExceededFault`, `EventRegistry`, `ListTagsForResourceOutput`, `IExchange`, `MicrosoftSynapseWorkspacesSqlPoolsResources`, `SVGElement`, `ScaledSize`, `ListComponentsCommandInput`, `Teacher`, `VaryMap`, `BundleManifest`, `LineupPlayerPosition`, `NonReactive`, `LocationHelper`, `InputEventMouseMotion`, `GUIOrigin`, `CustomOptions`, `StructDeclaration`, `IApolloServerContext`, `IIterator`, `DownloadRef`, `BrickRenderOptionsResolved`, `JsonConfig`, `Vec3Sym`, `Explanation`, `unified.Processor`, `RequestModel`, `JSDocReturnTag`, `HTMLAudio`, `DS`, `PubsubMessage`, `BitcoinStoredTransaction`, `IQueryProps`, `YarnLock`, `ImagePickerResult`, `AudioProfile`, `GetBucketTaggingCommandInput`, `UpdateWebACLCommandInput`, `ODataPagedResult`, `Releaser`, `Ctx`, `PersonFacade`, `DeltaType`, `DeleteMeetingCommandInput`, `CoverageOptions`, `GoToProps`, `requests.ListLocalPeeringGatewaysRequest`, `SourceMapSpan`, `StyledComponentClass`, `InvalidInput`, `IInputList`, `IContainer`, `FieldName`, `AzureComponentConfig`, `PullAudioOutputStreamImpl`, `DaffAuthLoginReducerState`, `ConnectionState`, `InitWindowProps`, `LineComment`, `tf.io.TrainingConfig`, `CallStatus`, `MainHitObject`, `FolderPreferenceProvider`, `btSoftBody`, `Koa.Context`, `ConstructorAst`, `DeviceInfo`, `DataAdapter`, `ElementFactory`, `ScopedMemento`, `MarkType`, `TemplateDiff`, `PushTransactionArgs`, `GridTile`, `TurndownService`, `MediaDef`, `ToolbarDropdownButtonProps`, `CustomDomain`, `ApplyPredicate`, `IGLTFLoaderData`, `ServicePropertiesModel`, `EnvironmentRecord`, `Chapter`, `SubscriptionResult`, `Assign`, `Markets`, `InvalidDatasourceErrorInfo`, `ResponsiveInfo`, `StateChannel`, `FlattenedType`, `FormEvent`, `IndexedDB`, `Quantity`, `ts.ScriptTarget`, `InitializeHandler`, `IDateFnsLocaleValues`, `PutDedicatedIpInPoolCommandInput`, `ITableField`, `Timestamp`, `DecodedInstruction`, `MessageFormat`, `AngleSweep`, `MatchedMiddleware`, `IBrew`, `SecureHeadersOptions`, `IntervalOptions`, `HyperionWorkerDef`, `GeocodeQueryInterface`, `ApiError`, `CreateOptions`, `LoggerProvider`, `Showable`, `DeleteDatasetCommandInput`, `ParameterizedString`, `ContractKit`, `TypeWithInfo`, `Ord`, `BuilderCanvasData`, `SEOProps`, `DefaultRequestSigner`, `CellDrag`, `MeshFragData`, `ReactiveDBException`, `CkbTxInfo`, `PartialConfig`, `LocalVueType`, `CaseExpr`, `requests.ListVolumesRequest`, `TimestampShape`, `AddTagsCommandInput`, `ParserOptions`, `DaffCategoryFilterToggleRequest`, `CollectionCompilerMeta`, `Fp2`, `AstModuleExportInfo`, `CardAndModule`, `ScannedClass`, `DecoratorType`, `AnyValidateFunction`, `Rx.PartialObserver`, `UiObject`, `Declarations`, `ReferenceUsedBy`, `ElementQueryModifier`, `code.TextDocument`, `JSDocParameterTag`, `SoftwarePackage`, `anchor.BN`, `ColorPresentationParams`, `BufferSize`, `ScriptAst`, `Papa`, `M.Middleware`, `ResourceGroup`, `ModifyPayloadFnMap`, `SeriesDomainsAndData`, `JupyterFrontEnd`, `ScreenCollisions`, `DropdownItemProps`, `AppActions`, 
`Datetime`, `MapToType`, `AnnotationPointType`, `DeleteDBSubnetGroupCommandInput`, `TagLimitExceededException`, `StringAttribute`, `RegulationHistoryItem`, `IScriptInfo`, `UtilsService`, `PutObjectOptions`, `IAtomMvhd`, `ThemePrepared`, `IApiSecurityRequirement`, `CompletionList`, `HostService`, `EventsMessage`, `ILicense`, `WatchDog`, `IndexPatternsContract`, `GfxInputLayoutDescriptor`, `SymbolDataContext`, `ComplexNestedErrorData`, `MergeCSSProperties`, `RStream`, `ApplicationContext`, `JsonRpcSigner`, `GitContributor`, `Desktop`, `DeploymentHandler`, `MicrosoftComputeExtensionsVirtualMachineScaleSetsExtensionsProperties`, `AreaGeometry`, `ProjectImage`, `HapiServer`, `GameModel`, `IW`, `IMP4AudioSampleEntry`, `HttpEvent`, `RoutingTable`, `ReadModelInterface`, `BillDebtor`, `Pizza`, `FunctionTypeParam`, `Messenger`, `ColonyNetworkClient`, `IPluginPageProps`, `MonitoringResources`, `ClipVector`, `TransactionEvent`, `WebDNNWebGPUContext`, `ApplicationSettingsService`, `IPagingTableState`, `TrackedAbility`, `MetricType`, `AVRExternalInterrupt`, `ISessionContext`, `UpdateChannelResponse`, `Home`, `Setup`, `UICollectionView`, `FetchFinishedAction`, `SavedObjectsMigrationConfigType`, `WebPhoneUserAgent`, `ClassIteratorFlags`, `LazyDisposable`, `React.EffectCallback`, `InterpolationFunction`, `OsdUrlTracker`, `ITimeSlot`, `vscode.Memento`, `PaymentsErrorCode`, `ProjectInformation`, `DynamicDialogConfig`, `Weekday`, `ImGui.DrawList`, `TextMetrics`, `RetrieveResult`, `XRWebGLLayer`, `AnalyzerFileInfo`, `d.BuildCtx`, `LocaleOptions`, `CharacterMaterial`, `DeleteRepositoryCommandInput`, `PopperProps`, `uproxy_core_api.CreateInviteArgs`, `QueryType`, `CompilerErrorResult`, `EmbeddablePersistableStateService`, `vscode.TestRun`, `AndroidOutput`, `Outcome`, `ManagementAppMountParams`, `RequestProgress`, `RequestTemplateReference`, `ts.BlockLike`, `fixResult`, `QueuedEventGroup`, `WithCondition`, `WebSiteManagementModels.FunctionEnvelope`, `Methods`, `PanGestureEventData`, `SemanticTokensLegend`, `requests.ListLogsRequest`, `Logs`, `Print`, `CalendarViewEventTemporaryEvent`, `ClassEntity`, `requests.ListAutonomousDbVersionsRequest`, `EcsEvent`, `PushResponse`, `Delaunay`, `ZodTypeAny`, `EveesMutationCreate`, `RegExpReplacement`, `MintGenerativeData`, `Formatter`, `IRoundState`, `ContractInstance`, `GfxBindings`, `SelExpr`, `IEffectExclusions`, `CustomAvatarOptions`, `PhaseEvent`, `TableSchemaDescriptor`, `Local`, `PhysicalTextureType`, `TagsBase`, `SpacesClient`, `CompactdState`, `Items`, `PanelPackage`, `URLBuilder`, `RateLimit`, `PartyAccept`, `SGroup`, `AWS.AWSError`, `SupportedEncoding`, `DescribeTagsRequest`, `PluralType`, `ObOrPromiseResult`, `OpenSearchDashboardsReactOverlays`, `MinecraftFolder`, `models.NetFramework`, `IVisualizerStyle`, `DeleteClusterResponse`, `social.ClientState`, `ContextType`, `Triangle`, `NcPage`, `Operands`, `ACTION`, `HashConstructor`, `XmlSchema`, `SourceFileLike`, `RenderableProps`, `SearchOpts`, `firebase.app.App`, `ArgError`, `WebsocketRequestBaseI`, `Completion.Item`, `SessionGetter`, `ItemInterface`, `CRDTArray`, `AsyncOperation`, `ObjectQuery`, `GroupMembershipEntity`, `TActor`, `TopicId`, `TextRow`, `LangChangeEvent`, `IndexedTechnique`, `OpenApi.Schema`, `AuthTokenService`, `GradientColor`, `BatchValidator`, `ComponentBed`, `PlanetApplicationService`, `PluginObj`, `MagicSDKError`, `FeatureChild`, `ModeType`, `PanelSide`, `ICanvasRenderingContext`, `CellRepo`, `IBuildApi`, `AudioService`, `AccountDoc`, `ConnectorType`, `StoreEnhancer`, 
`Mars.TransactionOverrides`, `ListTargetsForPolicyCommandInput`, `IQService`, `MoveOptions`, `BufferArray`, `StringContent`, `So`, `GetPublicAccessBlockCommandInput`, `DeleteDashboardCommandInput`, `MainModule`, `UserProvided`, `BufferEncoding`, `ChatAdapterState`, `BlendMode`, `ScreenshotBuild`, `SetTree`, `FormInputs`, `MultiKeyStoreInfoWithSelectedElem`, `DateProfileGenerator`, `RateLimitOptions`, `FloatAnimationKeyframeHermite`, `SemanticTree`, `Array`, `BrowseResult`, `IProjectInformation`, `RequestArugments`, `RBNFSymbols`, `ThyDialogContainerComponent`, `HitSensorType`, `ListPatternType`, `ITaskAgentApi`, `OverrideCommandOptions`, `ITransaction`, `IdentNode`, `LMapper`, `ServerResponse`, `TextAreaCommandOrchestrator`, `CreateTagsRequest`, `LanguageServerInterface`, `InstallMessage`, `ExtendedFeatureImportance`, `SideEntityController`, `DBDriverResource`, `Of`, `AnalysisCompleteCallback`, `xml.ParserEvent`, `RegEntity`, `FilePathPosition`, `PolicyRates`, `SemanticTokenData`, `SliceNode`, `SCNSceneRenderer`, `Quantity.MANY`, `OperationRequestDetails`, `StepResult`, `TokenBurnV1`, `ts.ForStatement`, `OutputPort`, `FeatureProps`, `CompilerStyleDoc`, `BasisCompressionTypeEnum`, `CallAdapter`, `CustomRender`, `GADRequest`, `FasterqLineModel`, `lf.Predicate`, `BaseSettings`, `PDFRadioGroup`, `AttachmentRequest`, `PreRenderedChunk`, `DeleteThemeCommandInput`, `TypeIR`, `MethodCall`, `IGenericTaskInternal`, `ProtectionRule`, `ISavedObjectTypeRegistry`, `GfxInputState`, `TensorLike1D`, `MakefileConfiguration`, `PivotQuery`, `MockSdkProvider`, `FakePlatform`, `SendPropValue`, `InvoicesService`, `DebugProtocol.SetBreakpointsResponse`, `ReakitTabInitialState`, `ClassTypeResult`, `IAnimal`, `ICircuitState`, `DescribeAddressesCommandInput`, `CloudWatchLogs`, `CommentType`, `IDialogConfiguration`, `UpdateFlowCommandInput`, `ComponentCompilerVirtualProperty`, `MetadataType`, `IDotEnv`, `IObject`, `ToggleButton`, `IExcludedRectangle`, `CourseStore`, `RequestHeader`, `ConstructDataType`, `RendererProps`, `IMeta`, `NavigatorState`, `preValidationHookHandler`, `EntryModule`, `ButtonVariant`, `requests.GetVolumeBackupPolicyAssetAssignmentRequest`, `DayOfWeek`, `Bot`, `OptionGroup`, `DeploySuccess`, `EVENT`, `ECS`, `ITestState`, `XmlTimestampsCommandInput`, `THREE.Mesh`, `ResourceQuota`, `CreateUserDto`, `DataViewColumn`, `ENV`, `APIClient`, `NpmInfo`, `TextRenderParameters`, `KeyIndexMap`, `EventFnError`, `Letter`, `MessageParams`, `RowFormatter`, `RealtimeVolumeIndicator`, `ThyPlacement`, `INodePackageJson`, `binding_grammarListener`, `PA`, `GfxProgramDescriptorSimple`, `TryPath`, `AnimationResult`, `IReserve`, `BasicReflectionEvent`, `Decoration`, `Results`, `TaskWithMeta`, `InitOptions`, `FinalTask`, `GetObjectOutput`, `EmbeddablePackageState`, `CredentialPreview`, `AccountFacebook`, `triggeredTrap`, `WebsocketState`, `ISdkBitrate`, `PolyfaceData`, `PerformWriteArgs`, `HTMLTableColElement`, `HitBlockMap`, `KeyObject`, `Compilation`, `X12Element`, `PlayCase`, `Broadcaster`, `GetCustomVerificationEmailTemplateCommandInput`, `TextDocumentContentProvider`, `SkeletalComponent`, `Conflicts`, `SearchActions`, `XElementData`, `MeetingState`, `ToastInput`, `TableService`, `XTableColumn`, `d.JsonDocsDependencyGraph`, `MsgStartGroup`, `EntityMetadataMap`, `UploadChangeParam`, `ApplicationCommandData`, `RequestQueryParamsType`, `DangerDSLJSONType`, `ListPresetsCommandInput`, `Dynamic`, `CancelSubscription`, `__HttpResponse`, `Orientation`, `Royalty`, `ProtoJson`, `Identity`, `EncodedQuery`, `UIDialogRef`, 
`SCNNode`, `QualifiedId`, `OperationContract`, `AppOptions`, `Mark`, `CalcIdxType`, `MouseUpEvent`, `Utf8ToUtf32`, `ZeroXOrder`, `AsyncFunction`, `DesugaringContext`, `ThemeReducer`, `GetModelTemplateCommandInput`, `DataViewFieldBase`, `TabbedRangeFilterParams`, `bigint`, `ConversionType`, `MinecraftVersion`, `Queue`, `MathBackendWebGL`, `GetText`, `IHighlight`, `ApolloClient`, `ISortOption`, `ExperimentPhase`, `StatusBarWidgetControl`, `PrincipalPermissions`, `FiberRoot`, `ThemeType`, `Vec2Like`, `SavingsService`, `p5.Graphics`, `firebase.firestore.FirestoreDataConverter`, `SubmitKey`, `NetworkDiff`, `OutputLocation`, `VertexAttributeGenDef`, `ARPosition`, `StudentRepository`, `CompleteGodRolls`, `ScriptSource`, `LCDClient`, `V1CertificateSigningRequest`, `ListingMeta`, `LayerForTest`, `ServiceBuild`, `GX.TexGenMatrix`, `HttpErrorResponse`, `PerimeterEdge`, `ModalController`, `d.CompilerFileWatcherEvent`, `DefaultClient`, `MergeStrategy`, `SchemaToArbitrary`, `NodeKind`, `HistoryViewContext`, `DiscoveryService`, `OutputTargetStats`, `ParsedPlacement`, `CombatantInfoEvent`, `Whitelist`, `PackageUser`, `SerializedSourceAnalysis`, `ListPositionCacheEntry`, `ArrayDiffSegment`, `FabricEnvironmentRegistry`, `MessageCode`, `MapFunc`, `PluginValidateFn`, `AppMountParameters`, `Sql`, `BoosterGraphQLDispatcher`, `IsometricPoint`, `ts2json.DocEntry`, `EmberAnalysis`, `DevtoolsInspectorProps`, `D3_SELECTION`, `PointerStates`, `ISimpleGridEdit`, `BSTProxy`, `CpuRegister.Code`, `ListManagementAgentsRequest`, `GlobStats`, `ItemPredicate`, `ConsoleFake`, `GoldenLayout.ItemConfig`, `TextData`, `SessionsState`, `IConnectionExecutionContextInfo`, `ResolvedGlobalId`, `GraphQLUnionType`, `ISO`, `PathType`, `ParsedArgv`, `IConnectableBinding`, `SandDance.types.Column`, `ToneAudioBuffer`, `MiBrushRepaintConfig`, `ManagedDatabase`, `CommandFn`, `FormEntry`, `JWK`, `ParamFunction`, `Coord`, `AppProduct`, `DataClassEntry`, `PackageMetadata`, `tf.NamedTensorMap`, `CircuitDesigner`, `ODataSingletonResource`, `DelayNode`, `RefreshTokenRepository`, `SeriesUrl`, `App.windows.window.IClassicMenu`, `PopupMessage`, `OrganizationPoliciesConfig`, `Transformable`, `IBucketAggConfig`, `AttachmentData`, `AggregatePriceService`, `IStructuredLicense`, `TallySettingsIni`, `IndTexStage`, `Ribbon`, `ExpressionContext`, `ElementFlags`, `InjectableMetadata`, `HandlerOptions`, `ChangeInstallStatus`, `IRawBackupPolicy`, `ReadRequest`, `App.windows.window.IXulTrees`, `DayFitnessTrendModel`, `TabState`, `QueryManager`, `ResultSet`, `ActionPayload`, `NoticeEntity`, `PropertyAst`, `SuggestQueryInterface`, `DependencyMapEntry`, `EXECUTING_RESULT`, `VNodeData`, `HealingValue`, `DisplayValue`, `Fail`, `InstructionWithTextAndHeader`, `StorageValuesV7`, `ReportFunnel`, `Mocha`, `MetalsTreeViewNode`, `IGameMessage`, `IEditorPosition`, `SparseMerkleTreeImpl`, `IEnumerable`, `types.TextDocumentIdentifier`, `HeadElement`, `UniqueEntityID`, `HypermergeWrapper`, `StatefulCallClient`, `FiltersBucketAggDependencies`, `AllStyleOption`, `CheckPrivateLinkServiceVisibilityRequest`, `BufferVisitor`, `ApiKeyProps`, `DiscogsTrack`, `OperatorFunction`, `RemoteObject`, `EqualityDeciderInput`, `ElementAspect`, `InvocationContext`, `CommandClient`, `ControlFormItemSpec`, `JasmineBeforeAfterFn`, `Tilemap`, `CredentialService`, `TimeRangeInformation`, `RedirectionResponse`, `IMessage`, `NavigationGuard`, `AggregationRestrictions`, `Vec2Term`, `NVM3Page`, `GitCommitLine`, `PlainValue`, `TResource`, `DateAxis`, `FolderUpload`, `UIPageViewController`, `Finished`, 
`SVGGElement`, `BOOL`, `UISchemaElement`, `ConfigEntity`, `WorkItemTypeField`, `PhysicalElement`, `DecryptedMessage`, `PointCandidate`, `ImportFacebookFriendsRequest`, `sinon.SinonSpyCall`, `GetGeneratorOptions`, `SessionConnection`, `FaunaPaginateOptions`, `StopExperimentCommandInput`, `AggParamsItem`, `ListAst`, `AliasMapEntry`, `PromiseResult`, `ServiceKey`, `TextOptions`, `ValidatorsFunction`, `BaseDbField`, `LoginInfo`, `PerpV2BaseToken`, `DocumentOnTypeFormattingParams`, `AggregateSpec`, `VoidFunction`, `CreateDBClusterEndpointCommandInput`, `AutoRest`, `HttpCall`, `GfxDeviceLimits`, `ReviewerEntity`, `UpdateManyParams`, `SetupFunc`, `requests.CancelWorkRequestRequest`, `StateValue`, `ValueAndUnit`, `AccessExpression`, `Fetch`, `SerializeCxt`, `PluginStreamAction`, `SingleRepositoryStatisticsState`, `CanvasModel`, `IStorageWrapper`, `SpaceMembershipProps`, `ServiceClass`, `ContextMenuItemModel`, `App.Context`, `FlowBranchLabel`, `InjectedMetamaskExtension`, `Chat`, `DeserializeWireOptions`, `sdk.SessionEventArgs`, `RegistrationForm`, `IBufferService`, `paneType`, `ActionCodeSettings`, `AClassWithSetter`, `ProgramState`, `UploadInfo`, `EmbeddableFactoryDefinition`, `Filter`, `Feature`, `Tremolo`, `PatternLike`, `FirebaseAuthState`, `IOperatorIdentifier`, `Rule.RuleFixer`, `ListStorageObjectsRequest`, `ControllerClass`, `IRegistryInfo`, `TextureCoordinateType`, `LogStream`, `ColumnMetaData`, `FrontstageDef`, `ValueMetadataBuffer`, `TextClassification`, `BuilderProgramState`, `ConfigProps`, `SliceAction`, `RequestBodyParserOptions`, `Trampoline`, `t.MemberExpression`, `Knex.Config`, `MethodParams.ProposeInstall`, `DiffView`, `ViewPortManager`, `d.CompilerBuildStart`, `EventUi`, `ConsoleTransportInstance`, `RenderLeafProps`, `SnapshotFragmentMap`, `BasicScene`, `UsedHashOnion`, `AngularExternalTemplate`, `requests.ListBootVolumeBackupsRequest`, `Events.postdraw`, `EarningsTestService`, `PatternAtomNode`, `OptionalDefaultValueOrFunction`, `DecodedTile`, `ColorLike`, `CompositeCollection`, `LinearLayout`, `CompletionSymbol`, `FetchCache`, `ChatServerConnection`, `FilterMode`, `pxt.TargetBundle`, `Provider`, `T.Effect`, `Ivl`, `PathMatcher`, `DataChannel`, `C6`, `ExtendedMesh`, `MapLike`, `BootstrappedSingleSpaAngularOptions`, `LevelLogger`, `requests.ListBackupsRequest`, `HtmlParser`, `SkillService`, `messages.Hook`, `HasTaskState`, `ListObjectsV2Output`, `GPUShaderModule`, `WWA`, `ReducerArg`, `DataTableRow`, `ColumnWorkItemState`, `AssignAction`, `ListParticipantsResponse`, `AaiMessage`, `FsReadOptions`, `SecurityClass`, `BindingInfo`, `PullToRefresh`, `GlobalVarsService`, `SflTester`, `_1.Operator`, `UIEventSource`, `ThemeGetter`, `DefaultContext`, `ProposalMessage`, `IPluginAPI`, `Translation`, `HashedFolderAndFileType`, `ExperimentalStickering`, `IEdge`, `ValueMetadataBoolean`, `HsLayerFlatNode`, `CommandQueueContext`, `WrapOptions`, `AttandanceDetail`, `PoolClient`, `Manipulator`, `CollisionInfo`, `UserLoginResource`, `ISupportCodeExecutor`, `TestStepResult`, `PortProvider`, `DispatchType`, `chrome.runtime.MessageSender`, `ElementMaker`, `ZenObservable.Observer`, `ReknownClient`, `PrimitiveType`, `PageType`, `IGetRefParamsExternal`, `OriginConnectionPosition`, `ApolloReactHooks.MutationHookOptions`, `ICompilerResult`, `SceneActuatorConfigurationCCReport`, `TexMap`, `TextRange`, `DeleteEmailIdentityCommandInput`, `TabComponent`, `SpacedRepetitionSettingsDelegate`, `RSAPrivateKey`, `ITemplateItem`, `TsOptionComponent`, `BlobStorageContext`, `NotificationOptions`, `TemplateProps`, 
`UnregisteredAccount`, `GeoPointLike`, `ListHttpProbeResultsRequest`, `Paged`, `MTD`, `Roles`, `CapacityProviderStrategyItem`, `SessionStorageCookieOptions`, `IValidationResponse`, `IHubSearchOptions`, `Dep`, `CreateSavedObjectsResult`, `requests.ListRoutingPoliciesRequest`, `StreamerConfig`, `AppwriteProjectConfiguration`, `DescribeCertificatesCommandInput`, `DaffSubdivisionFactory`, `ThyTreeSelectNode`, `NeovimClient`, `IntCodeComputer`, `TestDataSource`, `ElectronService`, `SingularReaderSelector`, `Resolution`, `WithElement`, `LanguageClient`, `Creep`, `VectorKeyframeTrack`, `ScaleOptions`, `TalentMaterial`, `SyncNotifyModel`, `GameMap`, `MagitBranch`, `ObjectBinding`, `ContextContributorFactory`, `Signals`, `V1PersistentVolume`, `HsQueryVectorService`, `ModifyDBInstanceCommandInput`, `CmsModelPlugin`, `HsLayerDescriptor`, `CalendarItem`, `WorkspaceType`, `MarkdownIt`, `StatsService`, `Swiper`, `SyncTable`, `IRequestUserInfo`, `TemplateParameters`, `ParseFn`, `FirewallRule`, `requests.ListPublicIpsRequest`, `NexusInputObjectTypeDef`, `Span`, `SVGDOMElement`, `StackResultsMatcher`, `TargetTypesMap`, `CanvasSpaceNumbers`, `RBNFSetBase`, `ReboostInstance`, `LabelModel`, `SingleKeyRange`, `NewsItemModel`, `CoapRequestParams`, `IGridAddress`, `KeyframeAnimation`, `IExtensionPlugin`, `KintoResponse`, `CompressionOptions`, `UpdateGlobalSettingsCommandInput`, `DatabaseEventBus`, `EzRules`, `Endpoint`, `SdkSubscribeAckFrame`, `ThemeManager`, `ICXGenericResult`, `TeamsState`, `MachineConfig`, `IArtifact`, `ScullyContentService`, `SyncModule`, `DetectedFeatures`, `AlreadyExistsException`, `IRNG`, `PDFDocumentProxy`, `LanguageHandlers`, `CloudWatchDestination`, `CollapseProps`, `CompiledPredicate`, `d.OutputTargetDistTypes`, `UpdateChanges`, `ExportService`, `NgrxFormControlId`, `BlockModel`, `MarketResponse`, `ConfigConfig`, `PutFeedbackCommandInput`, `InternalCase`, `Vorgangsposition`, `HandPoseOperatipnParams`, `MoonbeamDatasource`, `FeeLevel.Medium`, `ShadowTarget`, `NetworkRecorder`, `PiecePosition`, `DemoBlockType`, `FormfieldBase`, `PaymentChannelJSON`, `FormatCodeSettings`, `ActionExecutionContext`, `ICloudTimerList`, `monaco.editor.IStandaloneCodeEditor`, `Spaces`, `amqplib.Options.Publish`, `IOpenSearchDashboardsSearchResponse`, `Supports`, `ActionTypeBase`, `InstantiableRule`, `MenuDataAccessor`, `ConfigurationData`, `Get`, `AxisMilestone`, `DaffAddress`, `TransitionEvent`, `Single`, `memory`, `NexeFile`, `DeleteLoadBalancerCommandInput`, `SignedStateWithHash`, `WorkspaceManager`, `CommandLineAction`, `UiKit.BlockContext`, `HandleOutput`, `Bundler`, `FilePathKey`, `EnabledFeatures`, `StateObject`, `Directive`, `ArangojsResponse`, `GeometryProvider`, `Nibble`, `PendingTestFunction`, `MatchDataSend`, `DateTimeParts`, `TinyPgParams`, `RuleTarget`, `MongoMemoryServer`, `SqrlFeatureSlot`, `TypeOperatorNode`, `AppointmentUnwrappedGroup`, `Gesture`, `Submission`, `WikiItem`, `vscode.Hover`, `MediaTrackSettings`, `UserDoc`, `UniswapFixture`, `ResolvedElementMove`, `CompilerConfiguration`, `SemanticNode`, `EventListenerCallback`, `ICollectionTrigger`, `GetMessagingSessionEndpointCommandInput`, `IModuleMap`, `NodeService`, `AxisGeometry`, `LicenseStatus`, `ListTasksCommandInput`, `HeaderMapTypeValues`, `FactoryUser`, `PartialResolvedVersion`, `StandardResponse`, `AtomState`, `EntitySprite`, `ValidationFunction`, `ApiInterfaceRx`, `AudioPlayerState`, `ListrTaskWrapper`, `Tardigrades`, `ExpandedAnimator`, `UsersState`, `IStopsProvider`, `BezierPoints`, `PreProcessor`, `DashboardComponent`, 
`GenerateMappingData`, `MoveType`, `GeometryHandler`, `IGetActivitiesInput`, `TestSerializer`, `RenameEntityEvent`, `DiscordInteraction`, `IHooks`, `DataType`, `PaginatedList`, `FileWithPath`, `MetamaskPolkadotSnap`, `OpenSearchDashboardsSocket`, `CohortService`, `FormValidation`, `PDFCrossRefStream`, `ParsingState`, `GuaribasAnswer`, `StandardSkillBuilder`, `ICurrentControlValidators`, `Loop`, `RootStore`, `NedbDatastore`, `UninterpretedOption_NamePart`, `SpellList`, `EslingPlugin`, `JIssue`, `IPolygonGeometry`, `FuncVersion`, `d.RobotsTxtOpts`, `MonthData`, `ColorInput`, `ActionCreatorWithOptionalPayload`, `BufferTypeValues`, `InanoSQLTable`, `AABBOverlapResult`, `NodeSSH`, `u64spill`, `Signer`, `InterpolatorFactory`, `CCIndicatorSensor`, `SendResponse`, `RPCConnection`, `PrefixLogger`, `NodePhase`, `TLIntersection`, `DeauthenticationResult`, `SubscriberRepository`, `DocumentError`, `LonLatArray`, `SpatialImagesContract`, `EmployeeLevelService`, `React.FunctionComponent`, `RemoteNodeSet`, `PythonPreviewManager`, `IUserModel`, `ItemData`, `WrapExportedEnum`, `FrameBuffer`, `MainAccessRequest`, `RouterOptions`, `FSMState`, `SecurityScheme`, `ICoreMouseEvent`, `RemoteConfig`, `UpdateApplicationRequest`, `IAuthor`, `GenericTwoValuesAndChildren`, `IntrospectionTypeRef`, `TemplateResult`, `IIconProps`, `MigrateFunctionsObject`, `bool`, `RedBlackTreeEntry`, `SearchableContainerInput`, `TableAccessFullStep`, `ServiceId`, `d.JsonDocsUsage`, `AddGatewayV1`, `TransformSchemaOptions`, `ExpansionModule`, `FunctionContext`, `PerfEntry`, `SearchParamAsset`, `SignedCanonicalOrder`, `ICommandResult`, `DurationMs`, `CalculatorTestContext`, `SelectProps`, `NamedStyles`, `TarTransform`, `DatedAthleteSettingsModel`, `BriefcaseDb`, `ClearableMessageBuffer`, `ToastService`, `GLProgram`, `PacketNumber`, `TAttrs`, `NavResponse`, `StagePanelSection`, `Story`, `VideoStreamOptions`, `EffectsInvocationContext`, `RegisterReq`, `MergeRequestPayload`, `DataResolverOutputHook`, `HandlebarsTemplate`, `PopoverContextValue`, `AutoAcceptProof`, `SharedMap`, `IClassification`, `SRTFlags`, `SmoothedPolyline`, `DocTableLegacyProps`, `DeleteYankChangeAction`, `HasPos`, `ValueMetadataString`, `ClientOptions`, `RRect`, `IPoint`, `RouteResult`, `IdentityProviderSelectionPage`, `XmlFile`, `glm.mat4`, `_OIDCConfiguration`, `SpeechRecognitionEventArgs`, `GfxInputStateP_GL`, `ServiceArgs`, `JSMs.XPCOMUtils`, `MarkerOptions`, `ImagePickerControllerDelegate`, `ProblemModel`, `Loader`, `TimeInfo`, `ThroughputSettingsUpdateParameters`, `MarkdownService`, `PubSubEngine`, `RegTestContainer`, `IReportEmbedConfiguration`, `requests.ListComputeImageCapabilitySchemasRequest`, `FactoryContextDefinition`, `EventCreatorFn`, `UniversalAliasTable`, `io.IOHandler`, `Account`, `RestSession`, `Level3`, `IUsageMap`, `TopicsMap`, `FlatIndex`, `AppEpic`, `templateDataType`, `ItemUUID`, `Color`, `AuthorizationPayload`, `ListPartsCommandInput`, `AttributeTableData`, `UserSettingsStorage`, `TopologyService`, `IOdataAnnotations`, `AutoTranslateServiceAPI`, `IRule`, `EveesConfig`, `CastNode`, `DocumentAccessList`, `NormalCollection`, `BlobURL`, `GenerationStatus`, `Research`, `Roots`, `ActivityPubActor`, `JsonSchemaRegisterContext`, `ColorSchemeId`, `Example`, `QState`, `JRPCRequest`, `CriteriaNode`, `ValueTypeOfParameter`, `PLSQLConnection`, `CdtTriangle`, `TestHostComponent`, `Gateway`, `RunEvent`, `StorageData`, `DescribeScalingActivitiesCommandInput`, `MatchExp`, `NewTootState`, `ChartRef`, `UpdateManyResponse`, `UpdateAccountCommandInput`, 
`UsePaginatedQueryState`, `AxisTick`, `SignalingClientConnectionRequest`, `LineDashes`, `ChangeSetData`, `EntityFactory`, `Pooling3DLayerArgs`, `GX.IndTexFormat`, `ProfileServiceProxy`, `LoadingController`, `GlobalEventDispatcher`, `CollisionZone`, `requests.ListAutonomousDatabaseClonesRequest`, `TreeviewComponent`, `Services`, `IngredientForm`, `Control3D`, `AssertionLevel`, `EngineArgs.CreateMigrationInput`, `NameInfoType`, `Comment`, `NetworkEdge`, `DIContainer`, `TsSelectedFile`, `VariantForm`, `ComponentType`, `PatternParams`, `PoolConfig`, `MetricService`, `AutorestLogger`, `ts.ExportSpecifier`, `IActionInputs`, `IUserWithRoles`, `PrepareEnvVariablesProps`, `Refinement`, `AudioSource`, `IRecordedDB`, `SignalData`, `AlertContextOptions`, `CompilerSystemWriteFileResults`, `PerModuleNameCache`, `core.VirtualNetworkClient`, `VertexAttribute`, `Cache`, `Background`, `SChildElement`, `MDCChipActionAttributes`, `MdcFormField`, `Palette`, `FirestoreAPI.Value`, `ReactiveObject`, `MutableList`, `ConfigurationModule`, `PlayerInstant`, `requests.ListTargetDatabasesRequest`, `DeleteAccountsRequestMessage`, `BlobLeaseAdapter`, `TodosPresentST`, `ScriptBuilder`, `NumberShape`, `ReadAllCallback`, `Cascade`, `SeriesMarkerRendererDataItem`, `DeleteEventSubscriptionMessage`, `types.Message`, `ParsedNumber`, `SnsDestination`, `AngleFromTo`, `MIRFunctionParameter`, `RoleService`, `STData`, `RendererOptions`, `DejaTilesComponent`, `ClassScheme`, `ResolveModuleIdOptions`, `IdxTree`, `DatabaseFacade`, `ZeroXPlaceTradeDisplayParams`, `Stack.Props`, `RetryKeycloakAdmin`, `InternalDiagnostic`, `ParsedRoute`, `Input.Gamepad`, `TextTransformType`, `BuildDecoratorCommand`, `ISolutionWithFileIds`, `NodeMaterialBlock`, `ParserAstContext`, `DrawState`, `ClientStringService`, `IHTMLInjection`, `requests.ListPoliciesRequest`, `Nuxt`, `KeyValueChanges`, `Graph2`, `SignInResult`, `CaseClause`, `PointerInput`, `CheerioStatic`, `Runnable`, `AccountFilterParams`, `TestMessage`, `TableQuery`, `ISchema`, `ColumnProps`, `AbstractRegion`, `ParquetCodec`, `EntityLoaderOptions`, `angular.ICompileService`, `Greeter`, `ExploreState`, `CodeGenResult`, `PresentationTreeNodeLoaderProps`, `SFCDeclProps`, `ResolveTree`, `SubschemaArgs`, `Promised`, `AnyElt`, `requests.ListBootVolumeAttachmentsRequest`, `UIError`, `BubleDataType`, `SavedObject`, `Strip`, `TypedReflection`, `ContractWrapper`, `ExtractModConfig`, `CeloTransactionObject`, `ResourceKeyList`, `UrlPropertyValueRenderer`, `Readme`, `BinaryTreeNode`, `UniDriver`, `DateTableContext`, `TransformParams`, `ProviderResource`, `IGraph`, `CTransactionSegWit`, `PutDeliverabilityDashboardOptionCommandInput`, `Bangumi`, `QuerySnapshotCallback`, `ArrayIterator`, `UploadFile`, `SeedAndMnemonic`, `InputOptions`, `CloudFrontRequestEvent`, `EventArguments`, `IFormSection`, `BinaryOperationNode`, `ProtonApiError`, `StreamManager`, `QueryBarTopRowProps`, `DeviceConfigIndexEntry`, `ShippingState`, `SrtpContext`, `IAppConfig`, `requests.ListDbSystemShapesRequest`, `NormalModule`, `DocumentSnapshotCallback`, `ConflictMap`, `PluginResultData`, `TestPage`, `IrisIcon`, `IConsoleResponse`, `ScriptingDefinition`, `Separator`, `ESTreeNode`, `BedrockFileInfo`, `CompileKey`, `Type_AnyPointer_Unconstrained`, `Duration`, `ResolvedTupleAtomType`, `CommitterMap`, `DropTableNode`, `StepExecution`, `Angulartics2GoogleGlobalSiteTag`, `FIRQuerySnapshot`, `PaymentMethodCreateParams.BillingDetails`, `UrlParam`, `EngineArgs.PlanMigrationInput`, `SavedSearch`, `jdspec.PacketInfo`, `TradeComputed`, `IConfirmProps`, 
`MockedElement`, `DefaultProps`, `AcMapComponent`, `RequestInfo`, `HierarchyCompareInfoJSON`, `Applicative2`, `ReportingConfig`, `MaskModel`, `AllFile`, `TreeViewExpansionEvent`, `UploadFileStatus`, `SwapEncoder`, `IChannelModel`, `DMMF.SchemaArgInputType`, `ComponentDefinition`, `Entrypoint`, `QR`, `StylingBindingData`, `GetPolicyVersionCommandInput`, `SchemaFactory`, `Alternatives`, `LintMessage`, `InputParamValue`, `ConnectOptions`, `PrivateIdentifierInfo`, `IconRegistryService`, `IORedis.RedisOptions`, `LogError`, `HSD_TECnst`, `DirectChannel`, `FormContext`, `SlotOp`, `IPCResult`, `BaseOption`, `InMsg`, `RegisterCertificateCommandInput`, `_N`, `AnyCallbackType`, `GetRoomCommandInput`, `PDFDict`, `InvalidTagException`, `android.graphics.Typeface`, `RBNFInst`, `ReferenceToken`, `RootObject`, `SequentialLogMatcher`, `CieloTransactionInterface`, `ContentState`, `Player`, `BuildrootUpdateType`, `MatCheckbox`, `SigningCosmWasmClient`, `MappingItem`, `UniqueOptions`, `CppParseTree`, `EmbeddableOptions`, `server.DataLimit`, `ReturnTypeFuncValue`, `MVTFieldDescriptor`, `Hapi.Server`, `JavaScriptRenderer`, `NamePosInfo`, `Highcharts.AnnotationPointType`, `ShoppingCartState`, `Benchee.Options`, `AppLogger`, `ProbabilitySemiringMapping`, `GraphTxnOutput`, `PushOptions`, `CreatePhotosDto`, `InboundMessageContext`, `Measure`, `AnnounceNumberNumberCurvePrimitive`, `GroupByColumn`, `AutorestSyncLogger`, `SignalOptions`, `DragEvent`, `MassMigrationCommitment`, `DisplayAccessKey`, `Mesh3D`, `CategorySegment`, `ModelPath`, `SerializerTypes`, `BadgeSampleProps`, `ExtractDto`, `IEntityInfo`, `ast.AssignNode`, `UserRef`, `TensorWithState`, `Portion`, `TypeApp`, `Curl`, `DescribeCertificateCommandInput`, `SimpleSignedTransferAppState`, `IGetUserInvitationOptions`, `MapObjectAdapterParams`, `ConverseContext`, `CustomCallbackArgs`, `IPluginSettings`, `AudioConfigImpl`, `ListKeysCommandInput`, `AppOption`, `Endian`, `AppInstance`, `ts.VariableDeclarationList`, `AnimGroupData_Draw`, `EndpointAuthorization`, `AppResourcesModel`, `VocabularyCategory`, `AstNodeContent`, `VcalDateOrDateTimeProperty`, `PatternMappingKeyEntryNode`, `OptionsMatrix`, `CostMetric`, `UpdateFileSystemCommandInput`, `ExpectedNode`, `InsightObject`, `TextInputVM`, `TransitionType`, `SpecPage`, `ListenerCallback`, `AnimationNodeContent`, `FilterCategory`, `NextHandleFunction`, `GeniePlugin`, `IChild`, `SavedObjectsExportablePredicate`, `GLTFFileLoader`, `TypeResolvingContext`, `IFormTemplate`, `SlashDot`, `QueryMiddleware`, `MemberForm`, `MatchFunction`, `SWRKey`, `ReferenceExpression`, `RSSI`, `requests.ListDbHomesRequest`, `SortColumn`, `MatchModel`, `SetupServerApi`, `BufferData`, `NzSelectItemInterface`, `LoadedTexture`, `PDFButton`, `WebsocketService`, `ValueType`, `RetryHandler`, `CommunOptions`, `KeyCompoundSelector`, `LintOptions`, `LoggingConfiguration`, `IDBDatabase`, `ApplicationListenerArgs`, `StableSwap`, `WorkItemQuery`, `EmissionsController`, `XmlEmptyBlobsCommandInput`, `RegisteredServiceSingleSignOnParticipationPolicy`, `ExposureMode`, `CryptoCurrency`, `ElementGeometryInfo`, `CSG`, `Miscellaneous`, `MatDialogContainer`, `IForm`, `DaffCategoryFilterEqualOption`, `EntityComparer`, `TimefilterContract`, `CommandRegistry`, `IVideoPlayerState`, `MySet`, `requests.ListExternalPluggableDatabasesRequest`, `InternalRouteInfo`, `QueryParamConfig`, `ColumnFilters`, `SolverConfig`, `UISession`, `CmsIndexEntry`, `IBoxSizing`, `BaseThemedCssFunction`, `TNSDOMMatrix`, `InsertUpdate`, `EncryptionConfig`, `UniformState`, `Runtime.Port`, 
`ResolvingLazyPromise`, `SharedTree`, `SingleOrMultiple`, `AppBarProps`, `SpreadAssignment`, `UpdateDomainResponse`, `FastRTCPeer`, `SignatureProviderRequestEnvelope`, `CompilerInput`, `ConnectionMode`, `TestSuiteInfo`, `ValidationResultsWrapper`, `V1Namespace`, `Calculator`, `requests.ListDbVersionsRequest`, `InsightOptions`, `SelectorAstNode`, `BleepsGenerics`, `IBasicSessionWithSubscription`, `AllureGroup`, `FlowAssignment`, `nanoid`, `NodeURL.URL`, `NodeMaterial`, `React.ForwardRefRenderFunction`, `GulpClient.Gulp`, `vscode.QuickPick`, `Filterable`, `TransactionType`, `ComponentWithMeta`, `socketIo.Socket`, `InternalServerErrorException`, `Harmony`, `SCSSParser`, `CreateFolderCommandInput`, `ChunkList`, `ConfigurableFocusTrap`, `DescribeReservationCommandInput`, `ReadonlyVec4`, `Rule.RuleListener`, `SackChunk`, `CartEntity`, `HTMLPropsWithRefCallback`, `IPanesState`, `TranslationAction`, `PokemonType`, `RevalidatorOptions`, `PuzzleID`, `SubscriptionData`, `UnusedAttribute`, `Transpiler`, `EntityUID`, `LedgerWriteReplyResponse`, `IObservableArray`, `HJPlayerConfig`, `SchematicContext`, `WorkerMessageType`, `Ruleset`, `MGLMapView`, `RoundingModeType`, `LegendType`, `SettableUserCode`, `RemoveEventListenerFunction`, `SpeakerService`, `PerspectiveDataLoader`, `LayoutCompatibilityReport`, `GlobalContext`, `ResolverInfo`, `G1`, `CreateSavedObjectsParams`, `CallNode`, `BinderFindBindedPositionRet`, `ReconnectingWebsocket`, `QueryFlag`, `FixOptions`, `DeleteAppInstanceUserCommandInput`, `DAL.DEVICE_ID_TOUCH_SENSOR`, `StyleElement`, `IAMCPCommand`, `IDBOpenDBRequest`, `ISearchParams`, `GlobalConstraintRegistrarWrapper`, `T3`, `IWatchExpressionFn`, `DisabledTimeFn`, `Events.pointerdragmove`, `P2SVpnConnectionRequest`, `ConvertedRemoteConfig`, `MdcTab`, `ScryptedNativeId`, `RuleManager`, `DeleteReplicationConfigurationTemplateCommandInput`, `ɵɵInjectableDef`, `EDateSort`, `CSSValue`, `GX.TevColorChan`, `ICommand`, `PaginationOptions`, `ColumnInfo`, `HalfBond`, `HexDocument`, `DeployedCodePackageCollection`, `SequelizeOptions`, `EditorProps`, `SQLite3Types`, `GuidString`, `AfterGenesisBlockApplyContext`, `AngularFireFunctions`, `SequenceComponent`, `TextDiff`, `MemberMethodDecl`, `messages.PickleDocString`, `OpenAPISchema`, `ServiceInfo`, `FileSystemEntries`, `AttributeValue`, `Interfaces.IBroker`, `AWSError`, `Plane3dByOriginAndUnitNormal`, `WatchOfConfigFile`, `EmitterConfig`, `Workflow`, `ITestBillingGroup`, `InstanceWrapper`, `SwitcherItemWithoutChildren`, `PrintResultType`, `ProjectColumn`, `DependencyKey`, `CodeEditor.IEditor`, `CurriedFunction3`, `ITimeLog`, `Connection`, `ContactPayload`, `ComponentSymbolTable`, `BridgeableChannel`, `SmallLicense`, `DxModelContstructor`, `ArticleDetail`, `ReconnectDisplay`, `FeeExtensionsConfig`, `Normalized`, `BuildHandler`, `QueryStringFilter`, `CallHierarchyService`, `I18nMutateOpCodes`, `ScryptedInterfaceProperty`, `FunnelCorrelation`, `AnalysisResponse`, `TYPE_AMOUNT`, `SuperTest`, `TokenCredentialsBase`, `MarkdownItNode`, `VisualGroup`, `WebhookRequestData`, `REPLServer`, `IMessageDefinition`, `Compatible`, `Glyph`, `DescribeAppInstanceCommandInput`, `CharData`, `CoreUsageStats`, `UsbDevice`, `Progress.INeonNotification`, `HsStylerService`, `ControllerInterface`, `IEmployeeJobPost`, `DbPatch`, `SvelteDocumentSnapshot`, `TeamWithoutMembers`, `DxTemplateHost`, `ContentTypeReader`, `AdalService`, `NodeDocument`, `AssertionTemplateResult`, `Bignum`, `Totals`, `DataViewBaseState`, `QComponent`, `ASTParserTree`, `RectInfo`, `JsonType`, `JwtKeyMapping`, 
`PropertyAssignment`, `RegionConfig`, `RawExpression`, `UnsupportedBrowsers`, `AnySpec`, `AuthMode`, `ChainParam`, `RadarChart`, `StorefrontApiModule`, `SimpleDeep`, `WebSocketServer`, `CoreFeature`, `ExtHandler`, `EvCallData`, `LineNode`, `BaseFunction`, `TheoryDescriptor`, `DefaultRequestOptions`, `GfxSamplerP_WebGPU`, `DeviceManager`, `DayHeaderWrapper`, `NexusInterfaceTypeDef`, `IGetExpenseInput`, `DescribeChannelModeratedByAppInstanceUserCommandInput`, `IBlockOverview`, `DatabaseInterface`, `AddTagsOutput`, `UpdateQueryNode`, `UserMetadataStore`, `GetDeliverabilityTestReportCommandInput`, `VoteChoices`, `FlowsenseCategorizedToken`, `Canonizer`, `ScoreRecord`, `SelectorNode`, `RxLang`, `SecurityGroupContextProviderPlugin`, `Net.Socket`, `RuntimeExtensionMajorVersions`, `DependencyChecker`, `CreateJobCommandInput`, `PartBody`, `StatusType`, `PluginName`, `DeleteClusterRequest`, `TimeOffsetInterval`, `IndexedCallback`, `ObjectCacheEntry`, `FieldsTree`, `Let`, `ViberTypes.MessageOptions`, `IDatabaseApiOptions`, `IMediatorMapper`, `DebugProtocol.StepInResponse`, `TemplateDocument`, `CommandEvent`, `MediaMatcher`, `DeepStateItem`, `NpmConfig`, `SortBy`, `ApiRoute`, `ParsedMessage`, `LayoutDto`, `NgModuleMetadata`, `DefaultEditorAggAddProps`, `TeslaStyleSolarPowerCard`, `ConfigurationScope`, `DefinitionLocations`, `requests.ListInstanceAgentPluginsRequest`, `d.ComponentCompilerEvent`, `DeleteCustomVerificationEmailTemplateCommandInput`, `FunctionSignature`, `Calendar_Contracts.CalendarEvent`, `GeometriesCounts`, `DecimalFormatter`, `ListView`, `AnnotationDomainType`, `DistinctValuesRpcRequestOptions`, `SavedObjectsClientWrapperFactory`, `DebugProtocol.Response`, `FullNote`, `TelemetryNotificationsConstructor`, `ThyScrollService`, `ReferencedSymbolDefinitionInfo`, `ParserError`, `AnimationControls`, `BucketInfo`, `ActorLightInfo`, `ProseNode`, `TrackingData`, `PiConceptProperty`, `IRequireMotionAction`, `CtrLte`, `jsPDFDocument`, `QueryTreeNode`, `EquipmentStatus`, `ChannelPickerItemState`, `LedgerTransport`, `FeatureSymbology.Overrides`, `ZWaveFeature`, `SPHttpClient`, `d.OutputTargetDistLazyLoader`, `ActivityService`, `TextEditorSelectionChangeEvent`, `CreateProjectRequest`, `IStream`, `core.ETHGetAccountPath`, `HTMLAttribute`, `BackendMock`, `EditablePoint`, `GlobalStoreDict`, `ts.LanguageServiceHost`, `Suffix`, `RxJsonSchema`, `RuleWithCnt`, `CodelistService`, `LinkRecordType`, `TermEnv`, `estypes.AggregationsAggregationContainer`, `ILookUpArray`, `CoreImageEnt`, `IUpdateOrganizationCommandArgs`, `PartitionLayout`, `MockERC20`, `AirnodeRrp`, `Relationships`, `SpotifyService`, `TimeOffRequest`, `lf.schema.Table`, `UpSampling2D`, `vscode.TextEditorEdit`, `ICreateResult`, `ODataUri`, `EventParameter`, `TIcu`, `ArrayBindingElement`, `SimpleRenderer`, `IOSNotificationPermissions`, `IWebhookData`, `DataLakePrincipal`, `InstanceInfo`, `ClusterVulnerabilityReport`, `IsolationStrategy`, `RequestMatcher`, `ITokenProvider`, `SchemaObject`, `MapStateToProps`, `LuaComment`, `DiscoverStartPlugins`, `d.ScreenshotBuildData`, `apid.ProgramId`, `ImageRequestInfo`, `IDataFrame`, `RowRenderer`, `LabelProps`, `MovementState`, `IApplicationOptions`, `MinimalNodeEntryEntity`, `ITimesheet`, `SessionStorageSinks`, `ApexTestNode`, `ServerSession`, `CreateAppInstanceCommandInput`, `FilterContext`, `JSDocsType`, `SchemaRootKind`, `MediationStateChangedEvent`, `IUploadItem`, `VideoModes`, `SapphireDbOptions`, `CircularLinkedListNode`, `FilmQueryListWrapper`, `ContentRequestOptions`, `MutationConfig`, 
`apid.UnixtimeMS`, `ScaleThreshold`, `IDynamicOptions`, `AssetTotal`, `NativePlatformDefinition`, `BuildOptions`, `ContractNode`, `PrimAction`, `PendingUpdateDetails`, `AthleteUpdateModel`, `MetricData`, `RPCClient`, `AddTagsInput`, `ColumnId`, `ProjectionResult`, `CLM.ActionBase`, `RedisCommand`, `Cart`, `SortParam`, `IIArticlesState`, `SearchEmbeddableConfig`, `ContactMock`, `TableBatchSerialization`, `ZoomBehavior`, `OptimizeCssInput`, `ComponentRef`, `RNG`, `IC`, `INpmDependency`, `HttpResponseOK`, `BudgetGroup`, `INetworkPlayer`, `Inspector`, `IDirectory`, `HttpResponseObject`, `ImageData`, `CreateSubnetGroupCommandInput`, `UniqueId`, `WasmQueryData`, `requests.DeleteWorkRequestRequest`, `ServiceTemplate`, `ImportedConfiguration`, `UserActions`, `Invite`, `RopeBase`, `PluralRules`, `StorableComponent`, `WebsocketClient`, `Stitches.PropertyValue`, `DocgeniHostWatchOptions`, `IHSL`, `VirtualCloudNetwork`, `FrontMatter`, `ITab`, `EventResponse`, `AthenaExecutionResult`, `ParameterNameValue`, `Attribution`, `PrometheusClient`, `Utilities`, `TranslationProject`, `ParserResult`, `StructureLink`, `FormAppSetting`, `IPlDocTemplate`, `Neo4jService`, `AaiDocument`, `RequestStatus`, `CommentKind`, `FactoryBuilderQueryContract`, `ICreatorOptions`, `ImpressionSender`, `ConfirmDialogProps`, `Relation`, `WordCharacterKind`, `IBaseView`, `OrderDTO`, `DukBreakPoint`, `DirectionalLight`, `IntervalCollection`, `CdsInternalOverlay`, `SimpleStateScope`, `ComponentReference`, `DescribeRepositoriesCommandInput`, `RGB`, `SearchFacetOperatorType`, `SessionCsrfService`, `RangesCache`, `TriggerInteractions`, `TrieMap`, `UserMembership`, `BotTelemetryClient`, `IKeycodeCategoryInfo`, `InjectionToken`, `HexString`, `ReferencesNode`, `FormPropertyFactory`, `Objects`, `DisplayOptions`, `GeoJSON.Feature`, `FakePrometheusClient`, `LitElement`, `ClassProperty`, `AuthContext`, `DataSourceParameters`, `ContextParameters`, `TNew`, `x.ec2.SecurityGroup`, `GraphVertex`, `TemplateToTemplateResult`, `CompletionTriggerKind`, `RoomLayout`, `DefinitionProvider`, `IOptionTypes`, `TestViewController`, `IFieldsAndMethods`, `Yendor.IPersister`, `ExchangePriceRepository`, `HTTPMethod`, `requests.ListIamWorkRequestsRequest`, `TemplateTag`, `Providers`, `SinkBehavior`, `DateTimeData`, `IGenericTag`, `TriplesecDecryptSignature`, `ContainerGetPropertiesResponse`, `GameModule`, `IPersistence`, `CommandsMutation`, `HeaderGetter`, `ControllerFactory`, `TestRunner`, `SchemaProps`, `EventManagerConfig`, `MultiChannelAssociationCC`, `paper.Path`, `ZRText`, `PromiseSocket`, `DiscoverFieldProps`, `IDocumentServiceFactory`, `IMyTimeAwayItem`, `SwPush`, `ItemSearchResult`, `ContactDto`, `ReturnTypeFunc`, `RouteFactory`, `ApplicationTargetGroup`, `MockProvider`, `IAnalysisState`, `TabsConfig`, `ExternalCliOptions`, `SelectionChangeEventArgs`, `Build`, `Types.PostId`, `LineElement`, `ScreenshotCache`, `Dict`, `FieldArrayWithId`, `TokenBalance`, `ListDevicesCommandInput`, `Template`, `SWRConfiguration`, `ModelSpecBuilder`, `MovementType`, `MzInjectionService`, `Allocation`, `IWalkthroughStep`, `Vuex.Store`, `StaticMeshAsset`, `SelectItemDirective`, `SetElemOverlap`, `SavedObjectsImportError`, `JsonSourceFile`, `UsageCollectionSetup`, `ServiceType`, `VisualizationDimensionGroupConfig`, `MergedProblem`, `TokenSource`, `InputType`, `ExchangeInstance`, `RankingItem`, `DFAState`, `UserInfoInterface`, `NbMenuService`, `PresetOptions`, `PartitionStyle`, `d.FsReaddirOptions`, `MDCChipCssClasses`, `Schema$Sheet`, `DSTInfo`, `CustomerRepository`, 
`BespokeClient`, `SerialBuffer`, `AddonProperty`, `DaffCompositeProduct`, `FacemeshConfig`, `PoxInfo`, `VisitFn`, `ts.FileWatcher`, `UIViewAnimationTransition`, `ScalarType`, `TRequestWithUser`, `RawContract`, `ASSET_CHAIN`, `InitiateLayerUploadCommandInput`, `OpenChannelEvent`, `DragRefInternal`, `IErrData`, `HsLayoutService`, `SimpleLinkedTransferAppState`, `App.services.IRequestService`, `LoginEntity`, `AzHttpClient`, `FormContextValues`, `VisSavedObject`, `Duplex`, `ConditionTypeEnum`, `Contour`, `FunctionEntity`, `Season`, `FeltReport`, `TickerFuncItem`, `NbJSThemeOptions`, `d.ListenOptions`, `A.App`, `ClientAPI`, `AtomShellType`, `Context`, `MeetingEvent`, `StringOrNumber`, `Headers`, `cp.ChildProcess`, `PureSelectors`, `SubjectDataSetColumn`, `SearchSourceOptions`, `EntityMetadata`, `FilterType`, `HttpPayloadWithStructureCommandInput`, `XYZValuesObject`, `SfdxFalconProject`, `ThyAutocompleteConfig`, `array`, `OrOptions`, `RemoteBaseMock`, `AllState`, `NamedTensorMap`, `NavigateFunction`, `AllDocsResponse`, `ControlBarButtonProps`, `DBConnectionConfig`, `ItemModel`, `PsbtTxInput`, `ValidPropertyType`, `HttpsAgent`, `MaterialLayer`, `IPageRenderInstruction`, `com.google.firebase.firestore.FirebaseFirestoreException`, `ShareUserMetadata`, `TableFactory`, `AuthTokenEntity`, `UpdateUserResponse`, `ComponentBuilder`, `HTTPBuffer`, `TEventRangeType`, `NglInternalDate`, `CharRenderInfo`, `FlowNodeTypeResult`, `ManyToOneOptions`, `Vector2Like`, `Foxx.Router`, `QueryHistoryNode`, `pw.Page`, `Tspan`, `XrmStatic`, `Int16`, `SceneStore`, `DebugEditorModel`, `DaffCartTotal`, `JPABaseEmitter`, `PageResource`, `ConflictResolution`, `SecurityRule`, `IUIDefine`, `EntityContainer`, `DeleteResourcePolicyCommandOutput`, `IRChart`, `MigrateCommand`, `NotificationRequest`, `ValueMetadata`, `WorkerAccessor`, `TaskConfigurationScope`, `DBDriver`, `IPlayable`, `ModelQueryBuilderContract`, `FullName`, `Edit`, `NullableLocatable`, `ListNode`, `CompilerBuffer`, `PearlDiverSearchStates`, `AlbumType`, `IModelAppOptions`, `TableListViewProps`, `IInputProps`, `RoomInterface`, `DefaultEditorDataTabProps`, `Secrets`, `JSXExpressionContainer`, `ICountryGroup`, `EqlCreateSchema`, `ICheckboxProps`, `Evaluation.Response`, `WsViewstateService`, `Typography`, `FunctionAppService`, `Result`, `UnwrapRef`, `void`, `TabularCallback`, `EmitAndSemanticDiagnosticsBuilderProgram`, `interop.Reference`, `ErrorCodes`, `ServerDataImportStore`, `ComponentCompilerWatch`, `PropertyValueRendererContext`, `KVPair`, `CliCommandExecution`, `ListUsersResponse`, `OptionalWNodeFactory`, `TypeAlias`, `FormAzureStorageMounts`, `PacketChunk.TypeTCCStatusVectorChunk`, `JSON`, `HR`, `ModeManager`, `GeneratorFile`, `cc.Event.EventTouch`, `NotImplementedYetErrorInfo`, `SagaIterator`, `IVariable`, `ColorMap`, `ArrowFunction`, `TranslationChangeEvent`, `HydrateCacheOptions`, `StyleSetEvaluator`, `JsonLayout`, `LookUp`, `d.OutputTargetHydrate`, `LogParser`, `requests.ListSecretBundleVersionsRequest`, `POISearchParams`, `DataService`, `TodoService`, `CosmosDBManagementClient`, `IDomainEvent`, `GfxRenderPassDescriptor`, `CompiledResult`, `NetworkEndpointType`, `ColliderShape`, `ClassMemberLookup`, `TsExpansionPanelComponent`, `CdsInternalPopup`, `GenericOperationDefinition`, `FileSystemReader`, `BindingForm`, `GithubUserResponse`, `HoveredNavItemPayload`, `VitePluginConfig`, `IInterceptors`, `Idea`, `DeleteDomainCommandInput`, `CountOptions`, `LimitOrder`, `messages.DocString`, `BasePin`, `FutureBoolean`, `GLTFPrimitive`, `AppElement`, `ConnectController`, 
`AbstractSyntaxTree`, `ANodeStmList`, `MonitorState`, `BaseContext`, `ClientConfig`, `tf.io.ModelJSON`, `XUploadNode`, `SystemIconProps`, `ConversationTarget`, `TransformerOptions`, `WorkRootKind`, `HdmiInput`, `ESLToggleable`, `angular.IHttpService`, `ExpoWebGLRenderingContext`, `SignedMessageWithOnePassphrase`, `three.Geometry`, `ReleaseTag`, `ParseParams`, `TransformBaseline`, `ProxySettings`, `ParsedProperty`, `ConnectFailedListener`, `IFormatterParserResult`, `IssueStatus`, `CredOffer`, `AssertionContext`, `MdcSnackbarService`, `KeyframeNode`, `ConversationItem`, `IPublicKey`, `MealForm`, `IClassParts`, `Datastore.Context`, `ISelectorMap`, `TraceCallback`, `CollisionBox`, `LinkedNodeList`, `IteratorResult`, `UITabBarItem`, `ApprovalPolicyService`, `WordStyle`, `__HeaderBag`, `ODataRequest`, `ExecInspectInfo`, `Prism`, `MapDispatchProps`, `InterfaceDeclarationStructure`, `Prioritized`, `ColumnSettings`, `SessionTypes.Proposal`, `HsLaymanLayerDescriptor`, `RotationallySymmetricShape`, `WebhookEvent`, `TypeEquality`, `ValueFormField`, `ComponentMetadata`, `LetterSpacing`, `IPersonState`, `TimeoutRacer`, `PedersenParams`, `Id64Set`, `MaxAttrs`, `AList`, `CollectionEvent`, `SMTEntityDecl`, `PQP.Language.Type.TPrimitiveType`, `Top`, `WebAppCollection`, `ProjectOptions`, `NoncurrentVersionTransition`, `AutocompleteContext`, `MatTooltipDefaultOptions`, `OrganizationProjectsService`, `ResolvedRoute`, `Aser`, `FinancialViewEntry`, `StreamActivityModel`, `BitWriter2`, `CertificateDTO`, `TupleTypeReference`, `TemplateContext`, `CharacterCreationPage`, `ResourceId`, `LinterGetOffensesFunction`, `VideoConverter`, `TimerOptions`, `TreeNodesType`, `IRowMarker`, `IntrinsicTypeDescriptor`, `RemoteUser`, `ThemeBuilder`, `AlertSummary`, `TextMatchPattern`, `ListDevicesCommandOutput`, `ESTree.MethodDefinition`, `HomeState`, `JobResult`, `IGetTimeLogReportInput`, `IntegerParameterRange`, `Timestamped`, `CpuUsage`, `HTMLBRElement`, `AudioItem`, `QueryMessage`, `AvatarSize`, `SyntheticPerformanceBudget`, `FileUploadService.Context`, `ExclusiveTestFunction`, `TaskStatus`, `DeleteSchemaCommandInput`, `MouseWheelEvent`, `mapTypes.YandexMap`, `types.CSSProperties`, `ScannerState`, `SobjectResult`, `Drawable`, `DependOnFileCondition`, `CosmosBalance`, `CommandClasses.Basic`, `ErrorCallback`, `ResolvedRouteInfo`, `NodeModuleWithCompile`, `MicroframeworkSettings`, `SteeringPolicyPriorityAnswerData`, `AppsState`, `ObjectSet`, `ast.MacroCallNode`, `Variants`, `ListAvailabilityDomainsResponse`, `KeyboardManager`, `HitDatabaseMap`, `Postable`, `FindRoute`, `RBNFNode`, `CompositionTypeEnum`, `ViewableRobot`, `ApiJob`, `HitInfo`, `ESAssetMetadata`, `ActionQueue`, `JSONTopLevel`, `Quote`, `IWaveFormat`, `requests.ListManagedInstanceGroupsRequest`, `Accelerometer`, `LicenseState`, `DeleteTagsCommandOutput`, `TestTag`, `IO`, `PluginImport`, `ContentDirection`, `IToastCache`, `ListOfferingsCommandInput`, `ICurrency`, `ComponentConfiguration`, `ProjectStorage`, `NetworkingState`, `TickLabelBounds`, `EntityActionFactory`, `OidcClientService`, `UtilProvider`, `ContentRect`, `HierarchyRequestOptions`, `GestureState`, `ITokenObject`, `DocumentWatcher`, `ParsedEnumValuesMap`, `HaveIBeenPwnedApiResponse`, `IAssetComponent`, `ModelPredictArgs`, `requests.ListInstancesRequest`, `DashboardState`, `RenderingContext2D`, `MemoryStream`, `SecurityRating`, `Serializer`, `GeoBoundingBoxFilter`, `JPARandom`, `IRange`, `AuthenticationMethodInfo`, `V1DeleteOptions`, `SelectSeriesHandlerParams`, `ResolverRule`, `ThreeEvent`, `ThyFormDirective`, 
`CodeCommit`, `SubscriptionCallback`, `IJsonRPCError`, `ConcatenateLayerArgs`, `IconifyAPIIconsQueryParams`, `ByteBuffer`, `GPUDevice`, `DescribeReplicationConfigurationTemplatesCommandInput`, `PropertyLike`, `EventDispatcherEntry`, `Living`, `RouterNavigation`, `UpdateTemplateCommandInput`, `crypto.BinaryLike`, `AssignmentExpression`, `DoneFn`, `Beneficiary`, `RotType`, `FabricNode`, `GeoCoordinatesLike`, `MetadataScanner`, `StyledProps`, `RegionGetter`, `SelectChangeEvent`, `d.LoadConfigInit`, `IDBFactory`, `Primitive`, `Tsoa.Parameter`, `EventActionCallable`, `NestedPayloadType`, `TranslationPartialState`, `FormFieldModel`, `UpdateAliasCommandInput`, `ProviderInfo`, `Rpc`, `TestAwsKmsMrkAwareSymmetricDiscoveryKeyring`, `DeleteNamespaceCommandInput`, `IStorageOperationModel`, `TextureLoader`, `InventorySocket`, `AveragePooling3D`, `SvgPoint`, `VersionInfo`, `ELU`, `SFCBlock`, `BisenetV2CelebAMaskOperatipnParams`, `TokenVerifier`, `CandidateTechnologiesService`, `MonoStyleViews`, `BlockChainUser`, `BinaryOpComplexProgram`, `usize`, `IController.IFunction`, `DkrTexture`, `DocumentChangeAction`, `EnumField`, `FacetOption`, `CategoryModel`, `SpawnResult`, `RankingEntry`, `SendChannelMessageCommandInput`, `DChoice`, `BaseGraphRewriteBuilder`, `android.graphics.drawable.BitmapDrawable`, `ElasticsearchFeatureConfig`, `ShapeData`, `IClusterHealthChunk`, `PackageName`, `TypedPropertyDescriptor`, `NodeHeaders`, `WCLFight`, `Bzl`, `IAdjacencyBonus`, `GlimmerComponent`, `ShaderDescriptor`, `IEmailProvider`, `Cast`, `IUpdateStacksCommandArgs`, `ImageScrollBar`, `NodeSet`, `LayoutStateModel`, `IAmazonFunctionUpsertCommand`, `IHelpCenter`, `FileWatcherEventType`, `MapFunction`, `GestureController`, `LogicalElement`, `Buttons`, `ObjectContainerParams`, `IntervalType`, `webpack.RuleSetRule`, `NestedRecord`, `GridLayout`, `ProofStatus`, `TypeVblDecl`, `SliderComponent`, `KPuzzleDefinition`, `UIProps`, `ITimezoneOption`, `GraphQLInputFieldConfigMap`, `Options`, `UpdateWorkspaceCommandInput`, `SanitizerFn`, `IVirtualPosition`, `SymbolTracker`, `ServiceOptions`, `ReturnTypes`, `BaseApplication`, `AWS.DynamoDB.DocumentClient.Key`, `PrintableArea`, `RestApi`, `BuilderRun`, `JSONChunk`, `DefaultResourceOptions`, `AssetVersion`, `TreeNodeInterface`, `vscode.ConfigurationChangeEvent`, `StringLiteralExpr`, `ArmResourceDescriptor`, `SpineAnimation`, `GuildMessage`, `AuthStatus`, `EdmxFunctionImportV4`, `ValidateResult`, `SecurityReport`, `JSDocNonNullableType`, `ChannelResult`, `BackgroundProps`, `PrismService`, `FlowVariableAnnotation`, `ITaskLibrary`, `TLPointerEventHandler`, `Uint32List`, `requests.ListPublicationsRequest`, `FieldElement`, `pointerState`, `JsonPointerTokens`, `BorderRadius`, `KeyInfo`, `backend_util.BackendValues`, `ElementHarness`, `Cube`, `AnnotationObject`, `MenuInfo`, `ConfigFactory`, `ResolvedValue`, `TranslateHttpLoader`, `TabId`, `PipeState`, `MsgCreateDeployment`, `StellarCreateTransactionOptions`, `DataSeriesDatum`, `GroupDataService`, `NgForage`, `Applicative`, `HealthStatus`, `OAuthUserConfig`, `TestVisitor`, `ClockRotate`, `Positioner`, `_1.Operator.fλ`, `DiagnosticsCallFeatureState`, `DebugAction`, `CreateHitTesterFn`, `Union2`, `ISubgraph`, `K7`, `Game`, `SourceIntegrationInterface`, `https.ServerOptions`, `DefineDatas`, `EditorPackage`, `NetworkScope`, `SVGTSpanElement`, `CoreConnection`, `CarSpec`, `IPeer`, `AzureDeployerService`, `IClientRegistrarOptions`, `InstanceType`, `SpeculativeTypeTracker`, `ThemePlugin`, `SCXML.Event`, `RequestHandlerParams`, `GameDataInterface`, 
`FetchData`, `KeyAgreement`, `ClusterConfig`, `DiagramMakerNode`, `TransitionDoneFn`, `PublicationDocument`, `Module`, `RecommendationLifecycleDetail`, `CustomRouteShorthandOptions`, `SessionCache`, `IBooleanFlag`, `Lanes`, `Step`, `AuthoringWorkspaceService`, `Communicator`, `CallbackType`, `GT`, `SupportedLocale`, `ChatMessageType`, `ContactResponse`, `HDKeychain`, `LogHook`, `TextElementLists`, `TilemapSeries`, `FileSet`, `s.Field`, `LIST_ACTION`, `FirestoreSimple`, `RestConfigurationMethod`, `RandomNormalArgs`, `IEventInfo`, `Pair`, `DataGridColumn`, `t_08f7c2ac`, `TimelineActivityKind`, `WetPlaceholder`, `Peak`, `MatchProps`, `TransferBatch`, `ResolvedDependency`, `LinkProps`, `StateBase`, `TestApi`, `GetAdministratorAccountCommandInput`, `ValidTimeoutID`, `RoleOption`, `SkeletonProps`, `OctokitType`, `ControlContainer`, `Shard`, `ExpressRequest`, `Week`, `TreeDataProvider`, `UIBeanStorage`, `Environments`, `DebtItemInterface`, `TestFailure`, `ServerModel`, `ImperativeBase`, `ChildSchoolRelation`, `immutable.Set`, `GenericStatusModel`, `CarModel`, `NavigationGuardNext`, `EditorService`, `StackFrame`, `TransmartPackerHttpService`, `TypeMoq.IMock`, `ValuesMap`, `ICardFactory`, `LiveShare`, `Cubemap`, `AudioWorkletNode`, `IScriptCode`, `MetaClient`, `SingleOrArray`, `AccountFilterData`, `IndentedWriter`, `ColorFactory`, `ColumnMeta`, `AnnotationLineProps`, `StorageObject`, `OpenCLBuffer`, `IUILayoutViewController`, `ProjectType`, `MockClass`, `SupCore.Data.EntryNode`, `NodeI`, `HelpCenterAuthorService`, `IRemix`, `ContentTypeService`, `CollisionPartsFilterFunc`, `DCollection`, `EmitContext`, `EntityActionParam`, `ReservedParameters`, `IModelRpcProps`, `FacsimileStorage`, `PublicCryptoKey`, `ConstructorOrField`, `CalendarHeatmapDataSummary`, `FieldSpec`, `EqualityConstraint`, `TupleAssignmentNode`, `IHWKeyState`, `ResponseReceivedEvent`, `MarkerDoc`, `TreemapSeriesNodeItemOption`, `LocationItem`, `dayjs.Dayjs`, `RowGroup`, `CodeSpellCheckerExtension`, `AlainI18NService`, `ToplevelRecord`, `LSTM`, `CountdownEvent`, `PropsWithChildren`, `Protocol.Network.RequestWillBeSentEvent`, `XsltPackage`, `JsonStringifierTransformerContext`, `THREE.ShaderMaterial`, `SGMark`, `JSONSearchParams`, `InteractionStore`, `IMembership`, `RowRenderTreeType`, `CollapsedTransform`, `ComponentDescriptor`, `DidResolutionOptions`, `EC`, `SourceDescriptionItem`, `RequestOpts`, `IObserver`, `SiblingGroup`, `VectorView`, `jwt.SignOptions`, `RecurringActivity`, `TabsState`, `GQLResolver`, `HistoryStatus`, `CharacterSetECI`, `RichText`, `HierarchicalEntity`, `RequestCredentials`, `RecommendationLevel`, `RollupBlockSubmitter`, `MethodMaterial`, `SVFunc`, `CreateElementRequiredOptions`, `IExplanationMap`, `DvServiceFactory`, `FirebaseFirestore.CollectionReference`, `ProviderFrameElement`, `vscode.TextDocumentChangeEvent`, `monaco.editor.IEditorMouseEvent`, `ZipsonWriter`, `ColumnFilterDescriptor`, `AnimatorRef`, `ConfigAggregator`, `Mill`, `VersionMismatchFinderEntity`, `CreateOneInputType`, `Pilotwings64FSFile`, `LitecoinPaymentsUtilsConfig`, `SiteData`, `XmlNamespacesCommandInput`, `DemoAppAction`, `ScanDetails`, `JavaMethod`, `ts.TupleTypeNode`, `Sequential`, `webpack.Configuration`, `PutResourcePolicyRequest`, `InteractionMode`, `IsGroupIndentCellFn`, `TestSuite`, `PolyfaceVisitor`, `LegendStateProps`, `ReadonlyPartialJSONValue`, `NoteItemComponent`, `SignedCipherObject`, `ComponentFactoryResolver`, `NlsBundle`, `ChangePasswordRequest`, `DiscoveredMethodWithMeta`, `EmitType`, `ActionByType`, `AnyItemDef`, 
`ViewContainerRef`, `CompoundSchema`, `MagickFile`, `DirectiveDefinitionNode`, `ASScope`, `MlRoute`, `ConnectivityInfo`, `ComposableFunctionArgs`, `IMessageMetadata`, `SharedGeometryStateStyle`, `ShapeConfig`, `FileLocation`, `SurveyLogicType`, `WebGLResourceHandle`, `AddTagsToResourceMessage`, `ChildProcess.ChildProcess`, `ScanResult`, `QueueSSEService`, `Snake`, `ConfigurationsClient`, `Vector`, `DomainInfo`, `ImageHelper`, `IWebhookMatchedResult`, `GroupItemDef`, `ActionItem`, `GatewayShardsInfo`, `StructureTerminal`, `providers.JsonRpcProvider`, `JsonResult`, `RailPart`, `UrbitVisorConsumerExtension`, `ExtractedCodeBlock`, `ResourcesModel`, `HistoryOptions`, `SymbolWithParent`, `StateEither`, `T.RenderFunction`, `PathInfo`, `NzFormatEmitEvent`, `tracing.ReadableSpan`, `Ids`, `CloseButtonProps`, `AlgoFn`, `BeforeUnloadEvent`, `CipherGCM`, `ListTablesResponse`, `ProjectMap`, `ModalNavigationService`, `BrowserFields`, `CandidateInterviewersService`, `WalletBalance`, `IOSNotificationCategory`, `ElementAction`, `UseMetaState`, `GoogleStrategy.Profile`, `Recognizer`, `http.Server`, `Create`, `AnyExpressionTypeDefinition`, `WebSocketMessage`, `Tuple`, `AmqpConnection`, `Subspace`, `Recipe`, `Then`, `CubemapSky`, `Gate`, `DictionaryNode`, `RestApplication`, `SpendingConditionOpts`, `EnsuredMountedHTMLElement`, `LegendOrientation`, `ServiceDescriptorProto`, `VideoPreferences`, `AnalyicsReporterConfig`, `NamedCurveKeyPair`, `DigestCommandOptions`, `EnableOrganizationAdminAccountCommandInput`, `PeriodData`, `SessionProposal`, `PieVisualizationState`, `NormalizedModule`, `IScript`, `ListDomainDeliverabilityCampaignsCommandInput`, `TheiaBrowserWindowOptions`, `NodeTracerProvider`, `DstatementContext`, `requests.GetDomainRecordsRequest`, `NumberInputProps`, `object`, `CalendarContext`, `SeoService`, `IEnvironment`, `IReduxAction`, `CanvasImageSource`, `BalanceRequest`, `GlobalReplicationConfig`, `IAllTokenData`, `AggParamType`, `ValueEdge`, `EventStreamSeed`, `PacketEntity`, `TProductFilter`, `VanessaGherkinProvider`, `JHistory`, `BackupData`, `Conv2DConfig`, `SecretsManager`, `UtxoInfoWithSats`, `IPluginConfig`, `NucleusFile`, `ErrorBoundaryState`, `GlobalEventHandlers`, `ReplayEntity`, `Advisor`, `DescribeJobCommandInput`, `CISource`, `DescribeDeviceCommandInput`, `NgrxJsonApiStore`, `NodeParser`, `Phaser.Types.Core.GameConfig`, `DirectedEdge`, `SpeculativeContext`, `GetServersCommandInput`, `ProcessId`, `KeywordToken`, `Scale`, `WEBGL_debug_renderer_info`, `SortKeyRule`, `PipelineId`, `RendererNode`, `ProxyValue`, `ParamSchema`, `PlaneAltitudeEvaluator`, `ContextWithFeedback`, `XFilter`, `IndexPatternsServiceDeps`, `DataField`, `Pokemon`, `GovernanceAccountType`, `BTCSignedTx`, `LoadedEnv`, `MerchantOrderEntity`, `FeatureFlagType`, `DescribeEventAggregatesCommandInput`, `UploadMetadata`, `InterfaceMapValue`, `ApiClient`, `JWTSignOptions`, `Problem`, `ProjectServer`, `TagRenderingConfig`, `GitUrl`, `ToastsApi`, `VisorSubscription`, `Modification`, `ExtendedCluster`, `SourceFileContext`, `SplitField`, `IMobileTarget`, `AxisTitleOptions`, `ViewProps`, `IAuthUser`, `VehicleInfo`, `OnBoardConfig`, `React.Dispatch`, `TBigNumber`, `EventModelImplUnion`, `SharedElementNode`, `TestContextData`, `IncomingHttpRequest`, `ConfigBuilder`, `MapboxGeoJSONFeature`, `BeforeCaseCallback`, `Res`, `PositionTranslate`, `ArenaNodeInline`, `CSSScope`, `MatchReport`, `Stack`, `RenderCamera`, `ColorRef`, `DaffCartCouponFactory`, `RoomMember`, `PluginConfigItem`, `TestInfo`, `InlineFieldDescriptor`, `SignatureFlags`, 
`DefinitelyTypedTypeRun`, `SearchResultsAlbum`, `InterpolateData`, `TableEntry`, `ObjectCsvStringifier`, `AdmZip`, `InterpolationCurve3d`, `AR`, `d.PrerenderResults`, `GlobalConfig`, `EC_Public_JsonWebKey`, `PagerBase`, `yargs.CommandModule`, `IProductOptionGroupTranslatable`, `capnp.Pointer`, `SfdxFalconResult`, `XmlMapsCommandInput`, `PrivateUser`, `Renderers`, `ReuseCustomContextMenu`, `blockchain_txn`, `DagOperator`, `CaseBlock`, `PropTypesOf`, `UpdaterService`, `RouteParam`, `ReplyRequest`, `SnippetModel`, `ImportNamespace.Interface2`, `IEdgeAD`, `SignedByDecider`, `TestGraphic`, `Languages`, `PlatformService`, `KanbanSplitResult`, `DaffCategoryFilterToggleRequestEqualFactory`, `WorkspaceFoldersChangeEvent`, `SubMiddlewareBuilder`, `DiscoverUrlGeneratorState`, `ReadGeneratedFile`, `AppServiceBot`, `VariableService`, `ArrayCriteriaNode`, `DeleteStorageObjectsRequest`, `PaginationNavProps`, `vscode.TextEdit`, `GX.IndTexStageID`, `DriverContext`, `RRdomTreeNode`, `ErrorExpressionCategory`, `DebugProtocol.ScopesResponse`, `DeviceManagerClient`, `BookingsModel`, `FeatureDescriptor`, `PlayerInfo`, `BodyElement`, `PaginationResponseV2`, `ViewerOptions`, `iconType`, `Pipette`, `IEquipmentSharingPolicy`, `TimerState`, `SortOption`, `PatternUnknownProperty`, `SharedContents`, `IResponseAggConfig`, `TokenRequest`, `TriggerType`, `DejaPopupReponse`, `AbortSignal`, `RecordData`, `OneOf`, `instantiation.IConstructorSignature4`, `DeletePolicyVersionCommandInput`, `ITerminalContext`, `EnumerationDefinitionSchema`, `AddTagsToResourceCommandInput`, `LocalActorSystemRef`, `TokenData`, `HTMLIonTabElement`, `JoinerCore`, `UI`, `OgmaPrintOptions`, `Filesystem.FileExistsSync`, `BeforeCaseContext`, `RightHandSideEntry`, `DecompilerEnv`, `Twitter`, `IImageData`, `ListTasksRequest`, `MockDrake`, `AssociationCCRemove`, `CyclicTimeFrame`, `CreateApplicationCommand`, `WriteConditionalHeadersValidator`, `IOSNotificationAttachment`, `ProjectSettings`, `Protocol`, `R2`, `TreeModelChanges`, `React.MutableRefObject`, `ContractTransactionOverrides`, `VirtualCell`, `callback`, `DeleteFn`, `SubEntityData`, `FluidObjectMap`, `Prisma`, `DOMExplorerDashboard`, `RegExpOne`, `SceneActuatorConfigurationCCGet`, `QuickPickItem`, `Poll`, `PluginDefinition`, `KeySignature`, `Function1`, `DataTableFormatProps`, `DeleteKeyPairCommandInput`, `FlowLogInformation`, `CellEventArgs`, `Spy`, `EditPropertyConfig`, `OrganizationDepartmentService`, `IFieldProps`, `DrawerControl`, `InvalidNextTokenException`, `AssetPropertyValue`, `EnvironmentResource`, `ChangeSetType`, `FunctionDeclaration`, `SourceEngineView`, `KeypairBytes`, `TimeBuckets`, `NamespaceExportDeclaration`, `EthereumPaymentsUtils`, `TransactionWalletOperation`, `DirectoryObjectListResult`, `tensorflow.IFunctionDef`, `RequestApprovalEmployee`, `MerkleInclusionQuantifier`, `SqlFragment`, `OrdersService`, `TestRunArguments`, `ThreadID`, `T5`, `KeyPairEthereumPaymentsConfig`, `BatchCreateAttendeeCommandInput`, `CronJobOptions`, `CustomQuery`, `DropInfo`, `iAst`, `Bin`, `MerchantService`, `VirtualMachine`, `OptionsType`, `GeoCoordLike`, `PaymentInfo`, `AddressLabel`, `LocationService`, `ClusterData`, `ResponderHelper`, `GL`, `ILayoutState`, `ModelJsonSchema`, `Dialogic.IdentityOptions`, `HostClient`, `HTMLTextAreaElement`, `AuthorizeConfig`, `ModuleFormat`, `StockSandbox`, `LegacyCompilerContext`, `ApmSystem`, `ExtendedCanvasRenderingContext2D`, `d.BuildLog`, `ScaffoldType`, `doctrine.Type`, `XYCoord`, `VNodeElement`, `IMemFileSystem`, `Remarkable`, `CmsEditorFieldTypePlugin`, 
`IHttpProvider`, `LimitLeafCounter`, `LoggerTransport`, `MouseState`, `DataQueryRequest`, `OrderImpl`, `TNSPath2DBase`, `Tagged`, `MmpService`, `WebRtcTransport`, `DigitalInOutPin`, `egret.TouchEvent`, `PageImportExportTask`, `ListRegistriesCommandInput`, `LoopTemplate`, `TaxonomicFilterGroupType`, `PolymerElement`, `OnSaveProps`, `Logger`, `ts.ObjectLiteralExpression`, `Angulartics2GoogleAnalytics`, `CLM.ScoredAction`, `ActiveMove`, `RolloutTracker`, `OperatorUser`, `SafeBlockService`, `StatusProps`, `ComponentController`, `ScrollToService`, `TriggerInternal`, `KubeContext`, `GitExtension`, `Foobar`, `GeneratorPaths`, `SwipeGestureEventData`, `ImportObject`, `LoadSettings`, `ExpressionsSetup`, `LinearOptions`, `RuleAction`, `TestProduct`, `NucleusApp`, `ObjectTypeComposerFieldConfigDefinition`, `IGetExportConfigsResponse`, `CircleProps`, `MapViewFeature`, `IHotspotIndex`, `SpotTag`, `DescribeTableCommandInput`, `IProseMirrorNode`, `MonzoService`, `argsT`, `PropsFieldMeta`, `WildlingsAttackGameState`, `ExpressionParams`, `Cocoen`, `ElfSectionHeader`, `MetaTagState`, `CpuState`, `SelectionsWrapper`, `HTMLIonActionSheetElement`, `GoogleMap`, `PageItem`, `Models.DiagnosticsSettings`, `ClippedRanges`, `ListDomain`, `DatabaseParameterSummary`, `InMemoryOverlayUrlLoader`, `QuickPickStep`, `VirtualFile`, `TestContextCustom`, `BasicProps`, `IInput`, `CheckAndApproveResult`, `CollectorFilter`, `FullCalendar.ViewObject`, `UsageCollectionPlugin`, `SceneExport`, `TargetedEvent`, `MemoryStore`, `Chromosome`, `CdkTreeNodeDef`, `Aabb3`, `ThisAddon`, `LedgerWalletProvider`, `ParamMetadata`, `PaletteThemeConfig`, `IRequestMap`, `ResponsiveState`, `ELogLevels`, `ResultInterface`, `CardService`, `CloudflareApi`, `OnceTask`, `MouseAction`, `HandleReference`, `KuduClient`, `requests.ListObjectsRequest`, `SpriteSheet`, `AcceptedNameType`, `ResourceMetadata`, `LegendEntry`, `AnimatedValue`, `StatusBarItemsManager`, `IJetURL`, `VNodeWithAttachData`, `IImageryMapPosition`, `Heatmap`, `NewLineType`, `ZoneManager`, `SavedObjectsRemoveReferencesToOptions`, `StringStream`, `ThreadChannel`, `PolyserveApplication`, `_ResourceConstantSansEnergy`, `TableViewModel`, `StorageModuleAsyncOptions`, `ParameterToken`, `Sequence`, `ConfigurationListItemType`, `DeleteDetectorCommandInput`, `RRTypeWindow`, `GitHubUser`, `PutRecordCommandInput`, `ObjectOptions`, `DictionaryService`, `EndpointClass`, `ParsedResponse`, `DistinctPoints`, `React.ReactNodeArray`, `VideoTileController`, `Handlebars.TemplateDelegate`, `Asteroid`, `BasicObstacleSide`, `CssBlockError`, `CaBundle`, `UpdateDashboardCommandInput`, `BigQueryRetrievalRow`, `AudioVideoObserver`, `ScheduleType`, `XmlBlobsCommandInput`, `Express.Request`, `SearchSourceFields`, `Subsegment`, `VdmParameter`, `NodeIdLike`, `IMoveDescription`, `BlockNodeRecord`, `FileChangeType`, `Notice`, `PutPermissionCommandInput`, `AnyType`, `d.HostRule`, `DataOption`, `CivilTCR`, `DeleteIPSetCommandInput`, `RemoteParticipant`, `d.OptimizeJsInput`, `MutationSubState`, `SurveyLogicItem`, `EditArticleDto`, `ISO8601Date`, `TCollectionSchema`, `ProxyInstance`, `IAudioSource`, `vscode.CancellationTokenSource`, `GetByIndex`, `TrustToken`, `InitializeSwapInstruction`, `Transformed`, `SlaverListener`, `ConversationTimeline`, `ProposalTemplateService`, `DebuggingMode`, `TelegramBot`, `ExpressMeta`, `IFormSectionData`, `DocumentTypeDef`, `DoomsdayDevice`, `S3PersistenceAdapter`, `ResultPath`, `LroImpl`, `GetByEmailAccountsValidationResult`, `RDQuery`, `grpc.Client`, `CreateApplicationRequest`, `CustomText`, 
`ExpandGroupingPanelCellFn`, `PromiseBase`, `IGameChara`, `CommandLineParameter`, `PSIBoolean`, `StringToUtf32`, `IQueueRow`, `DeletePortalCommandInput`, `ColliderComponent`, `MySpacesComponent`, `InitialAlert`, `CrossTypeHost`, `TransitionFn`, `Values.ReadyValue`, `BaseApi`, `UserOrganizationService`, `ExecuteCommandState`, `Check`, `BoxShadowItem`, `DesignerTypeOption`, `UnaryFunction`, `TRuleResolver`, `WorkerConfig`, `Percentage`, `Transaction`, `EventToAsyncUnHandler`, `ExceptionType`, `PythonPlatform`, `IMenuItemConfig`, `Self`, `ISagaModule`, `ConfigSet`, `CreateEnvironmentCommandInput`, `StorageArea`, `PartialBotsState`, `MailSettings`, `DbSchema`, `ɵAngularFireSchedulers`, `IPage.IRequest`, `TPlacementMethodArgs`, `TypeKindEnum`, `CoreURI`, `Value.Of`, `Trash`, `IAssignmentUnitModel`, `CustomType`, `PlotSpec`, `IMutableFlatGridItem`, `Searcher`, `ScalarMap`, `StackReference`, `ISetCategoricalFilter`, `GetInfoResult`, `MessageRequester`, `DataClient`, `MediaExtended`, `TAny`, `SyslumennAuction`, `Flo.EditorContext`, `ProjectMeta`, `ForgedResponse`, `WebRequest`, `CanvasPinRow`, `lsp.TextDocumentPositionParams`, `CachedVoiceState`, `NodeLinks`, `FileSystemProviderWithOpenReadWriteCloseCapability`, `d.BuildConditionals`, `FaviconOptions`, `PointerCoordinates`, `StartFrame`, `GraphicStyles`, `ElasticSearchOptions`, `ModuleThis`, `ZeroPadding2D`, `MdcSnackbarConfig`, `CucumberRunner`, `VertexTypeStore`, `PrismaClientOptions`, `ButtonOptions`, `StatefulCodeblock`, `SHA512_256`, `NodeType`, `ManifestInstance`, `SendTx`, `EditLog`, `CompoundFixture`, `MultipartFile`, `IOObject`, `ProtocolError`, `StateManager`, `SignInPayload`, `TransferTransactionUnsigned`, `UriService`, `NormalRequest`, `Eq.Eq`, `SlashCreator`, `RpcRemoteProxyValue`, `SpansRepository`, `ANK1`, `InterpolationStep`, `TypeConditionRestriction`, `PlanarMaskBaseTool`, `TrackItem`, `Mention`, `RulesModel`, `UiService`, `PositionComponent`, `RumConfiguration`, `ErrorReporterConstructorContract`, `PyChessModel`, `turkInformation`, `MemBank16k`, `reduxLib.IUseAPIExtra`, `ODataStructuredTypeParser`, `BinarySwitchCCSet`, `UserTokenAccountMap`, `SingleConfig`, `IMyChangeRequestItem`, `ExecutorMessages`, `AVPlaybackStatus`, `IRenderLayer`, `CollisionScaleType`, `Toolkit`, `ts.CompilerOptions`, `CustomFunction`, `BespokeServer`, `StatusState`, `DOMUtils`, `IAgent`, `Package.ResolvedPackage`, `InteractionType`, `DAL.KEYMAP_MODIFIER_POS`, `LocalTitle`, `NPCActorItem`, `ToastOptions`, `ThyTranslate`, `SourceFileNode`, `ELanguageType`, `UploadAssetOptions`, `BufferedTransport`, `ODataQueryArgumentsOptions`, `GraphImmut`, `RequestSet`, `TiledSquareObject`, `LgQuery`, `ISelection`, `XPCOM.nsIHttpChannel`, `PaletteConfig`, `WalkerCallback`, `AnimeListStatusFields`, `MacroHandler`, `IniData`, `OpDescription`, `ContainerRepository`, `ExpectStatic`, `ISettingsState`, `MiddlewareFnType`, `EventTopics`, `requests.ListEdgeSubnetsRequest`, `HeadBucketCommandInput`, `PDFBool`, `TestHandler`, `CacheOptions`, `OpenApiPersistedSchema`, `requests.ListShapesRequest`, `DraggableLocation`, `FloatTypedArrayConstructor`, `DecoratedError`, `ThemeTag`, `ChatTab`, `LanguageEntry`, `Iso`, `EnumValueDefinitionNode`, `PointData`, `AnalyticsProperties`, `TextEncoder`, `ConfigChecker`, `IToken`, `ParsedCommandLine`, `ManagedBackupShortTermRetentionPolicy`, `SFSchema`, `DebugProtocol.AttachResponse`, `ExpectResponseBody`, `RequestArguments`, `SrtpSsrcState`, `HealthCareApisClient`, `ReferencesResult`, `IPanelProps`, `ParsedSystem`, `Effects`, 
`NgxDateFnsDateAdapter`, `TsRadioOption`, `TreeNodeHTMLElement`, `JournalShowQueryParams`, `GenericMeasure`, `Spark`, `JRPCResponse`, `CreateRangeChartParams`, `TraverseFunction`, `RegionMetadataSchema`, `ISetOverlapFunction`, `Where`, `UseMutationResponse`, `StringColumn`, `KeyboardScope`, `MyEThree`, `CredentialCreationOptions`, `SO`, `SubscriptionOption`, `ServerConfig`, `InternalBema`, `Branch`, `HookEffects`, `SetIamPolicyRequest`, `RootPackageInfo`, `Algebra.PlanNode`, `StorageType`, `ThemeOptions`, `TransitionableCielchColor`, `TrackBuilder`, `ChangeSet`, `SubMenuProps`, `MoveCheck`, `PointerAbstraction`, `UserFormValues`, `SkyhookDndService`, `MixerCommunicator`, `ng.IIntervalService`, `TimeQueryData`, `DocumentExtra`, `KeyboardDefinitionSchema`, `TypeAssertion`, `FairCalendarView`, `TimelinePath`, `RpcMessageSubject`, `SettingNames`, `OPaths`, `MockComm`, `JsxTagNameExpression`, `scriptfiles.ASScope`, `SerializedTreeViewItem`, `reflect.ClassType`, `ListDatabasesRequest`, `Types.NavigatorRoute`, `BMapGL.Point`, `ast.EscapeNode`, `EntitySchemaField`, `IDockerImage`, `IdentityView`, `FundedAwardModel`, `IAbstractControl`, `d.BuildResultsComponentGraph`, `Vec`, `BullBoardRequest`, `ContentLayoutProps`, `ListThemesCommandInput`, `OperationQueryParameter`, `TableResult`, `IEditEntityByMemberInput`, `Ver`, `ModifyEventSubscriptionResult`, `requests.ListCatalogsRequest`, `MessageExecutor`, `CommandExecutor`, `MenuAction`, `InvalidateOptions`, `AssetManifest`, `RepositoryState`, `End`, `SendMessageData`, `ParsedLocator`, `GfxChannelBlendState`, `PureSelectorsToSelectors`, `RangeProps`, `MaterialFactory`, `OperatorFormat`, `EditorSuggestionPlugin`, `ObservableVocabulary`, `IVisualHost`, `LinkTextLocator`, `VcalAttendeeProperty`, `TestFunctionImportMultipleParamsParameters`, `BlockbookBitcoin`, `ListSecretsRequest`, `CreateChildSummarizerNodeFn`, `MatBottomSheet`, `PingMessage`, `ReflectionGroup`, `SenderDocument`, `TypeDetails`, `ZoweUSSNode`, `CollectionInstance`, `FakeImporter`, `DeepType`, `ContextModel`, `Vector2D`, `BackgroundFilterOptions`, `ScheduledCommandInfo`, `Remirror.CommandDecoratorOptions`, `IGetTasksStatistics`, `AppserviceMock`, `InferableAction`, `TAccount`, `RegexDialect`, `BoxGeometry`, `NetworkVersion`, `ChildGraphItem`, `MediaPlaylist`, `ParentContexts`, `BlockDocument`, `IDebugger`, `RegSuitConfiguration`, `CharacterMetadata`, `Voice`, `FactoryFn`, `TruncatableService`, `ParameterTypeModel`, `Point3d`, `EmailDoc`, `Lock`, `GeoJSONGeometry`, `SelectionRangeParams`, `NodeEncryptionMaterial`, `CreateProcedureWithInputOutputParser`, `TerminalProcess`, `StandardMaterial`, `QueryOutput`, `Renderer`, `Handlers`, `WidgetObject`, `CounterMetric`, `DeleteTableCommandInput`, `InvokeArgument`, `ValueValidator`, `AttributeService`, `VersionId`, `WithIndex`, `GlobalModelState`, `ISubprocessMessageBase`, `EvaluatedChange`, `Oazapfts.RequestOpts`, `DecryptResultPmcrypto`, `JsonRpcPayload`, `DeleteWorkflowCommandInput`, `TilePathParams`, `ElMessageBoxOptions`, `G6Edge`, `ISmartMeterReadingsAdapter`, `AppNotification`, `FeeAmount`, `IAmazonServerGroup`, `TsPaginatorMenuItem`, `LockHandle`, `ZoneLocator`, `EpicTestMocks`, `Type_AnyPointer_ImplicitMethodParameter`, `ContentReader`, `OperationArguments`, `InterfaceWithCallSignature`, `DisplayDataAmount`, `RSPSharedOutput`, `ErrorMessageProps`, `VisibilityState`, `InvoiceService`, `SourceRootInfo`, `AuthPluginPackage`, `ElementInfo`, `PursuitRow`, `tr.events.Name`, `GridReadyEvent`, `Validation.Result`, `IMatcherFunction`, 
`RouteShorthandOptions`, `IWorkflowData`, `LiteloaderVersion`, `VectorArray`, `ParticipantTracks`, `Cookies.Cookie`, `RetryOptions`, `PublicDeviceDTO`, `Fn3`, `DefaultReq`, `ScannedReference`, `RangePointCoordinates`, `Ret`, `LogoImageProps`, `SV`, `TweetMedia`, `ExtendedPostFrontMatter`, `ViewportSize`, `IDataSourceDictionary`, `Migration`, `BaseIndexPatternColumn`, `ReaderStateParserLike`, `ECSqlInsertResult`, `PreviewData`, `DeleteEventSubscriptionCommandInput`, `LogAnalyticsSourceMetric`, `CameraOptions`, `RNN`, `ReflectedType`, `ISODate`, `FormGroupControls`, `Prediction`, `Coll`, `FormSchema`, `VueFile`, `GravityInfo`, `Evees`, `CurrentAccountService`, `QueryClient`, `ImageStyleProps`, `Values`, `ListEvents`, `StyleTokens`, `NzCarouselContentDirective`, `DefaultReconnectDisplay`, `ScryptedRuntime`, `AwilixContainer`, `ExternalService`, `TPluginsStart`, `ProtocolRequestType`, `UpdateDatabaseCommandInput`, `LanguageConfiguration`, `EdaBlankPanelComponent`, `S3MetricChange`, `ExecutionContextInfo`, `CriteriaGroupType`, `DocumentRegistry.IContext`, `TerminalApiRequest`, `ExcludedRule`, `Navigator`, `SchemaValidationResult`, `StripeElements`, `HTMLCmpLabelElement`, `S1Sale`, `UnitTestTree`, `DeleteExpression`, `TexGen`, `RouteArg`, `DaoTokenWrapper`, `RelativeDateFilterTimeUnit`, `monaco.languages.FormattingOptions`, `SequenceNumber`, `t.TSType`, `requests.ListContainerDatabasePatchesRequest`, `Attrs`, `Union3`, `XYState`, `IContextMenuItem`, `CipherBulkDeleteRequest`, `MdDialogRef`, `ShownModallyData`, `MarkdownSerializerState`, `GoogleAppsScript.Spreadsheet.Sheet`, `ESTree.ImportDeclaration`, `ICommonTagsResult`, `ComponentCompilerStaticProperty`, `IConnectionFactory`, `PutConfigurationSetSendingOptionsCommandInput`, `app.FileService`, `GenericThemes`, `IRoomData`, `UrlWithParsedQuery`, `CalendarHeatmapData`, `EqualityComparison`, `SystemRequirement`, `BleService`, `TFLiteNS`, `RootStoreType`, `Integer64`, `ContainerWithState`, `Key4`, `BufferAttribute`, `IFileStore`, `ElementDescriptor`, `IThrottleService`, `GX_Material.GXMaterial`, `EntityResolver`, `CompiledPath`, `HierarchyOfArrays`, `TokenIdentifier`, `TabulatorThingChanges`, `Equivalence`, `ContentNode`, `VectorStage`, `Coverage`, `UsersServiceTest`, `LowLevelResponse`, `CSSProperty`, `TimePeriod`, `ChartState`, `AccountsModel`, `HTMLElement`, `SyntaxCursor`, `Spreadsheet`, `PromiseMap`, `DocInfo`, `ScaleValue`, `Transform2D`, `DaffCartShippingInformation`, `FormWindow`, `ScreenEventType`, `NodeInputs`, `ExportTypeDefinition`, `HorizontalPlacement`, `InboundStream`, `IPromise`, `GitManager`, `IPropertyWithName`, `NetworkPolicy`, `ThingView`, `ServiceConfig`, `DaffContactState`, `AccessRuleCriteria`, `ConfigDefinition`, `GlobbyOptions`, `StableTokenInstance`, `VariantFunction`, `DomSource`, `GetConnectionsCommandInput`, `XYChartSeriesIdentifier`, `MetadataProvider`, `ConventionalCommits`, `StrongExpectation`, `egret.Shape`, `PaginationDTO`, `ZipFileEntry`, `ProjectStep`, `CRG1File`, `SavedObjectUnsanitizedDoc`, `TabChangeInfo`, `OrderbookL2Response`, `ESLAnimateConfigInner`, `IExecuteResponsePromiseData`, `GetResourceCommandInput`, `BillAmount`, `IItemRenderData`, `FormlyDesignerConfig`, `PrunerPiece`, `ApplicationParameter`, `TabifyBuckets`, `requests.ListZoneTransferServersRequest`, `SqrlExecutable`, `NmberArray9`, `DevToolsNode`, `IPlaylist`, `ma.TaskLibAnswers`, `INamedVector`, `BuildApiDecOpts`, `RenderPassId`, `AppealChallengeData`, `ScopedHandler`, `TinymathAST`, `CategoricalChartState`, `OnCacheChangeEventFn`, 
`PokerScoreService`, `PDFFont`, `NexusScalarTypeDef`, `FileSyntax`, `thrift.TList`, `ISelEnv`, `SFieldDescribe`, `RenderContainer`, `IosDependency`, `FocusTrapInertStrategy`, `EvmType`, `ObservableQueryBalanceInner`, `KSolvePuzzleState`, `UILayoutViewController`, `IPopoverProps`, `NodeCheckFn`, `IContentType`, `ICData`, `RadioGroup`, `CustomStyle`, `LogBoxLayout`, `SVGAttributes3D`, `TimelineOptions`, `StreamModelWithChannel`, `Series`, `ValidationResult`, `IEventPlugin`, `FileFormat`, `ModuleOptions`, `BedrockFile`, `ColumnConfigArg`, `TEObject`, `IFirmware`, `Sampler`, `ApplicationListener`, `CancelFnType`, `Data`, `APIResponseType`, `TagsViewState`, `IDictionary`, `InvalidArnException`, `MatrixEntry`, `AccessoryTypes`, `AaiChannelItem`, `PluginsAtomType`, `ConsoleLike`, `t.NodePath`, `IColor`, `UNIST.Node`, `vile.YMLConfig`, `Electron.BrowserWindow`, `PvsVersionDescriptor`, `IAPIService`, `Tensor6D`, `StrictEventEmitter`, `Whiteboard`, `LocalForageObservableChange`, `WrapperOptions`, `ListWorkRequestsResponse`, `JPiece`, `IDriverInfo`, `DeviceCreateParams`, `IScheduleApiModel`, `ShallowRenderer`, `CircleShape`, `GetIntegrationCommandInput`, `UniformRandom`, `PreviousSpeakersActions`, `IRepositoryModel`, `IStateTypes`, `UntagResourceOutput`, `Accessor`, `google.maps.Map`, `NexusEnumTypeDef`, `PromiseEmitter`, `Birds`, `AlphaConfig`, `HostRuleHeader`, `OrganizationPoint`, `MccScrollspyGroup`, `PIXI.Texture`, `ZRenderType`, `FirebaseError`, `HTMLIonToastElement`, `CeramicConfig`, `IParameterValuesSourceProvider`, `DisposableCollection`, `PopupUtilsService`, `TabBarProps`, `GroupsGetterFn`, `Glissando`, `DNSLabelCoder`, `SpeechRule`, `SocketCustom`, `ReadFileFailCallbackResult`, `ListDeploymentsCommandInput`, `RouteRecordNormalized`, `MockCachedRule`, `HookType`, `TreeModel`, `ListProps`, `CalendarMode`, `RowSet`, `Newsroom`, `UpdateGroupRequest`, `ISessionBoundContext`, `ZAR.ZAR`, `BumpInfo`, `ILoadAll`, `admin.firestore.DocumentSnapshot`, `Updater`, `BackendService`, `SizeNumberContext`, `BuildImpl`, `Common`, `ISpacesClient`, `SpringResult`, `Field_Group`, `PendingAction`, `MsgCloseLease`, `UploadResult`, `FormData`, `LocalizedCountry`, `OidcClientSession`, `TimestampManager`, `Partition`, `CustomBond`, `ComparisonOptions`, `IController.IParameter`, `TooltipOperatorOptions`, `TextInput`, `HashBucket`, `CmsService`, `JSX.HTMLAttributes`, `jasmine.CustomReporterResult`, `WrapExportedClass`, `StreamService`, `SerializedSlot`, `AsyncFrameworkFn`, `TokenClaims`, `ReportingStore`, `IconOptions`, `ExtendedAdapter`, `AuthService`, `OperationURLParameter`, `TEX0`, `OnCleanup`, `Variance`, `NameSpace.WithEnum`, `Date`, `T`, `tf.Tensor3D`, `SpecFun`, `EnhancedSku`, `DataGrid`, `ValueGetter`, `ShouldSplitChainFn`, `PathParser`, `EstreeNode`, `ISequencedDocumentMessage`, `IConvectorControllerClient`, `TypeChecker`, `ShaderParam`, `WatchSource`, `AppIdentity`, `JWTService`, `PiLimitedConcept`, `Ellipse`, `IApiExternalDocumentation`, `IDevice`, `DecryptedSymmetricKey`, `DialogService`, `AbstractObject3D`, `ABLTableDefinition`, `Tournament.TournamentConfigsBase`, `DataSet`, `d.InMemoryFileSystem`, `QRCodeNode`, `IOverlayAnimationProps`, `OPENSEARCH_FIELD_TYPES`, `AuthKey`, `TextEdit`, `RouteInfoWithAttributes`, `FSTree`, `inversify.Container`, `OpenAPIV3.SchemaObject`, `FileCompletionItemManager`, `Reject`, `EthereumListener`, `FileResource`, `Types.PresetFnArgs`, `Nodes.NameIdentifierNode`, `IdentityTest`, `WorkflowStepInputModel`, `StagePanelType`, `BoardType`, `IStszAtom`, `TransactionApplyContext`, 
`IParameterDefinition`, `Type`, `IXulElementSpec`, `CreateDedicatedIpPoolCommandInput`, `WalletAdapter`, `TagMap`, `CLI_COMMAND_GROUP`, `ImmutableNotebook`, `TodosST`, `ListDbSystemsRequest`, `EngineResponse`, `AzureTokenCredentialsOptions`, `WatchEventType`, `DiffLine`, `AnalyticUnitId`, `FcConnector`, `LookupFnResult`, `DescribeChangeSetCommandInput`, `Var`, `XPathResult`, `ILaunchSetting`, `FloatArray`, `AtomDataHandler`, `TransactionInput`, `EpicSignature`, `ItemPriceRate`, `Gravity`, `FnU2`, `ts.EnumDeclaration`, `SidenavState`, `ReplicaOnPartition`, `Framework`, `BarSeriesStyle`, `SkillMap`, `interfaces.IExtensionConfiguration`, `CSSScalar`, `NodeWithChildren`, `RecordOfType`, `NmberArray16`, `ContentEditableEvent`, `BoardView`, `THREE.Material`, `providers.BaseProvider`, `NDArrayMath`, `BasePath`, `FocusOutsideEvent`, `Phrase`, `q.TreeNode`, `PossibilityChild`, `ImageryLayer`, `d.TypeInfo`, `Person_Employment`, `DebouncedFunc`, `Sig`, `EncString`, `t.Visitor`, `RestConfigurationMethodWithPath`, `SlotFilter`, `FormApi`, `LovelaceCardConfig`, `EventKind`, `FactoryState`, `AlertClusterStatsNode`, `IContentSearchResponse`, `RBTree`, `BaseN`, `ForceGraphLink`, `Agent`, `SystemState`, `RegisteredSchemas`, `GameDataState`, `VueTag`, `DescribeOrderableDBInstanceOptionsCommandInput`, `SubmissionSectionObject`, `ImageType.StyleAttributes`, `ViewPortItem`, `MyClassWithReturnArrow`, `EAdvancedSortMethod`, `KeyOctave`, `EntContract`, `WidgetDescription`, `Epic`, `moment.Duration`, `IDocumentContext`, `I18NLocale`, `ValidationFn`, `VideoState`, `CreateResponse`, `SearchQuery`, `ModeRegistration`, `SettingEntity`, `ListDataSourcesCommandInput`, `changeCallback`, `W`, `TransportContext`, `ExtendableMessageEvent`, `ChatContext`, `JSDoc`, `CreateJobRequest`, `ListState`, `HSD_LoadContext`, `HTMLSuperTabButtonElement`, `MediaConfig`, `ts.CompilerHost`, `ScaleTime`, `JsonSchemaDataType`, `Arweave`, `ResourceReturn`, `SpeechConfig`, `ImageProvider`, `DeleteDomainCommand`, `SqlParameter`, `Score`, `RealTestServer`, `XPCOM.ContentFrameMessageManager`, `IAppSettings`, `Github.PullRequestsGetResponse`, `StateOptions`, `MimeBuffer`, `SKU`, `AssemblyBlockContext`, `Formats`, `DisplayMarker`, `Couple`, `OrbitCameraController`, `StackPanel`, `IDataSource`, `NormalizedOptions`, `TOptions`, `GetPrTimelinePayload`, `TransformingNetworkClient`, `SCTP`, `FieldFormatEditorFactory`, `requests.ListIamWorkRequestErrorsRequest`, `WorkNode`, `BitBucketServerPRComment`, `TestChannel`, `PermutationVector`, `TState`, `TileInputs`, `Interpolation`, `UserStateService`, `GitLogCommit`, `PlatformEvent`, `GherkinDocumentHandlers`, `RaguServerConfig`, `VirtualDocument`, `JSONPath`, `SideNavComponent`, `angular.IWindowService`, `DataAnalyzeStore`, `IOSIconResourceConfig`, `MessageRecord`, `NeisCrawler`, `ComponentInstruction`, `IJsonPatch`, `C3`, `IConstruct`, `ARecord`, `GeolocationPosition`, `BaseAddress`, `ConeSide`, `ConnectionMetrics`, `MMOnlineStorage`, `GeomGraph`, `ElementCreationOptions`, `FocusedElement`, `PrerenderConfig`, `Events.deactivate`, `YDomainRange`, `LogAnalyticsMetric`, `ast.QuoteNode`, `SpotSession`, `BadGuy`, `RuleConfig`, `ViewResources`, `IDataFilterResultValue`, `UseQueryOptions`, `Vulnerability`, `HoveredResult`, `Swagger2`, `RulesProvider`, `ICourseModel`, `ParseErrorLevel`, `Site`, `RepositoryEsClient`, `FrameRequestCallback`, `PoolConnection`, `ArtifactFilePaths`, `MagickInputFile`, `SqrlCompiledOutput`, `MonitorCollection`, `INodeExecutionData`, `Captcha`, `Demo`, `FabFilesObject`, 
`PromiseWithProgress`, `CaseReducer`, `BifrostProtocol`, `MockedKeys`, `HapiResponseToolkit`, `WebsiteScanResultProvider`, `StickyVirtualizedListState`, `DynamicFurParam`, `HTMLObjectElement`, `DateInputFormat`, `ViewModelQuery`, `RxFormArray`, `UniqueNameGenerator`, `NodeList`, `IBucketHistogramAggConfig`, `IBifrostAccount`, `TypedMessage`, `SetOverlap`, `ReputationOptions`, `KeyRegistrationBuilder`, `AttestationModel`, `CleanupCallback`, `IUserRepository`, `DAL.DEVICE_ID_GESTURE`, `MDCTabBarView`, `Pocket`, `PUUID`, `EntityTree`, `GeometryStreamProps`, `ObservationService`, `ChildNode`, `BaseHistory`, `CreateEmailIdentityCommandInput`, `RawResponseCallback`, `core.Coin`, `IOdspAuthRequestInfo`, `CategorizedOption`, `InputData`, `ExpressionVariable`, `CompilerSystemRemoveDirectoryResults`, `AccountPagination`, `AZSymbolKind`, `AsyncHierarchyQuery`, `XTreeNode`, `IAccountsState`, `TOutput`, `DefinitionRange`, `IRunConfiguration`, `WglScene`, `ListValue`, `MapReward`, `DebugProtocol.DisconnectArguments`, `Router`, `MaskServer`, `DescribeAppInstanceAdminCommandInput`, `Registry`, `TChunk`, `IdSelector`, `StatefulChatClient`, `SourceBreakpoint`, `TradeDirection`, `DataOptions`, `NgrxJsonApiZone`, `TBSelection`, `NetworkPluginID`, `ProjectVersionMeta`, `IPNResponse`, `CreateAccountsRequestMessage`, `Studio.App`, `JSX.Element`, `ITreeData`, `RpcProgram`, `PartiallyParsedPacket`, `Label`, `DefaultChildrenWNodeFactory`, `ConfigImagery`, `BTIData`, `IMinemeldStatusNode`, `WebStandardsDashboard`, `TestSuiteNode`, `SVGAElement`, `GeomEdge`, `IMutableVector3`, `CancellationTokenRegistration`, `StatsGetterConfig`, `TableColumnConfig`, `TagResourceResponse`, `SavedObjectsSerializer`, `BatchChangeSet`, `RangeIterable`, `CommandFunction`, `StepArgs`, `DescribeReplicationTasksCommandInput`, `ClassStaticBlockDeclaration`, `ClassTypeFlags`, `AnnotationConstructor`, `ConfigurationCCAPISetOptions`, `ITaskRepository`, `RailsDefinitionInformation`, `FakeExecution`, `PromiseRequest`, `JSXNode`, `DefaultDataServiceConfig`, `DeletedAppRestoreRequest`, `IQueryInfo`, `IDataTableColumn`, `MarkerSet`, `TagResourceCommand`, `CLIOptions`, `VueI18n`, `Suggestion`, `server.TextDocument`, `ESLSelectItem`, `CurveVector`, `AimEvent`, `AthenaRequestConfig`, `DataSourceSnapshot`, `MDSPostgresClient`, `EventExclusionPlugin`, `Precondition`, `React.FormEventHandler`, `Guid`, `EmbeddableOutput`, `PetService`, `JoinType`, `JsonSchemaRootAssertion`, `moneyMarket.market.BorrowerInfoResponse`, `ConvCommitMsg`, `BaseClass`, `ILeague`, `BigIntMoneyBase`, `IHawkularRootScope`, `BlobContainer`, `WebOutput`, `Notations`, `RemoteService`, `HeadersInit`, `Time`, `CompilerSystemRemoveFileResults`, `PersistedLog`, `ImportNameWithModuleInfo`, `RowOfAny`, `OverridableComponent`, `Handler`, `TileMapArgs`, `TaskData`, `MessageQueue`, `TokenAmount`, `ConditionExpressionDefinitionChain`, `HTTPClient`, `IAdvancedBoxPlotData`, `DescribeEnvironmentManagedActionHistoryCommandInput`, `Questions`, `ConfigValueChangeAction`, `JsonDocsStyle`, `ICodeGenerationStackOutput`, `TurtleBuilder`, `SessionEvent`, `__HttpHandlerOptions`, `RoleValidationResult`, `IPathMapping`, `SourceDescription`, `LegendLocationSettingsProps`, `ValidationResponse`, `DocumentId`, `VertexType`, `fc.Arbitrary`, `Resolvable`, `EuiComboBoxOptionOption`, `IChip`, `IActorRdfDereferenceOutput`, `TaskNow`, `Password`, `RTCRtpTransceiver`, `OPCUAClient`, `SPADeploy`, `MessageOrCCLogEntry`, `StageDataHolder`, `TransactionObject`, `Natural`, `TypeScriptServerHost`, `WriteGetter`, 
`MappedField`, `d.ComponentCompilerPropertyType`, `EvaluatedMetric`, `CloudProvider`, `SinonSpy`, `TaskSpec`, `Tree`, `FlowTransform`, `DayModifiers`, `SetupCommitment`, `SkyBoxMaterial`, `TransformationContext`, `PermissionContext`, `GenericRequestMapper`, `RelationEntry`, `TBEvent`, `Storable`, `GraphqlConfig`, `BlobsModel`, `Flavor`, `GfxTextureDimension`, `OutputProps`, `LocationSource`, `IconSettings`, `AddressBalance`, `PageRequest`, `MiRectConfig`, `SimplifiedParameterType`, `CubieCube`, `CreateDBClusterSnapshotCommandInput`, `StaffTuning`, `ContainerRegistry`, `EdmTypeField`, `angular.ui.bootstrap.IModalServiceInstance`, `IRemoteRoom`, `BlockMapType`, `com.nativescript.material.core.TabItemSpec`, `Animated.EndCallback`, `ServiceURL`, `WithdrawAppState`, `GunGraphData`, `ConnectionHandler`, `SwaggerConfig`, `OrderState`, `MetricsConfiguration`, `GDQOmnibarBidwarOptionElement`, `QRCodeSharedData`, `IDBAccessQueryParams`, `IDesignLike`, `MapIterator`, `Terminal`, `FieldResultSetting`, `QueryLeasesRequest`, `cytoscape.CollectionElements`, `sdk.SpeechRecognitionEventArgs`, `SymbolCategory`, `WebGLMemoryInfo`, `ChainTransaction`, `TraverseCallbackType`, `AbortChunk`, `TestFunctionImportEdmReturnTypeCollectionParameters`, `XUL.chromeDocument`, `StyleUtils`, `T.Model`, `PIXI.Graphics`, `SelectMenuItem`, `SharePublicKeyOutput`, `ExitCode`, `requests.ListSuppressionsRequest`, `ChunkGroup`, `MESSAGE_ACTIONS`, `VercelResponse`, `AppStateTree`, `SuggestChangeHandler`, `FormElement`, `AveragePooling1D`, `ViewRect`, `MergeItem`, `ElementComponent`, `IUiAction`, `YearProgressService`, `AlbumEntity`, `MultiTablePrettyfier`, `LogObject`, `CredentialRecord`, `AbsoluteDirPath`, `Variables`, `PropertyResolver`, `NodeAttributes`, `IPair`, `InputBox`, `BlockBody`, `FunctionConstructor`, `ImportOrExportSpecifier`, `GroupedFunnel`, `common.AuthParams`, `UseGoToFormConfig`, `ScrollAreaContextValue`, `UInt32Value`, `ValueHandler`, `SetupObjects`, `TrigonometryBlock`, `PhoneNumber`, `KratosService`, `WithGeneric`, `SparseSetProps`, `PanelData`, `PiPrimitiveProperty`, `CurrencyMegaOptions`, `Dexie.Table`, `ParameterMetadata`, `ListWorkRequestLogsRequest`, `TSESTree.Literal`, `PDFField`, `OperationNotPermittedException`, `LchaColor`, `HttpWrapper`, `NavigationService`, `Mappings`, `BeanDefinition`, `PathNodeItem`, `WikiPage`, `SDLValidationContext`, `GeoUnitsForLevel`, `GitLog`, `SSBSource`, `UploadableMap`, `SliderGLRenderer`, `GoogleWebFontData`, `MatchmakerTicket`, `TransformStreamDefaultController`, `SchemaValidationContext`, `PeerCertificate`, `TreeMapNode`, `ISmsProvider`, `pxt.auth.Badge`, `AstNodeDescription`, `CommentModel`, `CommonMiddleware`, `Electron.Event`, `NamedVariableMap`, `PDFCheckBox`, `TriangleFilterFunc`, `FormFieldEditorComponent`, `IServerConfigModel`, `ReviewerStatisticsState`, `OperationStream`, `EntityID`, `IBox`, `ProductVariantSettingService`, `StartStopSingle`, `AsyncMachine`, `StaticComponent`, `ProviderLibrary`, `BatchRequest`, `PoolFields`, `SessionStateControllerState`, `LayerWeightsDict`, `QuadrantDirection`, `TextureDataType`, `ISubView`, `GraphQLSchemaWithFragmentReplacements`, `AuthorizationResult`, `TextureSlab`, `MVideoId`, `Dialect`, `JSDocTemplateTag`, `RTCDtlsTransport`, `PipelineRuntimeContext`, `ConstantsService`, `ListConfigurationRevisionsCommandInput`, `RoarrGlobalState`, `ComponentContext`, `GetRRSetRequest`, `CanvasRenderer`, `Epoch`, `FtrProviderContext`, `MinionsController`, `ParsedQRL`, `Keyring`, `pb.Callback`, `PowerlevelCCSet`, `ObserverCallback`, 
`CreateDeploymentResponse`, `ListIntegrationInstancesRequest`, `PickerProps`, `CkElement`, `InitiatingTranslation`, `IUserRegistrationInput`, `JwtUserData`, `IPropertyPaneField`, `SoundChannel`, `ValueGetterParams`, `CodeCell`, `SCN0_LightSet`, `S3`, `NativeScrollEvent`, `InterfaceRecursive`, `OnConflictBuilder`, `K.StatementKind`, `App.webRequest.IRequestProcessor`, `Schedule`, `TaskBase`, `IndexSymbolData`, `ActionParamsType`, `Window`, `HashHistoryManager`, `ExecutionPathProps`, `YjsEditor`, `MdastNodeMapType`, `StampinoRender`, `MakeRestoreBackup`, `RendererAPI`, `IRowProps`, `vscode.CancellationToken`, `TextColor`, `PluginService`, `MockRule`, `CallbackHandler`, `IAnimation`, `CCashCow.Payment`, `MustacheFile`, `IProcedure`, `IgetOpenRequests`, `BrowseCloudDocument`, `PersistedData`, `ExceptionListClient`, `ContextShape`, `JRPCEngineReturnHandler`, `QuadrantType`, `ActivatedRouteStub`, `WebGLRenderTarget`, `Queryable`, `ClassMember`, `TeamMembershipProps`, `BlobGetPropertiesResponse`, `CLM.LogDialog`, `AttachPolicyCommandInput`, `RemoteFileItem`, `Second`, `CallableContext`, `EndpointDetails`, `Beam`, `PropertyNode`, `ComponentState`, `CheckFlags`, `PartialObserver`, `IndexedCollectionInterval`, `V1Prometheusrule`, `ZoneSwitch`, `ReadAddrFn`, `JSONSchema6`, `IAstBuilder`, `TCacheResult`, `SeriesDataSortingOptions`, `DataInterface`, `PostgresInfo`, `InputThemeConfig`, `RootDispatch`, `MessageInstance`, `ReactDataGridColumn`, `GraphQLFieldResolver`, `ResolvedNode`, `EventFnBefore`, `Conv2D`, `Conv3DTranspose`, `OnItemExecutedFunc`, `PriorityCollectionEntry`, `IAttachment`, `RuntimeConfig`, `React.BaseSyntheticEvent`, `ErrorData`, `IAnimationOptions`, `CallableFunction`, `HTMLIonOverlayElement`, `Unknown`, `IVerificationGeneratorDependencies`, `IResultSetElementKey`, `ChartAntVSpec`, `PathFn`, `flatbuffers.Offset`, `TypeElement`, `ListsSandbox`, `DOMString`, `MockComponent`, `Integration`, `AttributeMap`, `IncrementalParser.SyntaxCursor`, `IGaeaSetting`, `FunctionalComponent`, `PiClassifier`, `ProofBranch`, `StateDto`, `MeshPhongMaterial`, `INPUT_SIZE`, `AdapterConfig`, `ASTVisit`, `OsqueryAppContext`, `UpdateConnectionDetails`, `CtrEq`, `TextLine`, `ListGrantsRequest`, `JSet`, `IClassify`, `JWTTokenModel`, `MsgWithdrawLease`, `AvailabilityDomain`, `GfxBindingLayoutDescriptor`, `License`, `WorkRequestLogEntry`, `Proppy`, `scribe.Config`, `IWidget`, `StyledComponentWithRef`, `VariableGroupDataVariable`, `ApplicationCloseFrame`, `TagCreator`, `CreateTypeStubFileAction`, `BaseModel`, `ISummaryHandle`, `Clauses`, `ActionBar`, `AssertClause`, `admin.app.App`, `FileChangeEvent`, `LoggingOptions`, `CommonStyle`, `StynRule`, `SUPPORTED_FIELD`, `OpenYoloCredentialRequestOptions`, `TestPlayer`, `Aggs`, `INumberFieldExpression`, `CreateComponent`, `IEpochOverview`, `RectangleShape2DSW`, `EventChannel`, `StateUpdatedEvent`, `Unchangeable`, `ConfigManager`, `ts.Visitor`, `EffectReference`, `GoogleDriveSyncMetadata`, `LayoutPaneCtrl`, `KeyModel`, `web3.PublicKey`, `BlockStatement`, `DocumentModel`, `FileHandle`, `ThermostatFanMode`, `SbbDialogConfig`, `ResizeChecker`, `InjectionError`, `EndResult`, `IDeployedContract`, `CanvasType`, `MaxAnalysisTime`, `GetSelector`, `AutoScalingConfiguration`, `IWorkspace`, `SocketIO.Server`, `AwrDbCpuUsageSummary`, `DLLData`, `LineRange`, `ListCardConfig`, `IDriverType`, `FakeHTMLAnchorElement`, `PvsFormula`, `Thenable`, `IsInstanceProps`, `PreflightCheckNamespacesResult`, `Tagname`, `ListingNodeRow`, `TestVectorResult`, `ActorType`, `Point`, `CliFlags`, 
`LayersTreeItem`, `ParticipantsAddedListener`, `SessionChannel`, `RouteAction`, `IChangeRequestManagementItem`, `SKColor`, `MDCTabScrollerAdapter`, `DeleteRoomCommandInput`, `VolumeBackupSchedule`, `IMatrixProducer`, `AutoCompleteProps`, `SequenceTypes.Participant`, `EditableTextBase`, `BottomTabBarProps`, `RepoSideEffectPendingExpectation`, `VpcConfig`, `TenantService`, `CompilerOperation`, `WsProvider`, `PaginationServiceStub`, `SelectionShape`, `DeletePresetCommandInput`, `CleanupType`, `NotifyQueueStore`, `ClarityType`, `BrowserInterface`, `AssetModule`, `SudokuBoard`, `DropOptions`, `SupervisionCCGet`, `RuleFn`, `MorphOptions`, `NodeTransform`, `GitInfo`, `FigurePart`, `AutoImportResultMap`, `CommunicatorEntity`, `DropdownService`, `ScalesCache`, `UIContextProps`, `ISurveyCreatorOptions`, `S5PL2Layer`, `AgCartesianChartOptions`, `ROPCService`, `CombatPlayerComponent`, `SerializeNodeToHtmlOptions`, `PartType`, `v`, `MDCCheckboxAdapter`, `MapMesh`, `LayeredLayout`, `AlbumService`, `KeyFrameLink`, `Middleware`, `DescribeChannelMembershipCommandInput`, `SupClient.ProjectClient`, `NativeNode`, `DatasourceOverwrite`, `APISet`, `DefaultReconnectionHandler`, `EventDecorator`, `RetryConfig`, `IDOMRule`, `FormFieldType`, `Embeddable`, `PermissionItem`, `PackageInfos`, `VaultItem`, `PopupInfo`, `RunOutput`, `Call_SendResultsTo`, `d.ComponentRuntimeMembers`, `CachedItem`, `ITextFieldProps`, `ProviderApi`, `InstancePrincipalsAuthenticationDetailsProviderBuilder`, `NotificationRequestInput`, `Int32Value`, `Thumbnail`, `Selective`, `INumbersColumn`, `ZoneType`, `MountPoint`, `FetchError`, `TLE.FunctionCallValue`, `TotemFile`, `Dataset`, `CacheHandler`, `StateChannelJSON`, `UseReceiveSet`, `ImportAliasData`, `PassImpl`, `Key2`, `TextDocuments`, `SFCBuildProps`, `Facsimile`, `DisclosureStateReturn`, `FileDescriptor`, `SingletonDeployment`, `SubExpr`, `ISettingsDataStorePayload`, `ListTaskRunLogsRequest`, `ConcreteComponent`, `Speaker`, `WalletStateType`, `ApolloServerPlugin`, `NavigationProps`, `UserEvent`, `ElementMixin`, `WriteFunc`, `ClassNameCollector`, `StaticQueryDocument`, `ModalComponent`, `LinkDownload`, `WebrtcProvider`, `ListJobsCommandInput`, `CIImage`, `IEscalation`, `ResultPoint`, `GraphQLConfig`, `OverlayProps`, `BaseAxisProps`, `ClaimingSolution`, `IAssetActionTypes`, `P6`, `DirectiveList`, `Tasks`, `WizardForm`, `Mat4`, `EntryInfo`, `CommonNode`, `InternalOpts`, `MempoolTransaction`, `RenderRule`, `HsSidebarService`, `ToLatexOptions`, `AccountFacebook_VarsEntry`, `TypeOrUndefined`, `MonzoPotResponse`, `GtConfigSetting`, `KeyboardEventInit`, `EntityHydrator`, `ISelectionData`, `TimeInterval`, `WorkflowStep`, `IMarkmapFlexTreeItem`, `PutFileOptions`, `HdErc20Payments`, `FiberNode`, `S3Config`, `ValidatorError`, `location.CloudLocationOption`, `TransactionCache`, `PostCSSNode`, `GtRow`, `NVMEntryName`, `RenderTask`, `AnalyticsDispatcher`, `Proxy`, `ServerState`, `PlaybackState`, `ChatMessageReadReceipt`, `DescribeDBClusterEndpointsCommandInput`, `IDatabase`, `PluginObject`, `LogData`, `IMergeTreeDeltaOpArgs`, `JPAC`, `ModelVersion`, `Portable`, `PeerConnection`, `ShallowMerge`, `IWithHistory`, `DateTimePatternFieldType`, `FileSystemStats`, `HeaderPair`, `IApiTag`, `InternalApplicationSetup`, `StandardAccounts`, `DbEmoji`, `WechatOfficialAccountService`, `WebGLShader`, `PayloadBundle`, `ErrorUtilitiesService`, `S.State`, `SupCore.RemoteClient`, `SyncCommandWithOps`, `IDocumentReference`, `AuthContextProps`, `Optimizer`, `IOrderedGroup`, `UserPhotosState`, `AuditConfig`, `LiteElement`, 
`JointComponent`, `I18nContextType`, `CodeChangedEvent`, `ExecutionParams`, `ConfigDeprecationProvider`, `TelemetryData`, `ICollaborator`, `IR.BasicBlock`, `IArmy`, `GPUSampler`, `CdkScrollable`, `ChannelProperties`, `FeatureUrl`, `QueueItem`, `RegistryMessage`, `TagEventType`, `VcsFileChange`, `ExternalSource`, `Keyframe`, `BrowserFetcher`, `RPiComponentType`, `FsFile`, `VaultBackupType`, `Todo`, `CategoryDescription`, `TVEpisodeDAO`, `Visitor`, `Uploader`, `MenuBuilder`, `Q.IPromise`, `FieldFormatsContentType`, `OauthSession`, `TrueFiCreditOracle`, `ScheduleState`, `IncrementDirection`, `TFS_Build_Contracts.Build`, `TypeScriptSubstitutionFlags`, `SemanticContext`, `TabData`, `t_b79db448`, `d.JestEnvironmentGlobal`, `BaseGraph`, `CMDL`, `NitroState`, `BaseEditor`, `ts.Identifier`, `ExploreOptions`, `ResolveCallback`, `OpenSearchDashboardsResponse`, `WalletInterface`, `ApiErrorMessage`, `CountParams`, `ImportStatements`, `HTMLMetaElement`, `requests.ListCrossConnectsRequest`, `RouteData`, `InHostPacket`, `CreateChannelResponse`, `BaselineOptions`, `SFCScriptBlock`, `FileSystemProviderWithFileReadWriteCapability`, `ArrOf`, `PropertyCollection`, `WebApiTeam`, `IControllerConfig`, `BNLike`, `IMapState`, `CodeActionsOnSave`, `Community`, `Filler`, `JobMessage`, `SwitchWatcher`, `LastInstallFlag`, `MindMapModel`, `SpeakerInfo`, `NetworkConfiguration`, `DirectiveNode`, `ActionHandlerContext`, `ConfigEnv`, `ColorStateList`, `DockerRegistryHelper`, `StyledLabelProps`, `ClipboardService`, `ACP.SuggestionsRequestedEvent`, `Movie`, `FlowTreeTopicNode`, `DimensionMapping`, `IPeripheral`, `WorkspaceEntry`, `GetTranscriptCommandInput`, `BezierCoffs`, `freedom.FreedomInModuleEnv`, `TableInterface`, `AngularFireStorageReference`, `FileSpec`, `FailedRequestType`, `requests.ListProtectionRulesRequest`, `TSelections`, `AppFilters`, `Cardinality`, `LyricLanguage`, `_ITable`, `MapSearchCategory`, `GlyphplotComponent`, `LibraryContextSeries`, `DateSpan`, `Apollo.QueryHookOptions`, `EventInterpreter`, `Gui.Widget`, `IVarXYValue`, `TypeVarMapEntry`, `FormTypes`, `ParseElement`, `MuteRoomTrackRequest`, `PicassoConfig`, `VideoFileModel`, `UpdateExpressionDefinitionChain`, `CacheContext`, `ILogOptions`, `TextLayoutStyle`, `DaffCartAddressFactory`, `CustomTypes`, `NSIndexPath`, `VgAPI`, `SanitizedProtonApiError`, `AbstractContract`, `TextDocumentRegistrationOptions`, `ValueFormatterParams`, `UrlConfig`, `DayGridWrapper`, `WrappedEntity`, `Datum`, `PrimaryKey`, `TipsLabels`, `DefaultRenderingPipeline`, `NodeSpecOverride`, `RTCRtpReceiver`, `FieldValidateConfig`, `TProps`, `CasePostRequest`, `DeferredNode`, `RouteDataFunc`, `StorageKey`, `TasksStoreService`, `RSASigningParams`, `SequentialArgs`, `TextDecoration`, `ExecutionErrorProperties`, `ErrorProps`, `ListActionsCommandInput`, `ManifestActivity`, `TouchState`, `SchemaValidatorFactory`, `SimpleScalarXmlPropertiesCommandInput`, `SqrlParserState`, `CodeFixContext`, `Trade`, `SetIpAddressTypeCommandInput`, `vscode.CodeActionContext`, `RequestQueryOptions`, `RollupWatcherEvent`, `Peripheral`, `ReduceOptions`, `Dialogue.Argv`, `ClippedVertex`, `OrderStatus`, `ContextLogger`, `TemplatePatcher`, `EPickerCols`, `USER`, `IGetPatchDirResult`, `BitcoinCashSignedTransaction`, `ConfigurableConstraint`, `RGBA`, `ResponseError`, `ISshSession`, `Comparison`, `SpecDefinitionsService`, `IParseProps`, `ModuleResolutionCache`, `SubgraphDeploymentIDIsh`, `types.UMLClassMember`, `PageContext`, `PopulatedTransaction`, `MicrofabComponent`, `FormattedTransactionType`, `Validators`, 
`dScnKy_env_light_c`, `DocSection`, `ModalConfig`, `InitAckChunk`, `GeneratorState`, `EndUserAgreementService`, `EventFieldInfo`, `SimEnt`, `FileContext`, `ChordType`, `ShallowStateItem`, `StringLiteralUnion`, `DefinitionElementProps`, `BoundSideType`, `GitlabAuthTokenRepository`, `UpdateRoomMetadataRequest`, `UserCredentials`, `DaffOrderItem`, `WithEnum`, `TabsService`, `UpdateReplicationConfigurationCommandInput`, `BrowserContext`, `MessageMock`, `CheckoutAddressesPage`, `Variable`, `PiContainerDescriptor`, `JobState`, `RobotStateAndWarnings`, `WrappedWebSocket`, `IRowDetails`, `ExecuteTransactionCommandInput`, `ElementPropsWithElementRefAndRenderer`, `OhbugEventWithMethods`, `MapExtent`, `BytesValue`, `LoaderBundleOutput`, `PropertyEditorInfo`, `ResourceActionMap`, `Registers`, `CaseStyle`, `Heater`, `CacheRecord`, `FieldDefinition`, `FirebaseSubmission`, `IAsyncEqualityComparer`, `parse5.ASTNode`, `NamedNodeMap`, `CourseState`, `Host`, `DescribePackagesCommandInput`, `nsISupports`, `ThemeIcon`, `OtokenFactoryInstance`, `AnyImportOrRequireStatement`, `IntervalHistogram`, `RecordSourceSelectorProxy`, `KeywordTypeNode`, `ContextErrorMessageProps`, `DecoderResult`, `GtkElement`, `INavLink`, `Door`, `TCmd`, `ModuleJSON`, `IterationTypesResolver`, `SettingsComponent`, `RPC.KVClient`, `ChainGetter`, `ModelIndexImpl`, `SpeechRecognitionResult`, `SessionImpl`, `Multer`, `BaseVisType`, `estypes.SearchHit`, `BackstageItemState`, `StyProg`, `Original`, `SimpleRNNCellLayerArgs`, `Avatar`, `ast.NodeType`, `Timetable`, `UnpackAttrs`, `InputFieldsComponentsDef`, `SelectTool`, `FormatState`, `NodesInfo`, `Creator`, `ScmFileChangeNode`, `AnyIterableIterator`, `EclipticCoordinates`, `NormalizedTxBitcoin`, `FunctionMutability`, `InternalProps`, `ReuseItem`, `Auditor`, `TranspileOutput`, `SQLQuery`, `SQLVariables`, `DevicesService`, `NextPage`, `Bbox`, `ServiceDescription`, `AddPermissionCommandInput`, `Array2DHashSet`, `ShellCommand`, `SvgDebugWriter`, `MagicExtensionError`, `ComponentWithProps`, `ObjectValidator`, `TestFunctionImportEntityReturnTypeCollectionParameters`, `ClientRequestFailedEventArgs`, `EmbeddableSetupDependencies`, `DeleteHsmCommandInput`, `BaseCollider`, `requests.ListSubnetsRequest`, `IManifestBindArtifact`, `PluginOptions`, `PTPDataView`, `ITeamCardState`, `NearestPointOnLine`, `TToken`, `ActionReducerMap`, `Aux`, `CompositeMenuNode`, `UntagResourceRequest`, `TileReadBuffer`, `PkgJSON`, `PouchDB`, `ISwissKnifeContext`, `Pile`, `CollidableCircle`, `IDirectoryModel`, `IApolloContext`, `NormalizedFile`, `ContextBinding`, `Transfer`, `NativeStorage`, `RobotState`, `RouterAction`, `ListaTarefas`, `SchemaOptions`, `TPackage`, `FeedbinConfigs`, `EditorInspectorService`, `NodeStructure`, `StatusPresenceEvent`, `WebSocketSubject`, `IniFile`, `AggField`, `CosmosdbAccount`, `TAggConfig`, `RenderTreeFrame`, `AppStateStore`, `DefaultSequence`, `StoredConfiguration`, `FootnotesItem`, `Policy`, `IResourceInfo`, `MyService`, `OptionsStackingValue`, `MediaTags`, `ConditionalTransaction`, `EditorRange`, `JsonSchema7Type`, `IKeyValuePair`, `Errorable`, `KubernetesService`, `PropertyUpdatedArgs`, `DaLayoutConfig`, `ComponentManager`, `HdDogePayments`, `Op`, `RpcConnectionWriter`, `Guide`, `AudioState`, `DaffAuthToken`, `CeloTokenType`, `Servient`, `React.SVGProps`, `BindingDef`, `ExceptionListItemSchema`, `MovieOpts`, `OutputTargetDistLazy`, `IAnyVisualization`, `ProblemInfo`, `MockConnection`, `PlayerData`, `HotkeyConfig`, `d.PrerenderManager`, `AccountApple`, `Logquacious`, `ChecklistTask`, 
`RendererStyleFlags2`, `PanelNotificationsAction`, `REPL`, `Socket`, `FabricGatewayConnectionManager`, `DataFrame`, `ThySlideService`, `UpSetThemes`, `DeleteDomainCommandOutput`, `GraphicOptions`, `IZ64Main`, `SecurityGroupRulePorts`, `GlobalAveragePooling2D`, `CachedResource`, `ByteVectorType`, `AtomTweeningNumberElement`, `ImportReplacements`, `InputContext`, `Derivative`, `MessageImages`, `ControllerStateAndHelpers`, `MethodDecorator`, `FilterConstructor`, `EntityStateResponse`, `PokerHandResult`, `ICategoricalFilter`, `GX.RasColorChannelID`, `ArrayBufferLike`, `CircularAxisData`, `IGameCharaUnit`, `GetCommandInvocationCommandInput`, `GfxRenderPipelineP_GL`, `Rank`, `DataProxyAPIErrorInfo`, `requests.ListManagedInstancesRequest`, `MaybePatterns`, `StringProperty`, `SingleSegmentArena`, `Pose`, `ListGroupsResponse`, `DeleteResult`, `DatabaseSubType`, `MethodOptions`, `t.ObjectExpression`, `ParameterizedContext`, `CircuitBreakerOptions`, `DonwloadSuccessData`, `ActionTicket`, `timePickerModule.TimePicker`, `AuthTokenRequestSigner`, `ScanResultResponse`, `LineInfo`, `IRemovalInfo`, `RadListView`, `PointerPressAction`, `IFileAccessConfiguration`, `SchemeRegistrarWrapper`, `NetworkError`, `ProposalIdentifier`, `AgencyApiResponse`, `ElementOptions`, `AnyRenderFunction`, `IGroupFilterDefinition`, `Raycaster`, `InputInfo`, `ClickHandler`, `Pixel`, `Iterate`, `TransformedStringTypeTargets`, `ITransitionActions`, `TemplateWrapped`, `TSForIn`, `tStringCurrencyUnits`, `CB`, `ACCategory`, `TSQuerySelectorNode`, `ITheme`, `Rational`, `IXPath`, `ParseConfigFileHost`, `ActiveErrorMessage`, `GLTFResource`, `loaderCallback`, `Helper`, `SurfaceLightmapData`, `vscode.TextLine`, `FlatConvectorModel`, `StorageTransformPlugin`, `LambdaDataSource`, `SWRHook`, `TechnologySectionProps`, `RuntimeTreeItem`, `ISearchStrategy`, `ReactTestInstance`, `ColorDef`, `TrieNode`, `Z64Online_EquipmentPak`, `yargs.Argv`, `PDFAcroComboBox`, `ConnectionSummary`, `Descendant`, `EventDelegator`, `NodeInstance`, `LocalizedSteps`, `BreadcrumbsOptions`, `TransportConfiguration`, `ApplicationTokenCredentials`, `ChainInfoInner`, `AsyncIterableIterator`, `CfnRole`, `PlanetInfo`, `Dockerode.Container`, `BlueprintInfo`, `d.PrerenderUrlRequest`, `sdk.LanguageUnderstandingModel`, `AnalysisResult`, `CompilerSystemCreateDirectoryOptions`, `IUploadOptions`, `OverridedMdastBuilders`, `StepExpr`, `IInspectorState`, `FetcherContext`, `ValidationErrorPath`, `IInitiativeModel`, `ParserProduction`, `SerializedSavedQueryAttributes`, `BitExprContext`, `PossibleSelections`, `GfxQueryPoolType`, `IsAssign`, `ColorDynamicStylePropertyDescriptor`, `ListApplicationVersionsCommandInput`, `configuration.APIData`, `BattleModel`, `NotifyModel`, `TransactionAuthField`, `Timefilter`, `ResolverResolveParams`, `FunctionTypeResult`, `KameletModel`, `TEmbeddableInput`, `CoreAPI`, `TestFactory`, `FormattedEntry`, `V2SubgraphPool`, `ThumborMapper`, `TimeAveragedBaseRateOracle`, `TreeChild`, `StringOptions`, `HtmlContextTypeOptions`, `BatchItem`, `Capture`, `MetricResult`, `OptionsAfterSetup`, `IEditorMouseEvent`, `TransferMode`, `Polyline`, `ICandidateUpdateInput`, `ValidateFunction`, `DejaSelectComponent`, `ProtocolParameters`, `SpyAsyncIterable`, `KeyedSelectorFn`, `types.IActionContext`, `RemoteStoreOptions`, `AuguryEvent`, `AirlineService`, `SearchQueryCtx`, `ParentFiber`, `AbbreviationTracker`, `RecordingTemplate`, `Ext`, `SoundService`, `PickDeepObj`, `TestErc20`, `FiddleSourceControl`, `VerificationRule`, `Floating`, `Output`, `AdminGameEntity`, 
`TransactionEventType`, `GlobalSettings`, `GetPackageVersionHistoryCommandInput`, `ng.IHttpService`, `AuthData`, `IndexedClassMapping`, `BankAccountService`, `IDeployment`, `LoggerProxy`, `ScrollBehavior`, `FlowLog`, `ParsedIOMessage`, `BemToBlockClassMap`, `CreateFileOptions`, `Dice`, `RequestResponse`, `MatchPairsOptions`, `OscType`, `CashScriptVisitor`, `MDCTabIndicatorAdapter`, `requests.ListBootVolumeReplicasRequest`, `SecretUtils`, `angular.ui.bootstrap.IModalService`, `ScatterPointItem`, `PreReqView`, `MockAirtableInterface`, `Knex.Raw`, `Parser.SyntaxNode`, `ContentFolder`, `InputField`, `DateInput`, `BrowserSession`, `InvalidParameterException`, `ListPingMonitorsRequest`, `ProvisioningParameter`, `Sku`, `MatchmakerAdd_StringPropertiesEntry`, `FactEnvelope`, `INodeInterface`, `TsInputComponent`, `NSError`, `GuildEmoji`, `LeaguePriceDetails`, `ForgeModInput`, `InflightKeyGenerator`, `JSDocTypedefTag`, `Uint8ClampedArray`, `AuthCore`, `SmallMultiplesSpec`, `NofloComponent`, `IFeed`, `TestingFacade`, `CharRangeSection`, `ApprovalPolicy`, `PipetteNameSpecs`, `ReactApollo.OperationOption`, `GunRolls`, `MsgCreateCertificate`, `HookHandler`, `ENDDirective`, `StreamParam`, `UpdateBillingParams`, `Boolean`, `DatabaseConnection`, `FreePoint`, `BtcUnlock`, `OrganizationService`, `SyncTasks.Promise`, `WatchEvent`, `AqlQuery`, `vscode.TestItem`, `PlannerConfiguration`, `PointSeries`, `ResponsivePartialState`, `ProjectIdAndToken`, `FieldFilter`, `VpcTopologyDescription`, `ElementMetadata`, `BuildConditionals`, `Path7`, `NVM500JSON`, `GridView`, `ConchVector3`, `IRunExecutionData`, `FilterExpressionNode`, `PragmaDirectiveContext`, `SVGVNodeAttrs`, `LinkFacebookRequest`, `def.Vec2`, `DiscussionDocument`, `NativeActivation`, `HttpMethod`, `SerializedMessage`, `NodePrivacyLevel`, `InputParamMapper`, `ResourceNode`, `XMLHttpRequest`, `DisjointRangeSet`, `AssessmentItemController`, `FeatureEdges`, `JMapInfoIter`, `Type.TPowerQueryType`, `ExpressionsStart`, `ExchangePriceQuery`, `SearchKey`, `AccountFixture`, `ScopedClusterClientMock`, `DockerApi`, `IEntrypoint`, `DeleteExperimentCommandInput`, `ModelObj`, `DebugStateAxes`, `AvatarSource`, `LoadingState`, `CustomRenderElementProps`, `EnumValues`, `AuthenticationState`, `MenuApiResult`, `RawSavedDashboardPanel630`, `ColumnsSchema`, `InjectionService`, `CreateInputCommandInput`, `ViewRanges`, `BitbucketAuthResponse`, `AccountAndPubkey`, `ShapeStyle`, `NetlifyConfig`, `Composite`, `RequestListener`, `AggsCommonStartDependencies`, `PluginDeployerEntry`, `NetworkStatusEvent`, `PromisedAnswer`, `ScreenDimension`, `ExcalideckEditorState`, `StreamInterface`, `DeepLinker`, `Field_Slot`, `XmlAttributes`, `AngularFireStorage`, `RegularStep`, `DispatchWithoutAction`, `DropEvent`, `NameNode`, `RootData`, `HostWatchEvent`, `IAccountInfo`, `Input`, `ScaleLinear`, `Knex.JoinClause`, `RulesPosition`, `estypes.MgetResponseItem`, `Style`, `Question`, `GpuInformation`, `CreateSessionCommand`, `BlockArchiveLine`, `LRUMap`, `ContextTransformFieldType`, `ICache`, `StyleResources`, `TradeContext`, `ReactQueryConfig`, `ConfigurationGroup`, `CacheUpdateEvent`, `XI18nService`, `PlanInfo`, `UserLecture`, `AnimatorControllerLayer`, `SpringRequest`, `MVideoAccountLight`, `UpdateAuthorizerCommandInput`, `options`, `ProjectBuildOptions`, `GridConfig`, `CurrencyDisplayNameOptions`, `ClassConstructor`, `LinkLabelVM`, `ValidationResults`, `Function`, `GameType`, `AuthRequest`, `LogLevelType`, `EdgeNode`, `ArweavePathManifest`, `vscode.Task`, `TextureSource`, `MBusForm`, 
`VectorSourceRequestMeta`, `IAnswer`, `yubo.RecordOptions`, `HandlerStack`, `Sample`, `DeleteApplicationCommandOutput`, `DefinerClauseContext`, `MkReplaceFuncStore`, `FeedbackActions`, `InstanceRejectOnNotFound`, `TagModel`, `PayloadActionCreator`, `XmlDocument`, `BIP32Interface`, `IGLTFNode`, `VueApolloSmartOpsRawPluginConfig`, `TheiaURI`, `ValueContainerProps`, `UIAlert`, `UnknownParticipant`, `LoaderEvent`, `DeclaredElement`, `IAjaxSettings`, `CliCommandProvider`, `PageFlip`, `Vin`, `HttpsCallableResult`, `BoxOptions`, `ProductModel`, `DataStreamInfo`, `ClientSessionEntryIded`, `AuditLogger`, `DetectionMetrics`, `Progression`, `IDeliState`, `INormalizedMessage`, `LastValueIndexPatternColumn`, `AdbBufferedStream`, `IEmailTemplateSaveInput`, `AnnotationType`, `ThemeLoadOptions`, `CodeGeneratorFileContext`, `LibraryNotificationAction`, `ImportDefaultSpecifier`, `SavedObjectsRawDoc`, `PotentialPartnerActions`, `Electron.Session`, `SessionProxy`, `HippodromeEditOptions`, `RefLecture`, `GeoJsonProperties`, `ItemTemplate`, `msRest.CompositeMapper`, `UpdateDatasetCommandInput`, `PartitionSpec`, `TranslationItem`, `MoveData`, `TestTimelineDataProvider`, `CdsRadio`, `Extended`, `UriResolver`, `OpenSearchClientConfig`, `ScriptCmd`, `BeanProvider`, `ResponderActivityType`, `KnownDomain`, `TType`, `VoidFn`, `data`, `RecordedDisplayData`, `TrackedImportSymbol`, `TransformOrigin`, `AsciiOperatipnParams`, `ObservableUserStore`, `Electron.IpcMainEvent`, `MethodHandle`, `IOffset`, `URLTransitionIntent`, `PymStorage`, `GfxBindingLayoutSamplerDescriptor`, `IMutableGridCategoryItem`, `Propagation`, `StateAB`, `ServerMap`, `Dispatched`, `ListTagsCommandOutput`, `FormFieldsType`, `TriggerAction`, `UsageInfo`, `GoalKPI`, `PlacementProps`, `UpSetAddon`, `requests.ListManagementAgentInstallKeysRequest`, `ApiDefForm`, `SkipListMap`, `RTCDataChannelEvent`, `ArrayFunc`, `Config.Argv`, `CausalRepoCommit`, `ICCircuitInfo`, `ArrayEntry`, `CodeExecutionEmitter`, `LinesIterator`, `RangeRequest`, `FlattenContext`, `SpecialPropertyAssignmentKind`, `VfsObject`, `MockDynamicContent`, `ERC20FakeInstance`, `CliAction`, `AceConfigInterface`, `BasicCCSet`, `IDocumentSnapshot`, `SavedObjectsCollectMultiNamespaceReferencesOptions`, `ParamValue`, `M`, `INohmPrefixes`, `requests.ListDbHomePatchHistoryEntriesRequest`, `BackendErrorLabel`, `EntryObject`, `html.Node`, `WsBreadcrumbsService`, `GenerateClientOptions`, `RepositoriesStatisticsState`, `ModuleDefinition`, `VObject`, `UIImageView`, `PWAContext`, `ProppyFactory`, `PoiTable`, `ServiceConfigurationOptions`, `CacheEntry`, `FileState`, `BRepGeometryCreate`, `DemoSettings`, `DataViewField`, `IBookmark`, `ListJobsRequest`, `CreateApp`, `PriceLineOptions`, `ApplicationGateway`, `Toggleable`, `UIViewControllerTransitionCoordinator`, `SpanAttributes`, `WorldgenRegistryKey`, `Grid`, `ApisInfo`, `FcModel`, `apid.BroadcastingScheduleOption`, `ProjectGraph`, `IReadOnlyFunctionParameterCollection`, `StaticConfigParsed`, `WsDialogService`, `QueryTree`, `FlattenedXmlMapWithXmlNamespaceCommandInput`, `SpeechContext`, `PropCombination`, `Lineup`, `CompilerWorkerTask`, `CellPosition`, `AudioNode`, `NodeJS.WriteStream`, `thrift.TProtocol`, `IRecord`, `BrokenConeSide`, `StyledVNode`, `ResourceValue`, `IMaterialUniformOptions`, `sdk.TranslationRecognitionEventArgs`, `StoreService`, `Relay`, `RelationshipProps`, `OperationMetadata`, `ICustomClassUIMethod`, `TxInput`, `FixtureContext`, `IpcPacket`, `RelatedClassInfo`, `StoreDestinationArray`, `SdkError`, `CurrencyFormatOptions`, 
`HeadlessChromiumDriver`, `AsyncMethodReturns`, `MuteConfiguration`, `LoopBackFilter`, `DeleteClusterCommandInput`, `OrderedHierarchyIterable`, `FoamTags`, `IReduxState`, `FunctionCallContext`, `Layers`, `MinimalNodeEntity`, `IUserService`, `DeterministicDeploymentInfo`, `EquivMap`, `EventEnvelope`, `ITagInputItemProps`, `GridAxis`, `AwaitedCommandEntry`, `ex.PostDrawEvent`, `TimeInstant`, `GSMemoryMap`, `LightType`, `RemoteMessage`, `BitBucketCloudPRDSL`, `JQuery`, `RpcCallParameters`, `ITemplate`, `GenericNack`, `Join`, `IContentSearchRequest`, `MatchList`, `SolutionToApiAnalysis`, `MembersInfo`, `Path4`, `OrderedRotationAngles`, `ModuleNameAndType`, `ZodIssue`, `TaskLogger`, `MockRouter`, `DayProps`, `requests.ListSecurityListsRequest`, `ChangeAnnotation`, `ConnectionResult`, `EmulatorContext`, `ObjectDescription`, `ESLMediaRule`, `NotificationItem`, `KeysToKeysToAnyValue`, `AutomationEvent`, `Environment_t`, `CellData`, `KeyMap`, `VideoChatSession`, `TableSeg`, `ScaleGroup`, `Tense`, `SearchItem`, `ThStmt`, `ControllerMeta`, `TrackMapOptions`, `requests.ListUserAssessmentsRequest`, `PaneOptions`, `VisibilityVertexRectilinear`, `vscode.CompletionItemKind`, `ScreenDetailDto`, `$FixMe`, `ITimelineModel`, `EmployeeService`, `AnimatedComponent`, `OtokenInstance`, `BuiltinFrameworkMetadata`, `TheSagaStepfunctionSingleTableStack`, `events.Name`, `Tutorial`, `IGLTF`, `msRest.OperationQueryParameter`, `FindingCriteria`, `FeeMarketEIP1559Transaction`, `HostInstanceMap`, `Flags`, `d.ExternalStyleCompiler`, `PaperAuthor`, `ClientId`, `PolicyBuilderPaths`, `IVideoApiModel`, `ValidatorResult`, `SortedArray`, `PriceSpecGroup`, `IExecutionResponse`, `CatchClause`, `Human`, `TSQueryOptions`, `MarkerProps`, `ValidationConfig`, `RegistryDocument`, `LayerNode`, `Cue`, `NavParams`, `Highcharts.VennRelationObject`, `N5`, `PluralSub`, `GoConditionalMove`, `RelativeTimeFormat`, `messages.Feature`, `Vector4_`, `CacheIndex`, `ScalarActivity`, `PeerConfig`, `OctokitResponse`, `FileHashCache`, `IPreviewProps`, `angular.IPromise`, `MapChart`, `RouteHandlerMethod`, `TextBufferObject`, `I18nStart`, `ReactEventHandlers`, `RpcNode`, `SimulatorDevice`, `RoomItem`, `ImGui.DrawVert`, `DeclarativeCall`, `ArgMap`, `MockElectron`, `ts.BinaryExpression`, `NotifyOpts`, `CommentDoc`, `ClusterContextNode`, `IBaseImageryMapConstructor`, `ModelCache`, `MotionResult`, `KeyData`, `SimpleObject`, `XRInputSource`, `IKeymap`, `IDBVersionChangeEvent`, `MutableControlFlowEnd`, `ListConnectionsCommandInput`, `EMSTermJoinConfig`, `Requestor`, `LayoutStyleProps`, `GenericMessage`, `JupyterLab`, `IShadingContext`, `bluebird`, `AESEncryptionParams`, `CommunicationParticipant`, `BigFloat32`, `imperative.IProfileLoaded`, `DeleteApiKeyCommandInput`, `KnobsConfigInterface`, `EventUI`, `OrderedIndex`, `ITaskSource`, `IGetTimeSlotStatistics`, `TabInstance`, `PdfCreator`, `GeneralConfig`, `undefined`, `AttachmentMIMEType`, `LunarInfo`, `HTMLDocument`, `LicensingPluginSetup`, `Cave`, `InspectionFailure`, `LogStructuredData`, `MobileRpcChunks`, `SignedCredential`, `Composer`, `BB.Activity`, `PatternMappingNode`, `HttpRequestWithLabelsCommandInput`, `EntityOptions`, `ExampleSourceFile`, `IWarning`, `egret.MovieClip`, `CSSStyleSheet`, `ThemesDataType`, `Generations`, `ZoweDatasetNode`, `CanvasGradient`, `InvoicePromo`, `BreadcrumbPath`, `ITransportConstructor`, `NewId`, `TreeNodeComponent`, `EditStatus`, `ISetItem`, `ReactiveController`, `requests.ListOceInstancesRequest`, `CodeRepository`, `ListJobRunsCommandInput`, `ClientStateType`, 
`Sein.IResourceState`, `SingleOrBatchRequest`, `ConfigService`, `PartnerActions`, `CapacityReservation`, `DisplayObjectTransformationProcess`, `OHLCPoint`, `IDatepickerLocaleValues`, `SelectorArray`, `PrivateIdentifier`, `Array4`, `DailyRate`, `UpdateOneOptions`, `Stacks`, `Logo`, `Promotion`, `IModelAnimation`, `MinAttrs`, `ODataEntityResource`, `ApisTreeItem`, `ConvertState`, `OutputTargetDocsVscode`, `MatchExplanationTreeNode`, `INodeData`, `CoursesService`, `ScheduleActions`, `SizeType`, `Caret`, `BufferSource`, `L2Args`, `ServiceContainer`, `CloudfrontMetricChange`, `XPCOM.nsXPCComponents_Classes`, `DocumentEntryIded`, `RuleSpec`, `ConfigType`, `MappedPosition`, `ProcessedPublicActionType`, `StickyVirtualizedListProps`, `App.windows.window.IMenu`, `CLR0_MatData`, `Memoized`, `PostFilter`, `ProofTest`, `API.services.IChromeFileService`, `LogResult`, `IDynamicStyleProperty`, `DetachPolicyCommandInput`, `Hand`, `GestureUpdateEvent`, `RouterTask`, `SelectPartyToSendDelegate`, `MatDialogConfig`, `HslaColor`, `ResourceConfig`, `FSPath`, `UserMedia`, `ExportTypesRegistry`, `NumberInputOptionProps`, `CompleteResult`, `ExtendableEvent`, `Timespan`, `GroupedPriorityList`, `Marble`, `SFCDescriptor`, `Debe`, `MosaicDirection`, `RecipientElement`, `CommerceTypes.CurrencyValue`, `TxData`, `BudgetResult`, `DeleteRepositoryResponse`, `FileHandler`, `CachedNpmInfoClient`, `ServerlessRecord`, `MessengerTypes.PsidOrRecipient`, `GraphOptions`, `MatSortable`, `IModelContentChangedEvent`, `RouteEffect`, `FormikErrors`, `IpcMainInvokeEvent`, `RepoConfig`, `DocumentConnectionManager`, `HMACKey`, `ElementAttrs`, `TaskCommand`, `ParamItem`, `ResponseEnvelope`, `DisplayObjectWithCulling`, `BilinearPatch`, `Apply`, `RuleDescription`, `ChangeOptions`, `CreateUserCommand`, `SelectEffect`, `ReleaseChangelog`, `NamedAttrMap`, `Dialogic.Item`, `OptionService`, `NotificationTargetItem`, `Jobs`, `TestConfigOperator`, `ItemSection`, `AbstractCommandDescriptor`, `AppWithCounterState`, `RecordsQuery`, `CoreEditor`, `EditingData`, `HuffmannNode`, `Rx.Subscriber`, `CreateClusterCommand`, `DateEntry`, `ShadowsocksManagerService`, `AxiosHttpClient`, `WorkspaceSnaphot`, `IOrganizationRecurringExpenseFindInput`, `BlogPostService`, `NormalizedCacheObject`, `PropParam`, `CircleEditOptions`, `LoggableTarget`, `FoodReducerState`, `SanitizedAlert`, `DeclarationCache`, `PromiseRes`, `BlogTag`, `AxisEdge`, `UIFunctions`, `ResolverRegistry`, `React.CSSProperties`, `QueryInfo`, `PointLight`, `Slugifier`, `IArgs`, `AzureBlobStorage`, `ResolvableCodeLens`, `LuaSymbolInformation`, `ExampleData`, `Plan`, `HexLiteralNode`, `ShallowRef`, `YieldFromNode`, `Subst`, `GeoUnitIndices`, `AppError`, `PromiseRejectionEvent`, `UiActionsStart`, `AuthorizationMetadata`, `Spine`, `MetaesException`, `SocketIO`, `Clause`, `LabaColor`, `HttpHealthIndicator`, `GetSpaceEnvironmentParams`, `VisualizeUrlGeneratorState`, `IApiOperation`, `BurnerPluginContext`, `InputComponent`, `ExpressionRunnerShorthandConfig`, `React.KeyboardEventHandler`, `TextureUsage`, `PcmChunkMessage`, `ListIdentitiesCommandInput`, `TTargetReference`, `Node.Event`, `ReadableSpan`, `IFluidDataStoreContext`, `V1PersistentVolumeClaim`, `GfxProgramP_GL`, `OcsNewUser`, `TreeEdge`, `TimePeriodField`, `IPaneContent`, `React.DetailedHTMLProps`, `ZoneDelegate`, `IModelReflectionData`, `JsonaAnnotation`, `StyledOtherComponent`, `CurlyOptions`, `IKernelConnection`, `RenderElement`, `NetworkLoadBalancer`, `TokenizerConfig`, `Id`, `PouchDatabase`, `NetworkSourcesVirtualSourceList`, `NcTemplate`, 
`IORedis.Redis`, `TimesheetFilterService`, `painless_parserListener`, `AnnotationControllable`, `Prefs`, `ResultWithType`, `TypeKind`, `requests.ListReplicationPoliciesRequest`, `SeederCollection`, `SrtpSSRCState`, `FontStyle`, `TelegramBot.Chat`, `ResultEquipped`, `MegalodonInterface`, `TProperty`, `RelativeRect`, `IHeaderState`, `ListServicesRequest`, `d.TestingConfig`, `ThyGuiderManager`, `ICDN`, `ProgressBarService`, `CursorPagingType`, `JsonAtom`, `EggPlugin`, `IRGBColor`, `CollisionContact`, `Protocol.Runtime.RemoteObject`, `UploadRequest`, `GunNode`, `NdArray`, `BinaryOp`, `P2PResponsePacketBufferData`, `RelationInfo`, `MutationHookOptions`, `Names`, `MessageResp`, `StopJobCommandInput`, `DetectedCompiler`, `RPCResponse`, `EntAsset`, `BlockchainWalletExplorerProvider`, `SliderHandle`, `IMergeTreeOp`, `PermissionDeniedState`, `FilterObject`, `ServiceControlPolicyResource`, `MediaInfo`, `MatchmakerMatched_MatchmakerUser`, `NumberBase`, `YamlMappingItem`, `JSONResponse`, `IApiResponse`, `StatusEntry`, `TimeTravel`, `SurveyPropertyEditorBase`, `ListFindingsResponse`, `MIROp`, `InDiv`, `UserPosition`, `Nibbles`, `ConfigParameters`, `ISequence`, `TestOptions`, `Multicast`, `TinyTranslatorService`, `ChangeBuffer`, `AfterGroupCallback`, `IParagraphMarker`, `EquipmentDelay`, `NamedType`, `Serverless`, `DeployArgs`, `EventTypeMap`, `MethodDocumentationBlock`, `ViewportCoords`, `FeedDict`, `ColorInfo`, `SubmissionSectionError`, `CalibrationState`, `FirstDataRenderedEvent`, `ArticleProps`, `UserProfile`, `MOscMod`, `RoomService`, `ITokenClaims`, `RelaxedPolylinePoint`, `RGBColorType`, `BufferedChangeset`, `UpdateConnectionCommandInput`, `IAppVersion`, `TextEditorConfiguration`, `RpcRequestFulfillment`, `Start`, `React.SyntheticEvent`, `StateTimelineEvent`, `StripePaymentListener`, `EitherNodeParams`, `MediaPlayerState`, `PrereleaseToken`, `GlitzServer`, `WaiterResult`, `IAggregationStrategy`, `TextInsertion`, `ReactFragment`, `AccessTokenRequest`, `ParsedJSXElement`, `SalesSearchOptions`, `SpaceId`, `NgStyleInterface`, `TSFunDef`, `ValueSetterParams`, `EvalEnv`, `ModalManager`, `LoggerClient`, `ZoneChangeWhisperModel`, `PrismaClientUnknownRequestError`, `VisualizeFieldContext`, `BaseItemState`, `BindingKey`, `ICustomData`, `IChunkHeader64`, `CollectionStore`, `AsyncCommandWithOps`, `EYaml`, `MPRandGauss`, `Album`, `CodeSource`, `TransactionFactory`, `InMemoryConfig`, `AccountRefresh`, `OmvGeometryType`, `IFormItem`, `native.Array`, `Instrument`, `SimpleDate`, `InitTranslation`, `MeshBuffers`, `IKLink`, `GBMinInstance`, `IFluidSerializer`, `TimingInfo`, `FileDetails`, `UseBodilessOverrides`, `IUnitModel`, `PartyClose`, `PerpMarket`, `BasicTarget`, `DeployParams`, `IRenderData`, `requests.ListWorkRequestErrorsRequest`, `ParsedDocument`, `Voyager`, `PDFForm`, `MODNetConfig`, `UseFormValues`, `NestView`, `Ancestor`, `IListenerRuleCondition`, `IValidationResult`, `VirtualFolder`, `PhysicsCollider`, `ContributionProvider`, `UseRefetchReducerState`, `jest.CustomMatcher`, `OrbitCoordinates`, `AbbreviationAttribute`, `Stereotype`, `ZipMismatchMode`, `internalGauge`, `PedProp`, `MouseManager`, `INamedObjectSchemaProperty`, `ElementAccessExpression`, `CellOutput`, `ComponentRegistrant`, `MicrophoneIterator`, `IClientOptions`, `PayloadBundleSource`, `OptimizelyXClient`, `NVMDescriptor`, `UrlPattern`, `EdgeDescriptor`, `EvAgentSession`, `CustomPropertyHandler`, `JavaRecord`, `CampaignTimelinesModel`, `AgentPubKeyB64`, `IVisitor`, `ElemAttr`, `CreateJobResponse`, `Selection`, `JPAFieldBlock`, 
`IAnimationKey`, `OrbitDefinition`, `AuthClientInterface`, `UserStoreAction`, `ClientError`, `Like`, `UITransform`, `GeoVector`, `child_process.ChildProcess`, `Mathfield`, `OpenApiOperation`, `WorkerInterface`, `HookConfig`, `RedBlackNode`, `UITextPosition`, `OpenFileDialogProps`, `MinAdjacencyListDict`, `MalSymbol`, `WmsLayer`, `StartQueryCommandInput`, `TagAttributes`, `GraphFrame`, `CoapMethodName`, `BuilderEntry`, `MappingTreeItem`, `XPCOM.nsIURI`, `BubbleLegendItem.Options`, `JobIdOption`, `CourseType`, `NotificationBarService`, `SceneState`, `Multi`, `tmp.DirectoryResult`, `EggAppInfo`, `Location`, `TikTokConstructor`, `VisitorFunction`, `ProviderResult`, `WaiterConfigurationDetails`, `DeleteUserCommandInput`, `MutationTypes`, `PDFOperator`, `PositionService`, `BlockchainSettings`, `DiscoverIndexPatternProps`, `GraphQLNamedOutputType`, `IComponentEvent`, `TextCanvas`, `ControllerEvent`, `EventPartialState`, `PyteaOptions`, `XChaCha20Poly1305`, `Compressors`, `Agreement`, `RuleDescriptor`, `CommandArg`, `RX.Types.ReactNode`, `AppStatus`, `monaco.editor.IModelDeltaDecoration`, `SplitDirection`, `ILayout`, `requests.ListIntegrationInstancesRequest`, `SendAction`, `ForkTsCheckerWebpackPluginState`, `UserToken`, `FontFeature`, `DefaultGuiState`, `Shift.Node`, `UnitBase`, `ColumnSeriesOptions`, `Deserializer`, `ReactiveChartStateProps`, `Auth0UserProfile`, `RoleRepresentation`, `NotebookEvents`, `nsIURI`, `StockData`, `CLM.CLChannelData`, `IBaseTaskParams`, `SessionConfiguration`, `AutorestArgs`, `WarframeData`, `DataTable.CellType`, `OptionElement`, `ZipResource`, `ITerm`, `ISubWorkflow`, `SpawnHandle`, `FieldAccessor`, `IEditorTracker`, `JSONMappingParameters`, `FaucetConfig`, `FungibleTokenDetailed`, `sdk.SpeechRecognizer`, `FetchEnd`, `CurrencyObject`, `EntityObject`, `AsyncOpts`, `DataFormat`, `ThemePair`, `AWS.ELBv2`, `CellOptionType`, `CommonAlertState`, `ValidateArgTypeParams`, `SuitDone`, `requests.ListLimitDefinitionsRequest`, `ScaleCreationContext`, `QueryBodyType`, `EmojiListObject`, `IGeneratorData`, `ResizerKeyDownEvent`, `IRuntimeFactory`, `BufferViewResult`, `DAL.DEVICE_ID_RADIO`, `CreateOpts`, `CreateRuleGroupCommandInput`, `d.ComponentCompilerTypeReferences`, `CustomElementRegion`, `ExternalSourceAspectProps`, `TemplateValidatorOptions`, `TextMatchOptions`, `AsToken`, `FunctionEnvelope`, `HostConfig`, `ISparqlBindingResult`, `BlobService`, `ByteReader`, `MetaheroToken`, `UpdateIPSetCommandInput`, `GraphQLAbstractType`, `WebpackDevServer`, `DatasetSummary`, `ValueHolder`, `IGetEmployeeJobPostInput`, `ActionsList`, `NumberListRange`, `ClientContext`, `DeviceTracker`, `AuthenticatedUser`, `OperatorDescriptorMap`, `ApolloClientOptions`, `NormalDot`, `Ganache`, `Netlify`, `ParsedAuthenticationInstructions`, `RenderState`, `CarouselItem`, `SearchQueryProps`, `StrapiAttribute`, `NbDialogService`, `ts.CallExpression`, `WildcardIndex`, `PoiTableEntry`, `ChainType`, `MaterialAnimationTrack`, `WildcardProperty`, `InitializeServiceCommandInput`, `Range3dProps`, `ModuleResolutionHost`, `ChannelConstants`, `ObjectOf`, `SnapshotDb`, `BubbleChartData`, `TypedRequest`, `BaseLogger`, `ThemeCss`, `ScullyRoutesService`, `TagResourceRequest`, `MP.Version`, `RarityLevel`, `ChartTooltipItem`, `P2PPeerInfo`, `PollerLike`, `L.Map`, `LockTime`, `DocumentsExtImpl`, `IKeyboardDefinitionDocument`, `OutModeDirection`, `RemoteSourceProvider`, `FC`, `JsonStringifierContext`, `TreeNodeInBlock`, `DeleteUser`, `MockMember`, `PolicyProxyHookOptions`, `IContextView`, `IOSSplashResourceConfig`, `PickerInput`, 
`NumberAttribute`, `IEnumerator`, `MVideoThumbnail`, `ZoomState`, `ComponentLoader`, `NavigationBindings`, `DataClassBehaviors`, `FormatterProps`, `BehaviorName`, `NohmClass`, `DeploymentDisconnectStatus`, `PathConfigMap`, `DIDResolutionResult`, `RecipientOrGroup`, `NumericalRange0`, `CombinedText`, `UserFromToken`, `MutableArrayLike`, `NodeSpec`, `Bid`, `UnionTypeProvider`, `OrganizationInterface`, `HsQueryBaseService`, `SnippetString`, `JSDocType`, `LanguageType`, `OptionalCV`, `ScopedHistory`, `RxTerms`, `MDCRippleFoundation`, `SolveType`, `Edge`, `eventWithTime`, `IMinimatch`, `ParamType`, `STStyle`, `LineSegments`, `LightArea`, `Puppeteer.Page`, `TypedArray`, `ContentShareObserver`, `RippleAPI`, `Sequelize`, `RestRequestOptions`, `AttrValuesStore`, `PackageManagerPluginImplementation`, `GetVpcLinksCommandInput`, `TokensBuffer`, `InsertOneResult`, `TodoItemNode`, `NFT1155V2`, `CodeType`, `VideoFrameProcessorPipelineObserver`, `WorkspaceSymbolParams`, `MapShape`, `d.DevServerContext`, `UpdateTargetMappingsWaitForTaskState`, `JavaDeclarationBlock`, `ListChannelsResponse`, `QueryCompleteContext`, `AuthTokens`, `OperatingSystem.macOS`, `BaseContractMethod`, `TypeContext`, `Reflector`, `BreadcrumbProps`, `StyleStore`, `ConfigParameterFilter`, `LogObj`, `GXRenderHelperGfx`, `GetPermissionPolicyCommandInput`, `Decomposers`, `DropPosition`, `DomainEntry`, `TypingVersion`, `IPropertyPaneConfiguration`, `ExtractorMessage`, `BaseFormValidation`, `Defines`, `ActionImpl`, `InputComponents`, `BirthdayService`, `ICreateOrgNotificationResult`, `CommandInputParameterModel`, `RawRule`, `FModel.DllFuncs`, `d.OptimizeCssOutput`, `ISharedDirectory`, `PostCondition`, `T.Task`, `SvelteIdentifier`, `ConnectionProvider`, `ModuleSymbolTable`, `IEmailOptions`, `FunctionBreakpoint`, `StaticArray`, `RateProps`, `TextMeasure`, `BooleanValidator`, `LibrarySearchQuery`, `SimplePubSub`, `ConversationV3`, `Dec`, `PathCandidate`, `ReferenceEntry`, `LintReport`, `SerializationStructure`, `ApplicationTypes`, `IterableChangeRecord`, `ITransactionOption`, `CasesClient`, `CommandOutput`, `Filesystem.ReadJsonSync`, `ConfigurationTarget`, `Mongoose.Model`, `DocumentHighlight`, `IScriptingDefinition`, `TaskStore`, `IFilterListGroup`, `RowTransformerValidator`, `Symmetry`, `K1`, `CheckPrivilegesPayload`, `AuthContextType`, `ActiveToast`, `ResolveSavedObjectsImportErrorsOptions`, `CdkToolkit`, `WebStorage`, `LinkedAccount`, `IObserverCallback`, `Article`, `IndexingConfig`, `Health`, `RegisterServerOptions`, `MUST_CALL_AND_RETURN_SUPER_METHOD`, `ICnChar`, `VRMSpringBone`, `DeleteBotVersionCommandInput`, `CanvasTypeProperties`, `GetCollapsedRowsFn`, `GfxInputLayoutP_GL`, `MigrationService`, `GetDomainNamesCommandInput`, `SampleAt`, `AudioModule`, `EventListeners`, `TwitchChat`, `PluginInsertActionPayload`, `MotionInstance`, `DAL.DEVICE_ID_MULTIBUTTON_ATTACH`, `RenderBatchKey`, `ProductFilterDTO`, `MsgCloseDeployment`, `Anime`, `IStateGlobal`, `IndicatorCCReport`, `DependencyContainer`, `Defs.CompactdState`, `RegExpLiteral`, `BeachballOptions`, `ZipkinSpan`, `GMxmlHttpRequestEvent`, `CertificateProfileType`, `InventoryStore`, `CustomToolbarItem`, `PiePoint`, `KubeArgs`, `anchor.web3.Connection`, `IAssetProvider`, `IBaseNode`, `ValidateEvent`, `Messages`, `Format`, `ComponentInstance`, `ParticipantsRemovedEvent`, `requests.ListAvailableSoftwareSourcesForManagedInstanceRequest`, `BlockBlobGetBlockListResponse`, `DecipherGCM`, `DeleteDatasetCommand`, `RewriteAppenderConfig`, `ChatError`, `TextProperties`, `ChangeSetQuery`, `EventName`, 
`UserResolvable`, `RegisterConfig`, `GunGraphAdapter`, `DebugProtocol.StackTraceArguments`, `CustomAnimateProps`, `Participants`, `Effects.SpriteEffect`, `MemberName`, `TOperand`, `SavedObjectFinderProps`, `ImageView`, `CellValueChangedEvent`, `NameSpaceInterface.Interface`, `MidwayFrameworkType`, `CoverageRunner`, `SceneActor`, `Pipeline`, `OfficeLocation`, `SelectionDirection`, `AggsMap`, `EndpointConfig`, `UpdateRoomCommandInput`, `ModelOptions`, `PlaneBufferGeometry`, `PreferenceScope`, `ListFirewallPoliciesCommandInput`, `PagerXmlService`, `KoaMiddleware`, `BTCMarkets.instruments`, `Pully`, `ShardFailureOpenModalButtonProps`, `MediaObject`, `HsCommonLaymanService`, `CommandItemDef`, `SheetRef`, `TransientStore`, `PartialPerspective`, `MenuValue`, `InvalidState`, `TemplateRoot`, `TransmartTableState`, `Closure`, `SelectReturnTypeOptions`, `TLndConf`, `IDataObject`, `vscode.QuickPickItem`, `SwiftProperty`, `AlertTableItem`, `RepeatVectorLayerArgs`, `DocItem`, `StringKeyOf`, `Wiki`, `IUsersRepository`, `GfxDebugGroup`, `MenuServiceStub`, `OpenCVOperatipnParams`, `Traversable2`, `ApolloError`, `EuiSwitchEvent`, `child.ExecException`, `ITestsService`, `PreviewDataImage`, `ChallengeData`, `SqrlKey`, `GoogleMeetSegmentationOperationParams`, `SimulateOptions`, `ConfirmDialogService`, `WalletLinkRelayAbstract`, `ITenantManager`, `BN`, `DataCollection`, `NZBResult`, `PrivateAuthenticationStore`, `GunGraph`, `StoneTypeArray`, `LocatorDiff`, `ContainerArgs`, `IUserDocument`, `GX.CompCnt`, `ExportContext`, `AlertCluster`, `EventHit`, `ThyDragStartEvent`, `IFindWhereQuery`, `StorageObjects`, `ParticleEmitter2Object`, `AddToLibraryAction`, `MapPartsRailMoverNrv`, `SubscribeActions`, `SignedMessage`, `SceneModel`, `DotenvParseOutput`, `StripePaymentIntent`, `StoreActions`, `BucketMetadataWithThreads`, `SpeechTranslationConfigImpl`, `CreateAccountCommandInput`, `ExpressionAttributeValueMap`, `MixArgs`, `Math2D.UvBox`, `SpyPendingExpectation`, `NxData`, `Dialogic.MaybeItem`, `GroupMetadata`, `Ringmodulator`, `AdBreak`, `IFilterInfo`, `RollupAggregator`, `ViewCompiler`, `IFullProps`, `ListContentConfig`, `PersistedStore`, `ResponderDimension`, `DefaultApp`, `Events.kill`, `SetupPlugins`, `EntityCache`, `ISearchState`, `OidcProviderService`, `EchartsProps`, `VehicleState`, `ToggleType`, `SubProg`, `PostMessageService`, `DescribeConnectorProfilesCommandInput`, `SigError`, `ApiOptions`, `CalendarOptions`, `MongooseQueryParser`, `DQLSyntaxErrorData`, `CdkOption`, `LastfmTopTracks`, `PackageInstallationResult`, `ClassExportDoc`, `CategoryPreferences`, `Apollo.SubscriptionHookOptions`, `SessionOptions`, `HeadingProps`, `RstStreamFrame`, `EditWidgetDto`, `PluginDebugAdapterContribution`, `CustomTag`, `Tnumber`, `AnyConstructor`, `IMinemeldCandidateConfigNode`, `oai3.Schema`, `ICompletionItem`, `HighlightSpan`, `Slide`, `GfxRenderInst`, `LogWrapper`, `IClient`, `BigSource`, `TopicForIndicator`, `IInspectorRow`, `sdk.BotFrameworkConfig`, `JOB_STATE`, `SerializeOpts`, `GradientStop`, `DeleteDatabaseCommandInput`, `XRangePoint`, `DataTypeFieldConfig`, `ResultMapper`, `UIState`, `UpdateApplicationCommandInput`, `PathAndExtension`, `ProjectService`, `MnemonicX86`, `TestBufferLine`, `CollectionService`, `IMenuItem`, `FieldResultSettingObject`, `TaskInputs`, `MappingTreeArray`, `IAuthOptions`, `ITokenService`, `SidebarService`, `FieldDeclaration`, `LoggingInfo`, `ContractABI`, `IOAuthTokenResponse`, `PriceAxisViewRendererOptions`, `WebGLBuffer`, `ts.CompletionEntry`, `T8`, `TriDiagonalSystem`, 
`SavedObjectsDeleteOptions`, `IMergeNode`, `StyleMap`, `PluginExtended`, `CommandBus`, `LineData`, `ExplorationInfoParameter`, `Space2DSW`, `TSize`, `BitcoinNetworkConfig`, `PlayerLink`, `IStatusView`, `RuleId`, `Structure`, `requests.ListWafBlockedRequestsRequest`, `IHawkularAlertQueryResult`, `IndexOptions`, `NetworkRequestInfo`, `FilteringPropertyDataProvider`, `SubnetMapping`, `Nodes.DocumentNode`, `LogAnalyticsParser`, `Composition`, `CacheConfig`, `interfaces.CommitType`, `IDeploymentCenterContext`, `FilterOption`, `SchemaComparator`, `ProjTreeItem`, `DeviceSelector`, `MpqFile`, `CachePolicy`, `UpdateArticle`, `MatrixClient`, `IObserverLocator`, `JsonSchema.JSONSchema`, `SemanticsFlag`, `PropertyDescriptor`, `TwingFilter`, `ContributorService`, `NotSkeletonDeep`, `ThemeContextType`, `FieldFilterRowData`, `ActionTypeExecutorOptions`, `CONNECTION_STATUS`, `SortingOption`, `IUserSubscription`, `OptionalObject`, `RefreshableView`, `IFiber`, `HyntaxToken`, `BatchNormalization`, `FilterizrOptions`, `CloseEditor`, `TypeAST`, `VRMHumanoid`, `DescribeFlowCommandInput`, `RosException`, `RecoveredSig`, `PanelConfigProps`, `Color4`, `ChildDatabase`, `Vote`, `SkipBottomButtonProps`, `XSort`, `LevelUpChain`, `BreadcrumbsProps`, `SolanaNetwork`, `Konva.Layer`, `EmbeddableRendererProps`, `GenericBinaryHeapPriorityQueue`, `TaskCustomization`, `q.Tree`, `SortableEdge`, `WebAccount`, `LobbyHouse`, `RecordedTag`, `ExecResult`, `UseQuery`, `ToneOscillatorType`, `ListTablesCommandInput`, `SubgraphDataContextType`, `MatPaginatorIntl`, `protocol.FileLocationRequest`, `ArcTransactionProposalResult`, `ParseNodeArray`, `IPartyMember`, `CheckboxState`, `BlobWriter`, `SpaceFilter`, `CreateNoticeDto`, `StartDependencies`, `ImmutableSelectorNode`, `WebViewExt`, `Reg`, `core.ITenantManager`, `IDateRangePickerState`, `RstatementContext`, `SVGPath`, `TEntry`, `$T`, `CustomDocumentStoreEntry`, `PlayerService`, `LambdaContext`, `WheelEvent`, `Simulation3D`, `requests.ListExternalDatabaseConnectorsRequest`, `IMeetingRepo`, `IPromiseRetry`, `FeatureOptions`, `DirFileNameSelection`, `CreateRegexPatternSetCommandInput`, `SysUser`, `IRequest.Options`, `Mounter`, `InteractiveState`, `WebGLActiveInfo`, `BuildDefinition`, `IndexedAccessType`, `ScriptTags`, `TFunction`, `StreakItem`, `VNodeChildren`, `FlowElement`, `VaultEntry`, `types.Output`, `UnitRecord`, `Processed`, `Lazy`, `StructureRoad`, `DidactPanel`, `ApiSection`, `Program`, `WorkerMainController`, `PrismaService`, `I2CWriteCallback`, `DebugProtocol.EvaluateArguments`, `Web3Client`, `Preflight`, `TradeService`, `StacksConfigRepository`, `LookupInResult`, `IBindingSetting`, `RouteGroup`, `FormatErrorMetadata`, `DeleteDatasetResponse`, `PresentationTreeDataProvider`, `Passenger`, `LuaParse`, `RequestState`, `EntityChangeEvent`, `ApiResource`, `GeneralOptions`, `Apply2`, `Err`, `AttachmentView`, `QueueType`, `TextView`, `ResourcePack`, `UserProfileFactory`, `WindowProps`, `ConsoleInterceptor`, `Matrix3d`, `IUtilityStoreState`, `Delivery`, `GetInput`, `OptionPureElement`, `NewRegistrationDTO`, `V1Service`, `IAddressBookState`, `KeyPairTronPayments`, `ListHttpMonitorsRequest`, `TaskExecutor`, `ElementPaint`, `RtspSession`, `EnvironmentAliases`, `Sourcelike`, `C4`, `QueryConfig`, `ConditionOperatorName`, `DependencyType`, `ChainsService`, `TaskExitedEvent`, `AbortMultipartUploadCommandInput`, `MangaListStatusFields`, `VisibilityFilter`, `Axis3D`, `ILiquidationCandidate`, `AuthUser`, `ClusterClient`, `ImportStatement`, `comparator`, `AnimGroup`, `App.IPolicy`, 
`DataRequestContext`, `TypeTarget`, `DetailedOrganizationalUnit`, `IAudioStreamNode`, `GetResponseBody`, `d.CompilerWorkerContext`, `UserStoreReference`, `SuggestMatch`, `AssociationCCReport`, `PollingPerformanceObserverTaskQueue`, `InputBlock`, `requests.ListStandardTagNamespacesRequest`, `HypermergeNodeKey`, `KeyState`, `GanttSettings`, `StepDefineExposedState`, `SearchAllResourcesRequest`, `GlobalPositionStrategy`, `DeeplyMockedKeys`, `messages.PickleStepArgument`, `SerializeSuccess`, `DragRef`, `WithSubGeneric`, `LineIndex`, `IJsonDocument`, `TernarySearchTree`, `gang`, `CellKey`, `ResourcePage`, `DMMF.SchemaEnum`, `LabelStyle`, `xlsx.Sheet`, `ICredentialDataDecryptedObject`, `NumberParams`, `TransientState`, `TerminalWidget`, `ECDH`, `ViewConfig`, `ProtocolConformanceMap`, `Conditional`, `ProductVariantService`, `ServiceNameFormatter`, `SceneObjectBehavior`, `JoinedEntityMetadata`, `SpriteFontOptions`, `NodeDict`, `KibanaSocket`, `ListInstancesCommandInput`, `RESTService`, `FullyQualifiedScope`, `Facebook`, `GuildDocument`, `FirestoreError`, `IVocabularyTag`, `ResponderExecutionStates`, `RoutesManifest`, `PhrasesBuilder`, `Float32List`, `ElevationProvider`, `SavedObjectManagementTypeInfo`, `EllipsisNode`, `KeymapItemEditableProps`, `IntrospectionInputValue`, `GlobalMaxPooling1D`, `UpdateDatabaseResponse`, `SCNVector3`, `MockFixture`, `SelectedState`, `ast.FunNode`, `Windup`, `PolylineProps`, `CreateInstanceProfileCommandInput`, `IResponse`, `SelectionScopeRequestOptions`, `AllQueryStringTypesCommandInput`, `TestDataset`, `PaymentDataRequest`, `CustomSeriesRenderItemAPI`, `TestExecutionContext`, `RequestProvider`, `SavedObjectSaveOpts`, `Preparation`, `ButtonTool`, `FlagType`, `BackgroundBlurVideoFrameProcessorObserver`, `ISyncedState`, `ISubscribable`, `SolanaKeys`, `QueryFunctionContext`, `BigBitFieldResolvable`, `ParameterExpression`, `StaticEllipseDrawerService`, `Web3Utils`, `ContainersModel`, `MarkdownEngineConfig`, `StyleSet`, `CancelableRequest`, `WalletType`, `MessageTypes`, `EntityDefinitionService`, `XUL.tabBrowser`, `LimitItem`, `IdentifierObject`, `SecretRule`, `RawSavedDashboardPanel640To720`, `ClockHand`, `KhouryProfCourse`, `DefaultSDP`, `IStateProps`, `TupletType`, `Web3Callback`, `WetAppBridge`, `DaffCompositeProductItem`, `OverlaySizeConfig`, `Types.RequestParameters`, `ICredentialsResponse`, `FileGroup`, `VNodeThunk`, `IWebAppWizardContext`, `EditorPlugin`, `DejaTreeListComponent`, `sdk.ConversationTranslator`, `EntityDispatcherDefaultOptions`, `StreamReader`, `StatePages`, `IClassicListenerDescription`, `DecodedToken`, `StructuredAssignment`, `Hobby`, `OAuthRedirectConfiguration`, `SessionStateControllerAction`, `Cypress.Chainable`, `StreamSpecification`, `SAXParser`, `JSONObject`, `CallFrame`, `ResolverProvider`, `CommentNotification`, `CharacterStore`, `RemovePermissionCommandInput`, `NodeRpcService`, `StateDB`, `UninstallEventData`, `ListSwipeAction`, `KickGroupUsersRequest`, `Spectator`, `Overmind`, `ClassDefinition`, `TriggerState`, `IEmployeeCreateInput`, `DockerContainerProps`, `NotificationId`, `TestCursorQuery`, `Monad`, `PlaceholderProps`, `Trackable`, `CoreDeploy`, `GroupEntity`, `StreamModule`, `MML`, `CAInfo`, `IssuerPublicKeyList`, `GeneratorOptions`, `AssetOptions`, `InternalException`, `VersionConstraintContext`, `ActiveWindow`, `StringWithEscapedBlocks`, `RGBValue`, `LoginItemProps`, `http.RequestOptions`, `requests.ListFileSystemsRequest`, `LoopMode`, `NodeCue`, `ImageTileEnt`, `PullIntoDescriptor`, `AnalyzedStyle`, `Delay`, `ContractAbstraction`, 
`Plane`, `TravisCi`, `DialogType`, `Invalidator`, `GX.TexPalette`, `TextDecoder`, `RequestConditionFunctionTyped`, `ReuseTabCached`, `MainPackage`, `Preferences`, `SpatialStandupRoom`, `Adb`, `ODataQueryOptionHandler`, `CardTitleProps`, `IShareButton`, `GridItemEvent`, `Bookmark`, `ScreenshotDiff`, `StubbedInstance`, `Overloads`, `BehaviorMode`, `ConfigurationContext`, `MockCSSStyleDeclaration`, `AppRecord`, `ServiceWorkerGlobalScope`, `IRouterConfig`, `AppSettingsService`, `SQLRow`, `ParamData`, `sdk.AudioConfig`, `ast.ClassDeclaration`, `RgbTuple`, `CLM.TrainScorerStep`, `MockEntityMapperService`, `DebtTokenContract`, `Dirent`, `IModify`, `NumberType`, `TransformLike`, `TaskSchedule`, `d.OptimizeJsResult`, `THREE.Group`, `WithSerializedTarget`, `ParseAnalysis`, `QuotaSetting`, `fhir.Task`, `AnyRegion`, `TxsTopicData`, `ITarget`, `Scroller`, `common.Keybinding`, `TSLintAutofixEdit`, `RouteManifest`, `TestDataService`, `EasyPzCallbackData`, `BridgeInfo`, `CreateSampleFindingsCommandInput`, `EventDescriptor`, `ValidCredential`, `Case`, `IImportedArmy`, `Verify`, `requests.ListCostTrackingTagsRequest`, `CSSRule`, `DAL.DEVICE_ID_SYSTEM_TIMER`, `CombatService`, `BodyType`, `RenderServiceMock`, `Phaser.Scene`, `IUserDto`, `TooltipProps`, `IStatRow`, `Main.LogScope`, `SubDirectory`, `Odb`, `RpcResponseAndContext`, `TabOption`, `Geolocation`, `APIGatewayEvent`, `IncomingRequest`, `Prefetch`, `DeleteServiceRequest`, `NotifyFunc`, `Desc`, `ItemCount`, `DaffAccountRegistration`, `FileStructureType`, `ToggleConfig`, `MigrationLifecycleStates`, `IEndpoint`, `AbstractProject`, `MDCActivityIndicator`, `HTMLStencilElement`, `MapboxMarker`, `TransactionAsset`, `DatasetLocation`, `ServiceBinding`, `IStatus`, `ResolvedFunctionTypeParam`, `StrategyParameter`, `GLM.IArray`, `MathToSVGConfig`, `TLinkCallback`, `IDinoContainerConfig`, `UIFileHelper`, `DropdownOptions`, `EntityInterface`, `ErrorPropertiesForName`, `ComponentWithAs`, `PaperSource`, `XRFrameOfReference`, `DownloadTarget`, `VariableDeclarationContext`, `EncryptContentOptions`, `AndroidActivityEventData`, `CoordinateXYZ`, `PluginLoader`, `KBN_FIELD_TYPES`, `LineString3d`, `IconPack`, `WebpackAny`, `MomentInput`, `PyrightJsonDiagnostic`, `ProductDetailPage`, `STPPaymentIntent`, `CommentSeed`, `BaseIO`, `AssertionLocals`, `MetaInfoDef`, `CreateAppRequest`, `ExpressionsService`, `VFS`, `EngineDetails`, `BlockClass`, `BubbleSeriesStyle`, `AbstractProvider`, `ERC20Mock`, `JKRArchive`, `CountItem`, `NonMaxSuppressionResult`, `CarService`, `DeviceTypes`, `DiscordBot`, `requests.ListDbCredentialsRequest`, `CommandDefinition`, `MapDispatchToPropsParam`, `BigNumberValue`, `JitsiRemoteTrack`, `GetDedicatedIpCommandInput`, `PointerDragEvent`, `CompanyType`, `AddressBookInstance`, `PlaceholderContent`, `RippleCreateTransactionOptions`, `CaseItem`, `IAggFuncParams`, `DynamicFlatNode`, `IDefinition`, `Poller`, `NodeFilter`, `ProjectState`, `VideoStreamRendererViewState`, `Path2`, `NotRuleContext`, `ApiMetadata`, `Clickable`, `OnPreRoutingToolkit`, `AlphaDropout`, `MessageToken`, `Delete`, `VideoCapture`, `SearchMode`, `WaitStrategy`, `FormikActions`, `ApolloReactHooks.QueryHookOptions`, `HistoryEvent`, `SendResult`, `Phaser`, `ResizeGripResizeArgs`, `DeleteDocumentCommandInput`, `SpaceType`, `peerconnection.PeerConnection`, `MultigraphRequestOptions`, `HashSet`, `DateRangeKey`, `VariableArgs`, `WordCharacterClassifier`, `PadchatMessagePayload`, `IOpdsLinkView`, `DeployStatus`, `StyleType`, `RecordOf`, `IProviderOptions`, `AnimatorFlowValue`, `TokenState`, 
`GestureType`, `AstDeclaration`, `BrowsingPage`, `DidDocumentService`, `AnalysisEnvironment`, `requests.ListOAuthClientCredentialsRequest`, `ITenantCreateInput`, `Int16Array`, `RingBuffer`, `SkiplistNode`, `IFileBrowserFactory`, `LhcDataService`, `DisplayValueMapService`, `BaseOptions`, `ErrorDetailOptions`, `ts.ParsedCommandLine`, `ActivityPropertyDescriptor`, `Rec`, `Babel`, `RoleData`, `TemplateAnalyzer`, `DOMElementType`, `DatabaseV2`, `PrismaClientInitializationError`, `TransformCssToEsmOutput`, `LogTracker`, `BaseBigNumber`, `ScriptObject`, `PythonVersion`, `WalletConnectProvider`, `ContributionProposal`, `FFTProgram`, `GltfFileBuffers`, `CallEffect`, `ChartScene`, `SPort`, `AppRedux`, `BpmnContext`, `ReshapePackedProgram`, `IAmazonServerGroupView`, `TestColdObservable`, `ScopedPlannerConfiguration`, `PresentationPropertyDataProvider`, `TreeMeta`, `FailedImport`, `PathComponent`, `GithubService`, `AsyncFnReturn`, `TS.Node`, `PadchatContactPayload`, `ICourseDashboard`, `ExportedDeclarations`, `ReleaseOptions`, `ListCertificateAuthoritiesCommandInput`, `Walker.Store`, `Level2`, `ServiceEndpointPolicyDefinition`, `NodeJS.ErrnoException`, `OrganizationSlug`, `CloudFrontHeaders`, `IndexKey`, `DkrLevel`, `SQLeetEngine`, `Config.DefaultOptions`, `ILineDiv`, `MessageDeserializationOptions`, `CopyDirection`, `HostRef`, `IOperator`, `BadgeInfo`, `GetAllRequestBuilder`, `requests.ListMfaTotpDevicesRequest`, `LevelService`, `IText`, `EntityComponent`, `SaveFileArgs`, `AnimationsService`, `DateTimeNode`, `SchemaQuery`, `WKNavigation`, `PayloadDictionary`, `IJoin`, `ListItemProps`, `MathOptions`, `CommitOrDiscard`, `externref`, `TokenSharedQueueResult`, `TransformerHandle`, `VdmMapping`, `ExecutionContext`, `RoosterCommandBarButtonInternal`, `UpdateIdentityProviderCommandInput`, `CreateReactClientOptions`, `NodesRef`, `RemoteTokenCryptoService`, `WorkspaceConfig`, `KnownTokenMap`, `ImportStateMap`, `IControllerAttribute`, `BreakStatement`, `SuiSelectOption`, `Size`, `DeprecatedHeaderThemes`, `Json.ArrayValue`, `RequestError`, `DAL.KEY_TAB`, `TestFixtureComponent`, `GitCommittedFile`, `GroupChannel`, `FireLoopRef`, `TypeAcquisition`, `SnackbarAction`, `social.UserData`, `DomRenderer`, `Runtime.MessageSender`, `m.Component`, `RowViewModel`, `PrerenderContext`, `BatchCreateChannelMembershipCommandInput`, `requests.ListExportSetsRequest`, `SensorElement`, `TeamCity`, `FrontmatterWithDefaults`, `MultipartFileContract`, `SyncHandlerSubsetOf`, `VideoGalleryStream`, `UI5SemanticModel`, `BotConfig`, `RouteConfig`, `ArgumentParser`, `HeaderComponent`, `FileDefinition`, `MigrationBuilder`, `GroupUserList`, `ExpressionAstExpression`, `ThyAbstractOverlayRef`, `UID`, `ChangeTracker`, `EngineArgs.ApplyMigrationsInput`, `PaginatedQueryFetchPolicy`, `RemoteUserRepository`, `StaticConfig`, `RenderedChunk`, `OperationElement`, `DataResolver`, `SuccessAction`, `PDFContext`, `PredictableHook`, `NameValue`, `CppCbToNew`, `ChartDef`, `IConfigData`, `AudioConfig`, `Endomorphism`, `ComponentSize`, `QualifiedRule`, `ITreeEntry`, `MetadataReader`, `TwingOutputBuffer`, `RuleData`, `LoanCard`, `IServerResponse`, `TabStyle`, `TAccumulate`, `DecodeContinuouslyCallback`, `Cumulative`, `WatchDecorator`, `IPageModel`, `CreateComponentCommandInput`, `ValueReadonly`, `RendererPlugin`, `Semester`, `EventFilter`, `Design`, `BackstackEntry`, `CommandService`, `DriveNumber`, `IosBuildName`, `ColdObservable`, `Float32Array`, `ContentRepository`, `DeploymentFileMapping`, `TStore`, `DomController`, `AccountsStore`, `Movement`, 
`PathPredicate`, `ALSyntaxWriter`, `ShipPlugin`, `DraggableElement`, `GX.TexCoordID`, `Signature`, `IRepository`, `Exact`, `IIStateProto`, `ResourceKind`, `requests.ListInstanceDevicesRequest`, `GasOption`, `WebGL2DisjointQueryTimerExtension`, `nockFunction`, `LayersModel`, `Machine`, `ClockFake`, `ChampionsLeagueStat`, `UpdatableChannelDataStore`, `PartialBot`, `AttributeModel`, `SourceNode`, `KeyboardLayoutData`, `ExportCollector`, `SpotifyApi.CurrentUsersProfileResponse`, `IContainerType`, `TypedTransaction`, `UpdatePipelineCommandInput`, `StringNote`, `n`, `StepBinary`, `listenerHandler`, `ApiTypes.Feed.Like`, `IndexStats`, `TEX1_Sampler`, `requests.ListVolumeGroupsRequest`, `CampaignTimelineChanelPlayersModel`, `STPPaymentHandlerActionStatus`, `BrowserWindow`, `CalibrationResponseAction`, `UiThread`, `TransactionsResponse`, `StructType`, `Layer`, `GfxTextureSharedP_WebGPU`, `SwaggerPath`, `StoryListener`, `ContextMenuAccess`, `BroadcastMode`, `ListTournamentRecordsRequest`, `ResponsePromise`, `AllPlatforms`, `DeletePolicyRequest`, `SelectionManager`, `Moize`, `TypeError`, `AdaptServer`, `MediaQueries`, `CreateCrossAppClickHandlerOptions`, `IImposer`, `GetParams`, `ParsedTranslationBundle`, `RelationsInputType`, `SqrlTest`, `MacroActionId`, `RouteFilterRule`, `ConverterDiagnostic`, `HashParams`, `postcss.Declaration`, `ProdoPlugin`, `RunSuperFunction`, `ReplicationConfiguration`, `messages.Rule`, `EditorState`, `AppConfig`, `A4`, `ListWebhooksCommandInput`, `IExecuteCommandCallback`, `BooleanLiteralExpr`, `CalendarManager`, `Cursor`, `SubsetConstraints`, `ShaderSpec`, `NumberNodeParams`, `NodeWithPosition`, `OpenAPI.Parameter`, `ColumnSummary`, `InterfaceName`, `UrlGeneratorInternal`, `ThyClickDispatcher`, `GridGraph`, `Workunit`, `InputChangeEvent`, `ApiEnumMember`, `requests.ListDbSystemsRequest`, `PanGestureHandlerGestureEvent`, `TokenRecord`, `IntegerType`, `CompletedLocalIpcOptions`, `TimesliceMaskConfig`, `IUserPPDB`, `IBeacon`, `TabularDataset`, `VirtualInfo`, `OutputData`, `EdmxActionImport`, `EntityMapEntry`, `ZoneAndLayer`, `ISummaryTree`, `RenameMap`, `WithUserAuthOptions`, `MenuContextProps`, `DlpServiceClient`, `BackgroundTrack`, `BN.Value`, `DirectionMode`, `DAGDegrees`, `ethers.ContractTransaction`, `FeatureService`, `HandlerInput`, `FilesystemEntry`, `TorrentInfo.MediaTags`, `GraphStoreDependencies`, `AggsCommonSetup`, `TileInfo`, `MicrosoftDevTestLabLabsResources`, `Air`, `TickAutomationEvent`, `ChangeListener`, `RecordsGraph`, `ExpectedCompletionEntryObject`, `EChartOption`, `ItemView`, `ConnectionContext`, `EnumStringHelper`, `RejectOnNotFound`, `ts.FormatCodeSettings`, `CoverConfiguration`, `SymbolParam`, `ResultError`, `STORES`, `IExtendedCommit`, `LogicalKeyboardKey`, `FormatParams`, `GetDomainRecordsRequest`, `ExecOutput`, `RouteValidator`, `MinifyOptions`, `ContractBuilder`, `MetaDataModel`, `WorkspaceSummary`, `keyboardKey`, `PGOrbitsDef`, `DATA`, `TestabilityRegistry`, `UpdateCustomEndpointDetails`, `AccountGameCenter_VarsEntry`, `TAction`, `TemplateItem`, `MovimientosService`, `MutableVideoPreferences`, `CdkVirtualScrollViewport`, `HTMLAudioElement`, `PublishJob`, `LinesChangeEvent`, `DiceRollerPlugin`, `IStudy`, `CompiledCard`, `StoreNames`, `FirstValue`, `GfxFormat`, `LContext`, `IFlowItemComponent`, `CompletionsCollector`, `FunctionParameter`, `PublishCommandInput`, `OpenSearchDashboardsReactPlugin`, `IScoutStems`, `HMAC`, `TabStrip`, `AnalyticUnit`, `DP`, `EffectFunction`, `AddressBookService`, `FuseResult`, `PlayerIndexedType`, `T19`, `RenderObject`, 
`ArmService`, `RepositoryData`, `CodeFixAction`, `TagValueType`, `TypeSpec`, `WgConfigFile`, `TsunamiContext`, `LeanDocument`, `MockAttributeMap`, `RegExpMatchArray`, `ForAllSuchThatInput`, `ClientWrapper`, `MappableType`, `d.DevServer`, `paper.PointText`, `TriggerEventCommand`, `SavedObjectsClientContract`, `MActorSignature`, `FieldsAndMethodForPositionBeforeCurrentStrategy`, `ThematicDisplayProps`, `OTRRecipients`, `StringListNode`, `AccountManager`, `FormatTraits`, `Particle`, `UploadProps`, `CppConfigItem`, `GetConfigurationSetEventDestinationsCommandInput`, `ICellData`, `ISubscribe`, `Undo`, `CreatePolicyVersionCommandInput`, `IconifyAPIQueryParams`, `Statistics`, `ActionCreatorFactory`, `MockUser`, `SummaryCalculator`, `GraphLayoutType`, `IStreamChunk`, `TypeormRawSetting`, `WishListRoll`, `JSONSchemaObject`, `TextField`, `PaginationComponent`, `InvalidArgumentException`, `UserAgent`, `TLocaleType`, `MarkupElement`, `Manager`, `OcsConnection`, `PointerAllocationResult`, `ArcoOptions`, `ListServicesCommandInput`, `XMLBuilder`, `NodeKey`, `ActionsSdkConversation`, `CodeError`, `CoreState`, `IReaderState`, `ConnectionGraphicsItem`, `BooleanType`, `chrome.tabs.TabActiveInfo`, `TweetEditorState`, `IFooter`, `MiToolsSyncData`, `MessageTimer`, `Config`, `GfxRenderTargetDescriptor`, `StorexHubApi_v0`, `TdpClient`, `ObjectContent`, `ts.LineAndCharacter`, `SymShape`, `WWAData`, `BindingContext`, `BoxPlotData`, `RouteOpt`, `BaselineInfo`, `EmailConfirmationsStore`, `TextureCubeMap`, `DiagramMaker`, `MapLocation`, `IPermissionReturnType`, `CalendarEventsCache`, `MultiTrie`, `DescribeDBSnapshotsCommandInput`, `CodeActionCommand`, `Uint64Array`, `ApplicationStub`, `CFDocsDefinitionInfo`, `HttpBatchLinkHandler`, `ReactiveArray`, `ResolvedAtomType`, `WorkRequest`, `PortingProjects`, `HoverInput`, `TypeScriptVersion`, `DetectorConfiguration`, `ReportingNotifierStreamHandler`, `SysTask`, `RecoilValueReadOnly`, `ConfirmDialog`, `MessageSignature`, `tcp.Connection`, `From`, `VirtualMachineRunCommandUpdate`, `Importer`, `KeyList`, `IEventType`, `_PresignUploadRequest`, `BasicUnit`, `NimAppState`, `GetSpaceParams`, `IIconOptions`, `ExpressLikeRequest`, `MenuIDs`, `BazelWorkspaceInfo`, `CodeFixContextBase`, `ActivityState`, `GetMyProfileCommand`, `FunctionAppContext`, `APIError`, `StructProp`, `ExpandedEntry`, `ImageEdits`, `ProjectTaskProperties`, `NotAuthorizedException`, `GfxWrapMode`, `BootOptions`, `AdamOptimizer`, `Paging`, `BoxStyleProps`, `ICommandWithRaw`, `AddValue`, `ImageModel`, `DataRecord`, `SPClientTemplates.FieldSchema_InForm`, `IConnected`, `DescribeWorkspaceCommandInput`, `DataProcessor`, `Flag`, `EmitResolver`, `AuthenticateModel`, `DeploymentStatus`, `ListPortalsCommandInput`, `NodeJS.Platform`, `Compose`, `TestConfig`, `Component`, `ObjectFactory`, `ConnectionDTO`, `CaretPosition`, `request.Test`, `ValidationException`, `PageDependencies`, `CreateIdentityProviderCommandInput`, `DinoErrorController`, `LegendPositionConfig`, `RichLedgerRequest`, `JGOFIntersection`, `AveragePooling2D`, `RectL`, `Locales`, `IDocumentSystemMessage`, `DaffAddressFactory`, `DecimalFormatOptions`, `d.OutputTargetDistLazy`, `GhcModCmdOpts`, `Trace`, `TodoModel`, `PerformanceObserverEntryList`, `TypeDeclaration`, `BridgeToken`, `TranslationStorage`, `moneyMarket.oracle.PricesResponse`, `TypeEmitOptions`, `StorageOptionsChangeData`, `SupervisionResult`, `ILoader`, `PercentileRanksMetricAggDependencies`, `UserRepository`, `PackageDiffImpl`, `ToolingLog`, `ListColumnSetting`, `MappingFn`, `React.MouseEventHandler`, 
`StartupInfo`, `IterableReadable`, `Tristate`, `XmlStateConsumer`, `ISocket`, `StateT1`, `Box3`, `PlanValidationOutcome`, `SignatureOptions`, `DDL2.IField`, `Argument`, `SchemaConstructor`, `DataListItem`, `FieldHierarchy`, `RefreshService`, `IReport`, `IBankAccount`, `Scheduled`, `Comments`, `MaybeArray`, `BuildFailure`, `IMailTransferAgent`, `ResourceModel`, `TensorBuffer`, `ActionKey`, `React.AnimationEvent`, `RTCPeer`, `GuideEntryType`, `Random`, `LogMessage`, `IResource`, `AuthenticationResult`, `ParseConfigHost`, `TelemetryWorker`, `FunctionAppRuntimeSettings`, `IFeatureOrganization`, `YfmToc`, `AutorestContext`, `RestoreFn`, `Drone`, `BinaryBody`, `SfdxCommandDefinition`, `ReplicationState`, `ShaderVariable`, `UseSavedQueriesReturn`, `SecondaryIndex`, `AtlasResourceItem`, `Deps`, `DescribeDomainsCommandInput`, `Disembargo_Context`, `IViewPort`, `IDejaGridColumn`, `IBasicProtocolMessage`, `requests.ListCloudExadataInfrastructuresRequest`, `DisplayObject`, `RequestWithUser`, `TaskFolder`, `DocumentDelta`, `Http3FrameType`, `ProfileNode`, `ITabData`, `IGlTFParser`, `IOpenApiImportObject`, `IAssetsProps`, `LeafNode`, `MoveT`, `t.Context`, `android.webkit.WebView`, `Hooks`, `ServerConfigResource`, `PeerSetupWithWallets`, `WebpackConfigurator`, `SignalState`, `ChangeTheme`, `CSSStyleDeclaration`, `BinaryEngine`, `DashboardCollectorData`, `CombinedField`, `ValveState`, `IApiConnection`, `op`, `ZoomDestination`, `IListProps`, `ListDetectorsCommandInput`, `Simulation`, `FileMatcherPatterns`, `IPercentileAggConfig`, `IRelease`, `SizeT`, `MethodDescriptor`, `NavigatorOptions`, `Lookup`, `ScopedLabel`, `ExpectApi`, `MapGroup`, `SRWebSocket`, `RX.Types.DragEvent`, `VisitResult`, `InputFieldDefinition`, `AccountBase`, `AaveV2Fixture`, `juggler.DataSource`, `LemonTableColumn`, `HsvaColor`, `CommentThread`, `MySQL`, `OrganizationRepository`, `MaterialOptions`, `AbiEntry`, `Params`, `Screen`, `PropertiesSource`, `Counter2`, `AggregatedStat`, `ApolloServer`, `CertificateVerify`, `ListSubscriptionsResponse`, `WebSocketLink`, `JiraIntegrationService`, `NineZoneManager`, `UpdateComponentCommandInput`, `Url`, `RangeBucketAggDependencies`, `TranslationConfig`, `RouterSpec`, `NextResponse`, `VersionDataService`, `DocumentSpan`, `ILicenseState`, `RuleSet`, `DependencyManager`, `Erc20Mock`, `TempFile`, `WatchOptions`, `ObjectUpdatesService`, `PusherChannel`, `FinalConfig`, `IStringFilter`, `GeneratorTeamsAppOptions`, `City`, `Basic`, `BeaconBlockHeader`, `PiProjection`, `ApplicationRepository`, `EPerspectiveType`, `ExistsExpression`, `ProductReview`, `BaseToken`, `BorderStyleProps`, `IconInfo`, `Animator`, `LocaleTree`, `DescribeRegistryCommandInput`, `ModelStore`, `IDict`, `SpriteArgs`, `TextChunk`, `SynthIdentifier`, `CustomCallAst`, `ResolveInfo`, `StockItem`, `RuntimeContext`, `ConstrainDOMString`, `TransferTransaction`, `PackageManifest`, `CallGNode`, `RTCDataChannel`, `LayoutElement`, `SharingSession`, `DbDrop`, `MdcSlider`, `GlobalChartState`, `ResponseFactory`, `PathFilterIdentifier`, `ErrorCollection`, `ErrorHandlingResult`, `ListApplicationsCommandInput`, `ComposibleValidatable`, `ShadowCastingLight`, `IScalingPolicy`, `FirebaseOptions`, `ProjectsActions`, `RegistryService`, `Highland.Stream`, `Thickness`, `PromiseOr`, `ITccProfile`, `ResolvablePromise`, `ChartProps`, `AppearanceMapping`, `ObjectFieldNode`, `messages.Attachment`, `CommandControlMessage`, `ListSettings`, `DependencyOptions`, `ICertificate`, `FastFormFieldMeta`, `CratePackage`, `ITest`, `RecordObject`, `Survey.Question`, 
`ProxyServerSubscription`, `TimelineMax`, `EncryptedWalletsStore`, `JestExt`, `ITranslator`, `NavItemProps`, `SynState`, `ng.IAugmentedJQuery`, `PortRecord`, `CommittedFile`, `FilamentSpool`, `AnyNgElement`, `RouteService`, `StreamInfo`, `PnpmShrinkwrapFile`, `ServiceWorkerVersion`, `CellEditor.CellConfig`, `RequestOptions`, `MapControls`, `LocalOptions`, `WalletState`, `ToggleableActionParams`, `KC_PrismHit`, `NetconfForm`, `ts.ParenthesizedExpression`, `FieldErrors`, `JsonFormsCore`, `ICounter`, `AllOptions`, `JSXTemplate`, `Importance`, `DataContextGetter`, `SendOverrides`, `GalaxyMapIconStatus`, `Int64Value`, `ConchQuaternion`, `NativeInsertUpdateOptions`, `EnvelopesQuery`, `ExpressionFunctionOpenSearchDashboards`, `IOracleListener`, `OffsetOptions`, `StrictValidator`, `PutIntegrationCommandInput`, `InputSettings`, `translateMapType`, `Big`, `HsvColor`, `ConsoleWidget`, `Func1`, `VisualEditor`, `ReportTaskParams`, `ProposalTransactionJSON`, `PageInterface`, `IRootElasticQuery`, `OptionNode`, `OpenSearchDashboardsDatatable`, `ComboBoxGroupedOption`, `IRect`, `NavigationContainerRefWithCurrent`, `SerializeImportData`, `TorrentInfo.Info`, `NamedMatchMediaProps`, `IDimensions`, `Learnset`, `ConnectionType`, `PageInstance`, `WebpackChain`, `EvaluationFunction`, `ThrowIterable`, `RopInfo`, `IGroupSharingOptions`, `Components`, `requests.ListDrgAttachmentsRequest`, `OperationsListOptionalParams`, `LoopConverter`, `ArgumentsType`, `TableDistinctValue`, `ShaderAttributes`, `BaseError`, `ChildArenaNode`, `AbridgedFormatErrorMetadata`, `IBidirectionalIterator`, `SignalingClientEvent`, `ValidityState`, `GeometryQuery`, `Expect`, `ScoreInstrument`, `DatabaseConfig`, `interfaces.Newable`, `IndexPatternFieldMap`, `IBufferView`, `StructuredTypeSchema`, `JpegEmbedder`, `ResolverMap`, `StageInterview`, `TimelineKeyframeStyle`, `SplittedPath`, `CoreImagesContract`, `RPCRequestPayload`, `MappingFactor`, `YouTube`, `Num`, `PaymentProvider`, `FunctionComponentElement`, `MessageCallback`, `HTMLUListElement`, `LimitToPipe`, `CellInterface`, `Target`, `PDFString`, `RollupClient`, `MockObject`, `V1Scale`, `ObjectAssertionMember`, `Loaded`, `ICredential`, `DragCheckProps`, `RangeError`, `requests.ListBdsInstancesRequest`, `FieldDetails`, `ParseExpressionTextResults`, `ProcessApproachEnum`, `UnitValue`, `GoalItemViewModel`, `Shell`, `ITranscriber`, `ListrObject`, `TestableApiController`, `SlsConsoleFile`, `RecordC`, `AppMenuItem`, `ServerSideVerificationOptions`, `Compact`, `IGistMeta`, `HintItem`, `MessageSpecification`, `ChangeNode`, `SubtleCrypto`, `Reaction`, `ILangImpl`, `UserPositionsAccount`, `LoggerProperties`, `ResourceDifference`, `ReportIndicator`, `SocketIOClient.Socket`, `CustomPropertySetUsage`, `PathfindingGraph`, `IDateStatistics`, `FileEditAction`, `IMatrixEventProcessorResult`, `ConfigValueFormat`, `ExpressionModel`, `PhrasesFilter`, `VAStepWord`, `OcpuUtilizationInfo`, `IModalListInDto`, `Node.DepositParams`, `Namer`, `VertexFormat`, `IStandardEvent`, `UpworkService`, `RequestCancelable`, `HEvent`, `React.RefObject`, `ConnectionManagerState`, `vscode.CodeLens`, `FormPayload`, `CipherCollectionsRequest`, `IResolverObject`, `requests.CreateProjectRequest`, `HammerLoader`, `MultisigAddressType`, `CheckpointTrie`, `AnimationConfig`, `CppRequestSpan`, `MySQLClient`, `ValueOrFunction`, `StyleRendererProtocol`, `EmailConfig`, `OpenSearchDashboardsDatatableColumn`, `GPUAdapter`, `IServerFS`, `MarketCreatedInfo`, `ModuleWithProviders`, `OpenCommand`, `CursorDirection`, `OrderByClause`, `Moized`, 
`PreparationTool`, `Libraries`, `DebugProtocol.Source`, `IsNot`, `Protocol.ServiceWorker.ServiceWorkerVersion`, `Archive`, `TimelineGridWrapper`, `MemoryStorageDriver`, `GethRunConfig`, `InjectCtx`, `IMdcSelectElement`, `TreeAdapter`, `EthereumClient`, `PlacementConstraint`, `PaginateOptions`, `BlobId`, `IntervalSet`, `puppeteer.ConnectOptions`, `BinaryToTextEncoding`, `PubArt`, `ExpandedNodeId`, `Trees`, `BoardSlice`, `CausalRepoObject`, `FileAnalysisResult`, `UnderscoreEscapedMap`, `Mismatch`, `ProcessingEvent`, `SystemPortalSelectionTag`, `BotTimer`, `EntityIdentity`, `prettier.Options`, `QuadViewModel`, `ActionInterval`, `AggregatePriceRepository`, `TypeClass`, `UnaryOpProgram`, `FileDescription`, `GetImportJobCommandInput`, `ExpectedDiagnostics`, `VaultOptions`, `HighlighterProps`, `LocalizedText`, `TypeGuard`, `OffsetRange`, `ts.server.ScriptInfo`, `NSFileManager`, `TaskDetails`, `IINode`, `VoiceAssistant`, `VideoFile`, `TimelineTotalStats`, `UserEntity`, `columnTypes`, `MatMenuPanel`, `ColumnDescription`, `InterpolateExpr`, `CircleObject`, `ContactSubscriptions`, `VisibleTextLocator`, `AggregateResponse`, `NotificationProps`, `StateForStyles`, `TestStep`, `FrontCardsForArticleCount`, `IButtonClickEvent`, `CompileResult`, `ListComprehensionIterNode`, `CodeModExports`, `SCClientSocket`, `WirePayload`, `PointList`, `YargsArgs`, `Marker`, `BuilderDataManagerType`, `HdLitecoinPayments`, `ListResolverEndpointsRequest`, `BetterMap`, `Responder`, `Module1`, `SHA256`, `CreateDatasetCommand`, `RawSkill`, `DataModel.ChangedArgs`, `ToastProvider`, `NotifyMessageDetailsType`, `TsConfig`, `OptionParams`, `ValidationConstraints`, `HostedZone`, `IParticle`, `ParameterValueDefinition`, `http.ClientRequest`, `DeleteUserRequest`, `IViewHandler`, `RuleType`, `SankeyPoint`, `GetDatabasesCommandInput`, `BoxCache`, `SecurityDataType`, `ReleaseChannel`, `GetFileOptions`, `DescribeWorkspacesCommandInput`, `BottomSheetOptions`, `ListDatasetsCommandOutput`, `ChannelPresenceEvent`, `NormalizedEsmpackOptions`, `PageMeta`, `LngLatAlt`, `ModelSchemaInternal`, `SelectorQuery`, `ImageMapperProps`, `GeocoderQueryType`, `BackendAPIService`, `TagListQueryDto`, `ExtraValues`, `MIRPrimitiveListEntityTypeDecl`, `CrossConnectMapping`, `EllipticCurves`, `AppletType`, `GfxrRenderTargetDescription`, `CreateArg`, `ISplitIndex`, `ISPHttpClientOptions`, `requests.ListQuickPicksRequest`, `DateInterval`, `DiagnosticOptions`, `ErrnoException`, `EditorPosition`, `RequestedServiceQuotaChange`, `BlogService`, `GrowableFloat64Array`, `PersistItem`, `PDFRawStream`, `LoginInput`, `CompletrSettings`, `ColumnConfig`, `LiteralCompiler`, `ts.ReturnStatement`, `GitError`, `BlockDisk`, `INativeTagMap`, `Sorter`, `NineZoneStagePanelManagerProps`, `ObjectData`, `TokenResponse`, `ExternalLoginProviderInfoModel`, `ObjectAny`, `CacheStorage`, `TournamentRecordList`, `SharedArrayBuffer`, `LuaDebug`, `FN`, `Method`, `TexturizableImage`, `IEndExpectation`, `PipelinesGraphics`, `StepFunction`, `InlineResolveOptions`, `ExecutionInfo`, `Room`, `SystemMessageProps`, `Funding`, `ICSSInJSStyle`, `requests.ListConsoleHistoriesRequest`, `RotationalSweep`, `SnackbarState`, `TSESTree.MemberExpression`, `Calc`, `WlMedia`, `NDArray`, `PropDecorator`, `Detector`, `DescribeCodeBindingCommandInput`, `ZesaruxCpuHistory`, `ScanDirection`, `DimensionDetails`, `AddRepositoryCommand`, `OperationResponse`, `JSChildNode`, `SearchServiceStartDependencies`, `GetRowHeightFn`, `CryptoFishContract`, `SVGFilterElement`, `CallMethodRequestLike`, `three.Object3D`, 
`FetcherOptions`, `AppConfiguration`, `KeyEvent`, `BottomSheetParams`, `CohortRepresentation`, `DefUse`, `TProviders`, `Integer`, `TypedEvent`, `RestyaboardItem`, `TemplateService`, `TaskIDPath`, `GroupName`, `IList`, `TSetting`, `SnapshotRestoreRequest`, `TimeResolvable`, `ToastData`, `SwankRawEvent`, `SwingTwistSolver`, `DemographicsGroup`, `PreferenceSchema`, `ListAppsCommandInput`, `SubscribableEditionComboboxItemDto`, `FlowItemComponent`, `JSONPropPath`, `DeleteDBInstanceCommandInput`, `tfconv.GraphModel`, `MirrorDocumentSnapshot`, `Injectable`, `Lens`, `MigrationMap`, `Int8Array`, `GeneratorResult`, `IChart`, `ScriptDataStub`, `QueryExpressionContext`, `ButtonColors`, `PacketMember`, `GroupCurrencyCode`, `ScreenshotService`, `QueryMap`, `CrudOptions`, `UseMutation`, `cc.Prefab`, `GraphQLError`, `MessageConfig`, `StandartParams`, `Autopanner`, `Typeless`, `PriceOracle`, `ScopeOptions`, `OverlayContainer`, `QueryRequest`, `ServiceWorkerRegistration`, `AtomGridmaskImageElement`, `RiskViewEntry`, `GCPubSubServer`, `requests.ListFastConnectProviderServicesRequest`, `Column`, `SharedTestDef`, `LambdaExpr`, `SubscriptionObserver`, `TestFormat`, `CacheData`, `DescribeEngineDefaultClusterParametersCommandInput`, `BlobEvent`, `RouteView`, `AssertionError`, `IAssetInfo`, `VisualizeEmbeddableContract`, `ANodeStm`, `V1Certificate`, `BotSpace`, `AlertId`, `UpdateApp`, `StackNode`, `MediaPlayer`, `CombineOutputResult`, `LocaleType`, `DataFetcher`, `apid.LiveStreamOption`, `SubmissionEntity`, `ChoicesType`, `ContactList`, `ResponsiveSpace`, `VscodeSetting`, `ByteSize`, `ClaimDTO`, `NormalizedPapiParameters`, `VerifiedHierarchy`, `DMMF.ModelMapping`, `StartServicesAccessor`, `OasPathItem`, `StateTransition`, `CategoriesService`, `THREE.WebGLCapabilities`, `SchemaNode`, `GamepadButton`, `PromptItemViewModel`, `PoolState`, `IRoundResult`, `WithMetadata`, `RadixAccount`, `QueryStatus`, `ObjectNodeParams`, `MaybeDate`, `BackupSummary`, `UsageStats`, `Rental`, `d.ComponentCompilerStaticProperty`, `BackgroundColor`, `CloudAccounts`, `SliderState`, `Dynamics`, `CreeperPoint`, `requests.ListDrgRouteRulesRequest`, `ListGroupsRequest`, `EventPriority`, `CmsGroup`, `StrokeDrawingContext`, `AnimKeyframe`, `Mutation`, `ICustomViewStyle`, `FaastError`, `WalletDeploymentService`, `StatefulDeviceManager`, `TaskState`, `ICrudListQueryParams`, `BaseEventOrig`, `ChainGunLink`, `DiffResultMessage`, `PlasmicComponentLoader`, `NumberRange`, `ISequencedOperationMessage`, `ColorModeRef`, `BrowserContextOptions`, `ESLMedia`, `TransmartRelationConstraint`, `Events.pointerdragstart`, `UniListItem`, `ExtendedType`, `TsSelectionListComponent`, `TriggerPosition`, `interfaces.Binding`, `FieldsSelection`, `ReplayTick`, `NavigationParams`, `TinyPg`, `SearchIssuesAndPullRequestsResponseItemsItem`, `AutoforwardState`, `VirtualNetworkWaiter`, `HttpEffect`, `DbTokenMetadataQueueEntry`, `PreviousSpeaker`, `VueDecorator`, `VisualizeTopNavProps`, `IFuture`, `DeclarationParams`, `SubscriberAndCallbacksFor`, `BaseSourceMapTransformer`, `datePickerModule.DatePicker`, `AnalyzerService`, `IPost`, `Delaunator`, `SearchOptionModel`, `PutLoggingOptionsCommandInput`, `PrunerConfig`, `SMTConstructorGenCode`, `BundleModuleOutput`, `Ranking`, `DescribeParameterGroupsCommandInput`, `HTMLMediaElement`, `ErrorService`, `TimeSheetService`, `GitTag`, `AngularFireOfflineDatabase`, `ForeignKey`, `SRT0`, `Iterable`, `AxiosInstance`, `IInfectionOptions`, `DkrObject`, `ILaunchOptions`, `AdapterPool`, `MongoClient`, `ScreepsReturnCode`, `ItemList`, 
`Benchmark.Event`, `UseSelectProps`, `pxt.Map`, `MeshStandardMaterial`, `RequesterBlockMap`, `CollectionContext`, `Clipboard`, `https.RequestOptions`, `IPuppet`, `NodeToVisit`, `CKEDITOR.eventInfo`, `alt.Vehicle`, `BodyOnlyAcceptsNumbers`, `T9`, `SubEntityLocationProps`, `IOnSketchPreviews`, `MessageReadListener`, `ColorSwatchProps`, `Name`, `ProgramObjects`, `GatewaySession`, `Fragment`, `SqipImageMetadata`, `Mmenu`, `LQueries`, `tl.FindOptions`, `RouteModules`, `Clients`, `ObservableOption`, `InstallationsFile`, `PartialState`, `AssembledReportGraphics`, `MenuStateReturn`, `ProxyType`, `PluginLoaderService`, `SpyObj`, `ByteOrder`, `MainTab`, `CustomAtom`, `CoreTypes.TextAlignmentType`, `RARC.JKRArchive`, `MyDirectoryTree`, `Quantifier`, `KCDHandle`, `CustomSprite`, `PlaneAngle`, `IMergeFile`, `SankeyLink`, `AuthType`, `SubTrie`, `ConsCell`, `TaskOperations`, `KeccakHash`, `InternalBulkResolveParams`, `RSAKeyPair`, `TombFinance`, `EnumEntry`, `BuildStyleUpdate`, `StylusNode`, `TranslatableService`, `MessageSecurityMode`, `ChangeFilter`, `DrawParams`, `SingleResponseModel`, `PbEditorElement`, `NavigationAction`, `HttpFetchOptions`, `DataFrameAnalyticsListRow`, `PlacementResult`, `DartDeclarationBlock`, `JupiterOneClient`, `RectAnnotationStyle`, `ValueDescPair`, `AuthenticateFacebookInstantGameRequest`, `FileWithMetadata`, `Y.XmlText`, `FileCodeEdits`, `IIssueParms`, `AuthenticatedSocket`, `DisplayPartsSymbolWriter`, `Hex`, `LiveDatabase`, `OrderByNode`, `CacheContent`, `DeploymentImpl`, `pulumi.Resource`, `PortalInfo`, `IAccountDetails`, `TableOptions`, `HttpStatusCode`, `ColorScheme`, `d.Encapsulation`, `CreateMeetingWithAttendeesCommandInput`, `TutorialModuleNoticeComponent`, `Kafka`, `AxisContext`, `MeetingParticipants`, `C7`, `MVideoFullLight`, `OnFetchEventFn`, `AnnotationsOptions`, `IEventStoreData`, `MapNode`, `CreateGroupCommand`, `StateInterface`, `MentionInfo`, `SempreResult`, `StylePropConfig`, `ServiceGetPropertiesResponse`, `CommandMetadata`, `GPUBufferUsageFlags`, `GameScene`, `Keybinding`, `ConformancePatternRule`, `Mocks`, `Mine`, `HTMLInputOptions`, `Streak`, `IPriceAxisView`, `Accumulator`, `ts.Statement`, `ContractCallResults`, `Discord.GuildMember`, `ILoadbalancer`, `Stitches.ScaleValue`, `Distributes11`, `NodeTypes.IMessagingService`, `IXMLFile`, `Initialization`, `BlinkerDevice`, `ILoggedInUser`, `TransactionEventBroadcaster`, `RouterOutlet`, `EntityCreate`, `PointCloudOctreeGeometryNode`, `FluidBox`, `HsLayerManagerService`, `OctokitProvider`, `PartyJoin`, `IHsv`, `ImportAdder`, `MatSlideToggleChange`, `VerificationClientInterface`, `IComponentComposite`, `requests.ListComputeGlobalImageCapabilitySchemasRequest`, `BaseFee`, `ServiceList`, `ClassSession`, `ISettingRegistry.ISettings`, `AngularFire`, `ExtendedGroupElement`, `vscode.CompletionItem`, `JSMs.Services`, `WorkspaceResourceName`, `RequestType0`, `Dsn`, `FocusedCellCoordinates`, `ENR`, `NexusObjectTypeDef`, `TypePath`, `ObjectDeclaration`, `Containers`, `SourceData`, `OutputAdapter`, `NgxsDataStoragePlugin`, `MIRInvokeBodyDecl`, `TransmartAndConstraint`, `SharedContentInfo`, `EventAccumulator`, `KibanaResponse`, `ArDB`, `BlinnPhongMaterial`, `RowData`, `DashboardContainerOptions`, `InstanceManager`, `DaffCartPaymentFactory`, `MockDirective`, `keyboardState`, `TrackedImportAs`, `AdShowOptions`, `PositionStrategy`, `AsyncOptions`, `ASTResult`, `AggregationMode`, `ToneAudioBuffers`, `SourceDataItem`, `PromptModule`, `AppEvent.Stream`, `ProjectExtentsClipDecoration`, `Taro.request.Option`, `PackageConfig`, 
`RateLimiter`, `CatalogEntry`, `ListMemberAccountsCommandInput`, `CirclinePredicate`, `StartJobRunCommandInput`, `IResources`, `Types.ObjectId`, `ResourceArray`, `ReferenceSummary`, `AutoFilter`, `MatchedPointType`, `ThisParameterType`, `D`, `SurveyTemplateRendererViewModel`, `FBSDKSharing`, `NetworkContracts`, `CORSOptions`, `ReleaseProps`, `PromoCarouselOptions`, `ICategoryCollection`, `RendererResult`, `Protobuf.Type`, `BitMap`, `WorkspaceSettings`, `Rx.Subject`, `CanvasContext`, `InformationPartitionElementProps`, `InterfaceServerResponse`, `TagList`, `IOrganizationSet`, `TypeExpression`, `AlignValue`, `NexusExtendTypeDef`, `MockResponse`, `LambdaFunction`, `ProviderMessage`, `ResponseHeader`, `IWriteOptions`, `MatchedSegments`, `pxt.PackageConfig`, `CallbackMethod`, `SlashCommandContext`, `UpdateImportInfo`, `Alarm`, `SelectListItem`, `WebampWindow`, `Decorator`, `Technique`, `NgxFeatureToggleRouteGuard`, `FBXReaderNode`, `WKWebView`, `HyperlinkMatch`, `SelectQueryNode`, `U2NetPortraitConfig`, `MdxModelInstance`, `SdkAudioStreamIdInfoFrame`, `BEMData`, `ListSecretVersionsRequest`, `EvmNetworkConfig`, `Offset`, `RtkResourceInfo`, `Sampler3DTerm`, `IdentityDictionary`, `RtkQueryMonitorState`, `ComponentOptions`, `ExecutableSpec`, `ISdkStreamDescriptor`, `Render`, `ParsedSelectorAndRule`, `PropertyExt`, `MappingEvent`, `PropertyTreeNodeHTMLElement`, `PDFNumber`, `NativeViewElementNode`, `AnimationDesc`, `TicketMod`, `RuleAttribute`, `IDropboxEntry`, `TargetStr`, `TextType.StyleAttributes`, `Express.Response`, `SpawnPromise`, `WalletMock`, `IOutput`, `DocumentCollection`, `Staking`, `RPGGame`, `TSAssign`, `BuddyBuild`, `AmmConfig`, `SwitchStatement`, `OptionConfig`, `QueryData`, `ModifyEventSubscriptionMessage`, `ts.ExportAssignment`, `AuthenticationPolicy`, `IDateColumn`, `UserManagementService`, `AlertProps`, `TagInformation`, `JsonWebKey`, `word`, `LoaderData`, `TextInputProps`, `MockResource`, `TInterval`, `PayableTx`, `MutableRefObject`, `ThermostatMode`, `PostType`, `BindingOrAssignmentElement`, `VdmEnumType`, `CachedType`, `TaskItem`, `Models.KeyValuePair`, `UriComponents`, `ResolveXName`, `WebGLRenderingContext`, `formatting.FormatContext`, `BrewView`, `GeomNode`, `ContentSource`, `GraphIIP`, `DraggableEvent`, `GX.KonstAlphaSel`, `NgSelectConfig`, `ValueSuggestionsGetFn`, `UnitHelper`, `PropertiesService`, `BeEvent`, `SampleExtractionResult`, `PathParamOptions`, `AttachmentItem`, `SerializedHouse`, `DubboTcpTransport`, `ToggleProps`, `AllMdastConfig`, `DataDefinition`, `LinkState`, `VNode`, `CustomMerge`, `StoreGroup`, `ISubscription`, `ConfigurationItem`, `FixResult`, `Left`, `NormalizedIdentifierDescriptor`, `_app`, `TransactionalFileSystem`, `StorageItem`, `WatcherMap`, `SignalingClientSubscribe`, `MiddleColumnPadCalculator`, `QuaternionKeyframe`, `DynamicAttrs`, `FormFieldProps`, `PanService`, `RpcResponse`, `IGridColumnFilter`, `KibanaFeatureConfig`, `FolderDetector`, `UserDetailsQuery`, `White`, `Console`, `IToolbarDialogAddonHandler`, `LocaleRecord`, `SemanticClassificationFormat`, `DeleteContactCommandInput`, `OnChangeType`, `BreakNode`, `ObjectWithType`, `MapMouseEvent`, `AssignmentExpressionNode`, `WarehouseService`, `PerformanceEntryList`, `ProjectReference`, `Refable`, `ApplicationShell.Area`, `EnumShape`, `ValueXY`, `THREE.Ray`, `ImageBitmap`, `TokenGroup`, `Evaluated`, `IComponentOptions`, `AddressNode`, `HealthPolledAction`, `AddressData`, `THREE.Matrix4`, `ICoverageFile`, `RootElement`, `RadixSpunParticle`, `DelayLoadedTreeNodeItem`, `IEquipment`, 
`PrivilegeCollection`, `github.GitHub`, `OpCode`, `$RequestExtend`, `RedisOptions`, `CombinationKind`, `XMLElementUtil`, `ImageSize`, `requests.ListVolumeGroupBackupsRequest`, `ThyNavLinkDirective`, `UserService`, `ArenaFormatings`, `EVM`, `AsyncValidatorFn`, `NuxtContext`, `DateBodyRow`, `SimpleInputParamsCommandInput`, `S3Action`, `PatternClassArgumentNode`, `TradeResponse`, `LeafletEvent`, `MROpts`, `NNode`, `Bean`, `CancellablePromiseLike`, `ReleaseAsset`, `SymbolMap`, `PropagationResults`, `ZWaveLogInfo`, `GraphModel`, `CallappConfig`, `GetZoneRecordsRequest`, `NumberPattern`, `MailService`, `PhysicsHandler`, `SourceMapConsumer`, `PluginResourceSettings`, `EnumDef`, `BrickRenderOptions`, `ILogoProps`, `Primitives.Numeric`, `MediaElementAudioSourceNode`, `Yendor.BehaviorTree`, `d.ComponentRuntimeMeta`, `ts.JSDocTag`, `DataKey`, `CachedKey`, `AutoImporter`, `Script`, `JoinMode`, `IconifyIconName`, `DomService`, `ElectronStore`, `BaseType`, `FieldHierarchyRecord`, `CSVDataImporter`, `ManifestInventoryItem`, `ReducerState`, `EFood.Session`, `Reshape`, `CardCommon`, `SqlEntityManager`, `OptionsHelper`, `IntrospectionOptions`, `SyncResult`, `WebGPUTensor`, `PromiseJsExpr`, `requests.ListSubscriptionsRequest`, `ReadResult`, `ExpRes`, `Vector3`, `MXCurve`, `CornerMarker`, `OverlayOptions`, `CandidateStore`, `PartOfSpeech`, `DTMock`, `TInjectTokenProvider`, `IMap`, `CkElementContainer`, `IColumnRelationMetadata`, `TemplateListItem`, `EllipseEditUpdate`, `Metas`, `SonarQubeConfig`, `RulesTestContext`, `CommonPrefix`, `CompletedPayload`, `IVSCodeWebviewAPI`, `GetMessageKeys`, `TypesStart`, `CssParser`, `TimelineKind`, `SfdxOrgInfoMap`, `PostprocessSetOptions`, `FlatCollection`, `IReaderRootState`, `StatefulSearchBarProps`, `SprottyWebview`, `AnyBuildOrder`, `TranspileOptions`, `ValidationError`, `PartyRemove`, `HsAddDataUrlService`, `DialogForm`, `AccountRepository`, `FormConfigProps`, `QueryGroupRequest`, `BindingWhenOnSyntax`, `Constraints`, `PluginCodec`, `IEndpointSpec`, `GlobalPropertyStruct`, `SerumMarket`, `INeonNotification`, `ChartPoint`, `RedirectUri`, `BitFieldResolvable`, `FormattedString`, `VideoDownlinkObserver`, `FeedbackShowOptions`, `DataContext`, `CtrOr`, `WebGLRenderer`, `SymbolId`, `VictoryPointsBreakdown`, `BertNLClassifierOptions`, `Storybook`, `V1StepModel`, `APITweet`, `GfxQueryPool`, `DebugProtocol.StackTraceResponse`, `MockTrueCurrency`, `ResetAction`, `TextureProvider`, `DataViewCustom`, `UpdateConnectionResponse`, `ImageTransformation`, `Repeat`, `SqsMetricChange`, `Pool.Options`, `RigidBody`, `Marshaller`, `RedisModuleOptions`, `ResultValue`, `LayerObjInfoCallback`, `AccountRepositoryLoginResponseLogged_in_user`, `EmptyActionCreator`, `ResourceNotFound`, `DeepPartial`, `RegexComponent`, `AaiMessageTraitDefinition`, `NamedMouseElement`, `UIBrewStorage`, `GfxVendorInfo`, `ethers.Signer`, `Favor`, `TestFn`, `PathFinderGoal`, `A3`, `TransactionVersion.Testnet`, `utils.BigNumber`, `AggsAction`, `SHA3`, `APIResponseCallback`, `MyMap`, `BigLRUMap`, `PercentLengthType`, `DocgeniContext`, `FloodProcessEnv`, `Key`, `MainProps`, `FieldFilterState`, `IUIMethodParam`, `OpenSearchDashboardsReactContext`, `SpectrumElement`, `QueryShortChannelIdsMessage`, `CliManipulator`, `parse5.Element`, `ReminderFormatConfig`, `GeneratedKeyName`, `OneOrMany`, `Counter`, `CreateCommentDto`, `IPagingTableColumn`, `FieldStruct`, `IndexNode`, `MangoQuery`, `EntityConstructor`, `ILogParseHooks`, `KanbanRecord`, `ClassSelector`, `DeleteTransformsRequestSchema`, `TopNavigationEntry`, `DefaultValue`, 
`GraphQLResolveInfo`, `GenericList`, `UnidirectionalTransferAppState`, `RootVertex`, `HTMLIonRouterElement`, `Pattern`, `DirtiableElement`, `ESSearchSourceDescriptor`, `BulkInviteCommand`, `AggregateQuery`, `RippleSignatory`, `LeaveRequest`, `DatasetOpts`, `requests.ListIncidentsRequest`, `NavigationProp`, `ButtonTween24`, `Secret`, `Tensor3D`, `ApiParameter`, `NullLogger`, `DataSink`, `FragmentDefinitionMap`, `DynamoDbWrapper`, `SourcePosition`, `TItem`, `SavedObjectDashboard`, `NaotuConfig`, `TopLevelDeclarationStatement`, `ScanCommandInput`, `id`, `CalculatedIndicatorValues`, `DisplayCallbacks`, `District`, `DescribeClustersResponse`, `IsTenantAvailableInput`, `SassNumber`, `CreateEntrypoint`, `Named`, `LanguagePackage`, `LuminanceSource`, `JsonRpcResult`, `CartesianTickItem`, `InlineVariable`, `GlobalEvent`, `TableNS.RowProps`, `TypeHierarchyItem`, `ServiceIdentifier`, `CSSObject`, `Market`, `GraphDataProvider`, `RTCIceCandidate`, `ToolbarProps`, `SearchDetails`, `LineProps`, `LocaleSpec`, `KeyHandler`, `VNodeLocation`, `ChannelCardType`, `Sponsor`, `SwitcherResult`, `RenameInfo`, `QueryArgDefinition`, `WaveShaper`, `AssembledObject`, `DeleteCertificateCommandInput`, `MangoCache`, `TYPE`, `BufferUseEnum`, `HttpOperationResponse`, `IDBOperator`, `CreateArticleDto`, `ServiceKeyType`, `ReflectCreatorContext`, `IImageConstructor`, `Hsla`, `FlattenedFunnelStepByBreakdown`, `MDCSemanticColorScheme`, `NavigationTransition`, `ScannedFeature`, `B16`, `AstNodeMerger`, `ReadableData`, `I18nEntries`, `AssociatedName`, `EncryptionError`, `LogLevel`, `RedundancyConfig`, `tinyapp.PageOptions`, `Vuetify`, `SonarQubeMeasureResponse`, `ThresholdedReLU`, `ListPager`, `ARPlane`, `v2.WebDAVServer`, `tBootstrapArgs`, `G6Node`, `TreeNodeWithOverlappingSubTreeRoots`, `ContractCallPayload`, `Popover`, `ConditionFn`, `GameDataStateRecord`, `OptionValue`, `BRRES.RRES`, `EventFetcher`, `RecurringDepositsService`, `KnotVector`, `RouteHandler`, `SphereCollisionShape`, `TestUnitsProvider`, `SidePanelTransitionStates`, `IDynamicPortfolioColumnConfig`, `DataSourceItem`, `Struct`, `DeleteInstanceProfileCommandInput`, `ProjectListModel`, `BufferLike`, `CycleDimension`, `SharedState`, `EngineResults.ListMigrationDirectoriesOutput`, `SqlTuningTaskSqlExecutionPlanStep`, `ListChannelMessagesRequest`, `BeDuration`, `FileBox`, `OrgEntityPolicyOperations`, `SecuritySchemeObject`, `EarlyStoppingCallbackArgs`, `RuntimeOptions`, `ProtractorExpectedConditions`, `DialogLanguageModule`, `ConnectionCredentials`, `IRouter`, `AnimationProps`, `CustomIntegrationsPluginSetup`, `CompilerConfig`, `FlushEventArgs`, `d.HydratedFlag`, `VoiceOptions`, `RRule`, `SanityTestData`, `V1Node`, `IColonyFactory`, `RevocationStatus`, `ArrayBufferView`, `LQuery`, `DisplayNameChangedListener`, `VoidAnyEvent`, `IssueType`, `d.ComponentCompilerStaticMethod`, `CreateGlobalClusterCommandInput`, `UserSettings`, `IQResolveReject`, `SQLParserVisitor`, `App.SetupModule`, `ValidateErrorEntity`, `CustomDate`, `IVirtualRepeater`, `model.domain.DomainElement`, `SelectBaseProps`, `WalletContractService`, `DMMF.Document`, `ecs.TaskDefinition`, `Tip`, `LocalState`, `CallbackDisposable`, `ArrowCallableParameter`, `FundingCycleMetadata`, `ApiPackage`, `Spring`, `SimpleRule`, `SubscriptionAlreadyExistFault`, `ViewCell`, `UICollectionViewLayout`, `TEAttribute`, `DayPickerContextValue`, `cc.AudioClip`, `PrinterOptions`, `DatabaseInstanceHomeMetricsDefinition`, `DeleteRetentionPolicyCommandInput`, `EventDef`, `TDispatch`, `jdspec.ServiceSpec`, `UsersActionType`, 
`ProjectTechnologyChoice`, `ParamsSpec`, `CloseReason`, `ModelResponse`, `ShValue`, `TypeParameter`, `CasesClientMock`, `Envelope`, `SocialError`, `IssuesListCommentsResponseItem`, `Conferences`, `FieldState`, `NSDictionary`, `CopyDBClusterParameterGroupCommandInput`, `HemisphericLight`, `ResolvedDeclarations`, `ReadonlyESMap`, `TSFiles`, `ParsedMapper`, `HTMLOptions`, `IViewPortItem`, `InternalSettings`, `PathEndCoordinates`, `ko.Subscription`, `InternalModifiers`, `ComputedBoundsAction`, `MetricDataQuery`, `ExtractorChecker`, `IWriter`, `ISessionService`, `OutputTargetDocsJson`, `UpdateGroup`, `TopologyData`, `MeasureSpecs`, `NgxGalleryOptions`, `InputObjectTypeDefinitionNode`, `DaffCartFactory`, `ethers.providers.Provider`, `TFnWatcher`, `YAnnotation`, `TRequest`, `BlocksInfo`, `PostMessageOptions`, `ClientIdentity`, `ForgotPassword`, `ScaledUnit`, `IConnectionInfo`, `IDeployState`, `ClientOrderGoodsInfo`, `ArrayType2D`, `FileService`, `AdaptMountedElement`, `SecuredSubFeature`, `GetCertificateCommandInput`, `AbstractMethod`, `BaseSession`, `SObjectDescribe`, `VSCodeBlockchainOutputAdapter`, `NamedObject`, `SlpRealTime`, `AlterTableNode`, `IConnectedNodes`, `GroupNode`, `StopApplicationCommandInput`, `OutboundPackage`, `GameStartType`, `Arrayish`, `ClientModel`, `NotifyFn`, `NestedResource`, `PostCombatGameState`, `WebWorkerEnvironment`, `ServicePort`, `ParsedAccount`, `AttributionsToResources`, `RequestWithSession`, `PersistentVolumeClaim`, `IFormikStageConfigInjectedProps`, `BatchProcessResponse`, `RenderTargetTexture`, `ColorScale`, `CategoriesState`, `BookmarkTreeNode`, `EngineResults.DevDiagnosticOutput`, `DemoConfig`, `AndroidConfig.Resources.ResourceXML`, `ProtoPos`, `ChartHookReturnType`, `ParsedFunctionJson`, `MultiMult`, `RouteContext`, `ScopeQuickPickItem`, `CodeMirrorEditor`, `HTMLVmMenuRadioElement`, `VisibilityFilters`, `SlateNode`, `PageSort`, `ng.IHttpProvider`, `EdgeData`, `IndexMap`, `CookieEchoChunk`, `TransportStream`, `PartitionHash`, `TkmLogger`, `Viewer.Texture`, `CellSelection`, `TabModel`, `T.LayerStyle`, `IFrameHeader`, `NumericOperand`, `ListTemplatesCommandInput`, `ColumnChunk`, `SetOption`, `IFilePane`, `DatabaseItem`, `ModuleInstanceState`, `ITranslateConfig`, `TouchPulse`, `OperatorValueFilterDescriptor`, `ColorType`, `compare`, `NameObj`, `ParameterMap`, `Fault`, `TimePickerBase`, `DejaPopupConfig`, `GroupProblemData`, `TsConfigSourceFile`, `WechatMaterialIdDTO`, `FunctionServices`, `VerifiedToken`, `LabelService`, `BaseUAObject`, `IssuesCreateCommentParams`, `PackageFiles`, `LoggerOutput`, `lspCommon.WorkspaceType`, `FormRenderer`, `HorizontalAnchor`, `UpdateAppInstanceCommandInput`, `XYPoint`, `PhotoDataStructure`, `PositionWithCaret`, `Directionality`, `NgModule`, `CellRenderer`, `CaseUserActionsResponse`, `Config.Path`, `IProvider`, `UseCaseBinder`, `FullLocaleData`, `DeferredImpl`, `IconData`, `CLINetworkAdapter`, `GfxSamplerFormatKind`, `SerializeErrors`, `AnyResponse`, `BasicGraphPattern`, `EditorWidget`, `NetworkInfo`, `ModalRef`, `mat2d`, `ProgressBarProps`, `GraphInputs`, `tag.ID`, `FilterTrailersStatusValues`, `FileSystemState`, `UseSelectStateOutput`, `STLoadOptions`, `ListRecommendationFeedbackCommandInput`, `ts.IfStatement`, `HsAddDataService`, `messages.Source`, `ErrorAlertOptionType`, `StoreConfig`, `MapsManagerService`, `WorkerContext`, `RankState`, `PromiseFulfilledResult`, `Event1EventFilter`, `UIFunctionBinding`, `BufferFormatter`, `CallSignature`, `SetAccessorDeclaration`, `GenericOneValue`, `FM.DllFuncs`, `Departement`, 
`CurveLocationDetailArrayPair`, `MockWrapper`, `NodeJS.Dict`, `HighlightSet`, `MutableContext`, `VFileCompatible`, `CommandError`, `MatrixDynamicRowModel`, `ToastActions`, `LogAnalyticsCategory`, `IFB3Block`, `ODataResponse`, `React.ComponentClass`, `PartialBindingWithMeta`, `NetworkType.Mainnet`, `ContextCarrier`, `Atom.Range`, `IResizeState`, `SyncValue`, `ListRoomsRequest`, `ServerKeyExchange`, `minimist.ParsedArgs`, `CodeBlockWriter`, `CombatantTypes`, `ChangeCallback`, `SlopeWallet`, `LaxString`, `OutlineSymbolInformationNode`, `Select`, `SearchExpression`, `HistoryService`, `ELEMENT`, `CutLoop`, `ExpShapeConcat`, `SwitchFunctorEventListener`, `NavigationType`, `BasicIteratorResult`, `CompositeName`, `GetNotificationsFeedCommand`, `ParseState`, `MovieDAO`, `InternalDefaultExpression`, `XPCOM.nsIFile`, `WakuMessage`, `XTermMessage`, `InteriorInternal`, `ChartLine`, `ListResponse`, `CompilerEventDirAdd`, `BoxSizer`, `IColorModeContextProps`, `IHttpRes`, `TimePickerControls`, `PlasmicConfig`, `MapObjectAdapter`, `CreateDBParameterGroupCommandInput`, `JsonDocsPart`, `GenericCompressor`, `ITableSchema`, `BrowserFiles`, `UserIdentity`, `AthleteSettingsModel`, `jsdoc.Annotation`, `StyletronComponent`, `IAureliaClassMember`, `TableRowData`, `RxnPlus`, `ICurrentUserState`, `ShelfFunction`, `FeatureKibanaPrivileges`, `QueryCommandInput`, `DeleteAccessPointCommandInput`, `StatusCode`, `GRU`, `GeoPosition`, `Dialogue`, `Twilio`, `RegisteredServiceAccessStrategy`, `DirectionConfiguration`, `PQLS.Library.ILibrary`, `FormatProvider`, `Binary`, `ProxyRequestResponse`, `TouchMouseEvent`, `DomCallback`, `SearchByIdRequest`, `vscode.Extension`, `PlaceholderEmbeddableFactory`, `Delegate`, `EventFacade`, `Ajv`, `TSContinue`, `MutableQuaternion`, `Hash256String`, `IInputIterator`, `MFAPurpose`, `AssociationCC`, `GreenhouseJobBoardJobNode`, `MeetingSessionConfiguration`, `BreakpointFnParam`, `GalleryState`, `SConnectableElement`, `Anchored`, `AngularScope`, `ProgressProps`, `MockValidatorsInstance`, `ScopeFilter`, `TCommand`, `ISlackPuppet`, `QueryBidRequest`, `Topic`, `quantumArray`, `ThemeNeutralColors`, `OasDocument`, `SettingOptions`, `TImportError`, `ServiceHttpClient`, `OrientedBox3`, `SerializedObjectType`, `ConfigStructShape`, `RtcpReceiverInfo`, `FeaturedSessionsActions`, `ConstantState`, `DescribeInputCommandInput`, `SynchrounousResult`, `SocketInfo`, `GasPriceOracle`, `VertexData`, `ConnectArgs`, `ThyTooltipConfig`, `OnEventCallback`, `RootValue`, `ComponentFramework.Dictionary`, `ConfigHttpLoader`, `Plugins`, `GraphQLParams`, `ShaderDefine`, `PutObjectCommandInput`, `DateInputProps`, `IcuExpression`, `GeometryRenderer`, `IKEffector`, `MyType`, `WeapResource`, `IToolbarAction`, `AssetWithMeta`, `IMigrationConfig`, `TooltipData`, `ScreenType`, `LogState`, `NativeFunction`, `Map`, `UInt128Array`, `EditableNumberRangeFilter`, `QualifiedUserClients`, `IParserOptions`, `GetStudioCommandInput`, `BitmapData`, `FeaturePipelineState`, `DebugProtocol.ConfigurationDoneArguments`, `ExtrusionFeature`, `IndexProps`, `StructureContainer`, `RoomUser`, `IApplicationHealthStateChunk`, `requests.ListSteeringPoliciesRequest`, `OfAsyncIterable`, `DfDvNode`, `AdvancedFilter`, `Helpers`, `IDependency`, `TwingSourceMapNode`, `Filesystem.PackageJson`, `PreferenceChangeEvent`, `LatestClusterConfigType`, `TCssTemplate`, `ILinkedClient`, `SqlPart`, `ShowOptions`, `InputObject`, `PackedBubblePoint`, `ControlsProps`, `Handshake`, `AnnotationRectProps`, `CompositePropertyDataFilterer`, `ISceneObject`, `SinonFakeTimers`, 
`Diagnostic`, `SDKBase`, `TFLiteModel`, `EchartsTimeseriesChartProps`, `LambdaNode`, `SnapshotListenOptions`, `InternalStore`, `HeadingNode`, `JobStatusResult`, `ProxyConfiguration`, `FileStatusResult`, `ExchangeContract`, `AttentionLevel`, `PartitionedFilters`, `INodeContainerInfo`, `ObjectInstance`, `Goal`, `TabViewItem`, `requests.ListWhitelistsRequest`, `CohortType`, `TorrentDAO`, `ConfigRuntime`, `i`, `IServerSideGetRowsParams`, `MainController`, `Interfaces.RequestConfig`, `ModelSnapshotType`, `S3StorageProvider`, `EntityDto`, `JsonLdDocumentProcessingContext`, `MoviesService`, `CrudRepositoryCtor`, `fun`, `StarterOption`, `ScrollStrategy`, `BoundsOctreeNode`, `RTCIceGatherer`, `WidgetModel`, `CumSumProgram`, `ForwardingConfig`, `GithubUserRepository`, `SelectAction`, `FeaturesService`, `BaseException`, `ForeignKeyModelInterface`, `IndoorMap`, `Row`, `GetUserInfoSuccessCallbackResult`, `R2Publication`, `ProposeMessage`, `GetCellColSpanFn`, `ActionCallback`, `IOSDependencyConfig`, `CodeEdit`, `ImportResult`, `PatternAsNode`, `ButtonWidth`, `PureTransition`, `SecureStorage`, `IBuffer`, `FindManyOpts`, `NavigatorDelegate`, `CredentialsOverwritesClass`, `SequenceConfig`, `Downloader`, `m.VnodeDOM`, `TranslationUnit`, `RunCommandInput`, `EditprofileState`, `semver.SemVer`, `DomainService`, `HTTPError`, `DifferentHandlerParam`, `PermutationListEntryWithTrackingData`, `FunctionBuilderArguments`, `EventPluginContext`, `FieldUpdate`, `Encounter`, `DirectiveResult`, `ThyGuiderConfig`, `WorkerResult`, `VideoPlayer`, `BreakpointKeys`, `StackParameterInfo`, `CustomArea`, `EntityCollectionReducers`, `api.ISummaryTree`, `IVectorStyle`, `AutoconnectState`, `JupyterLabPlugin`, `ProjectViewModel`, `ToastPackage`, `TestToken`, `SomeCV`, `IPoContent`, `ProjectSummary`, `IEmbedVideoOptions`, `MenuSurfaceBase`, `FabAction`, `KeyboardListener`, `AtRule`, `XMLBuilderState`, `LoginUriApi`, `ITracerProfile`, `CharacterSet`, `CollectorSet`, `BorderRadiusDirectional`, `MatchInfo`, `IDejaDropEvent`, `KueryNode`, `MkDirOptions`, `WeakGenerativeCache`, `IOrganizationBinding`, `IMyOptions`, `ComponentData`, `ImageCacheItem`, `DebugSessionCustomEvent`, `FindCharacterMotion`, `Contest`, `T.NodeRef`, `CombinedEntry`, `SfdxTestGroupNode`, `BFS_Config`, `ProgressBarData`, `StylableTransformer`, `EngineResults.DiagnoseMigrationHistoryOutput`, `ServerApi`, `RTCSessionDescriptionInit`, `LoginService`, `UpdateParticipantRequest`, `ValidatorFunction`, `MIRInvokeFixedFunction`, `JellyfishWallet`, `MutationInput`, `GitTagReference`, `ERC1155PackedBalanceMock`, `ContentObserver`, `SvgItem`, `Witness`, `Line3`, `WithGenericsSubInterface`, `MouseEventHandler`, `KillRing`, `ex.PreCollisionEvent`, `NetworkConfig`, `MaterialInstanceConfig`, `EventParams`, `CssSelector`, `TraitNode`, `TrackedHasuraEventHandlerConfig`, `Pow`, `Username`, `NockDefinition`, `FunctionBinding`, `TensorLike`, `TransactionOptions`, `MdastNodeMap`, `EventTarget`, `Projection`, `ChannelState`, `ExpressionAstExpressionBuilder`, `RenderCompleteListener`, `ProjectDefinition`, `ICfnSubExpression`, `IGCNode`, `SelectableObject`, `React.DragEvent`, `AuthenticationHeaderCloud`, `TypographStyle`, `sinon.SinonStubbedInstance`, `StoreData`, `TearrData`, `DebugProtocol.StepOutArguments`, `NbThemeService`, `UpdatePayload`, `CharGroup`, `TimeState`, `CommonOptions`, `UseQueryResult`, `ForOfStatement`, `ICommandHandler`, `NewsroomState`, `DeleteAttendeeCommandInput`, `HandlerDefinition`, `CalculateNodePositionOptions`, `Field.PatchArgs`, `TestSystemContractsType`, 
`ReindexSavedObject`, `ECharts`, `FolderRequest`, `GeoNode`, `GetOwnPropertyDescriptors`, `OrphanRequestOptions`, `CliOutput`, `MutableVector3`, `StyledTextNode`, `AssetItem`, `AppStoreModel`, `ObjectPattern`, `WorkNodes`, `IPolygonPoint`, `RPCPayload`, `ListOpsInfo`, `DeserializedType`, `AnkiOperationSet`, `SocketService`, `IdentityPermissions`, `ResourceType`, `SGArcItem`, `DragState`, `NotebookCell`, `Deletion`, `CardActionConfig`, `InitConfig`, `MockChannel`, `After`, `ShaderSemanticsInfo`, `QuerySuggestionGetFnArgs`, `AwsServiceFactory`, `AESKey`, `knex`, `AirlineEffects`, `SubscriptionList`, `Walker`, `PoolTaskDataService`, `VarianceScalingArgs`, `NavigationDescriptor`, `UseFormReturn`, `ScrollData`, `CreateAskDTO`, `HostSchema`, `SubstanceEnv`, `T_0`, `Circle`, `ManyToManyPathMap`, `BrowserFftSpeechCommandRecognizer`, `HostRule`, `KnownFile`, `ErrorOptions`, `FetchPolicy`, `AuthAndExchangeTokens`, `CollectedData`, `ExceptionalOpeningHoursDay`, `CronProcessTable`, `PropertyRecord`, `GUIDestination`, `RecordedDirInfo`, `ListRepositoriesReadModel`, `FieldExpr`, `EvaluateFn`, `ResourcePackWrapper`, `CopySnapshotCommandInput`, `UnocssPluginContext`, `ReturnModelType`, `Git.IAuth`, `ConfirmedTransaction`, `IArticleField`, `GetDomainNameCommandInput`, `ParseSourceSpan`, `ts.MethodDeclaration`, `FileUploadState`, `Products`, `MIRGuard`, `IHillResult`, `InventoryFilter`, `Wall`, `PiExpression`, `ReadStream`, `ObservableState`, `IParserState`, `DeviceDetectorService`, `QuickPick`, `ScaffdogError`, `HTMLSourceElement`, `LogValueArgs`, `Knex.ColumnBuilder`, `Events.pointermove`, `SecurityTokenAdapter`, `PowerAssertRecorder`, `StaticSiteZipDeploymentARMResource`, `TxLike`, `Toolbox`, `MirroringHost`, `DescribeDataSourceCommandInput`, `UserChallengeData`, `ValidationOptions`, `android.content.Intent`, `Nat`, `DeleteAssetCommandInput`, `chromeWindow`, `StackElement`, `OutcomeType`, `IContainerRuntimeBase`, `ResourceDoc`, `KernelParams`, `AnyCurve`, `Printer`, `NetworkSecurityGroup`, `Accessibility.ChartComposition`, `EnvironmentTreeItem`, `SharedKey`, `VocabularyStatus`, `ComparatorFn`, `RelativeTime`, `AsyncModuleConfig`, `PlaywrightClientLike`, `ItemOptions`, `EnhancedGitHubEvent`, `DiscordMessageProcessor`, `CreateTag`, `TokenList`, `EnabledPoliciesPlan`, `InteractionEvent`, `ReactTestRendererTree`, `ConstantExpressionValue`, `LogAnalyticsMetaFunctionArgument`, `SpecPickerInput`, `Transform`, `SignatureEntry`, `ng.IQService`, `zod.infer`, `TagNode`, `ConfigMigrator`, `ReactiveInteraction`, `InfectableParticle`, `PriorityQueue`, `IColumnConfig`, `ValuePaddingProvider`, `DragSourceSpec`, `InitializationData`, `StreamData`, `KeybindingItem`, `GetConnection`, `Pie`, `TimeSeriesMetricDefinition`, `BuildHandlerArguments`, `UpdateHostClassService`, `MentionData`, `AppStateTypes`, `SecretProvider`, `CollectionPage`, `CallError`, `MEPChromosome`, `CacheFileList`, `ARMUrlParser`, `FlipSetting`, `JQuery.ClickEvent`, `ViewContainerTitleOptions`, `EdmTypeShared`, `ConstructorDeclaration`, `IConversation`, `IProcesses`, `ISceneConfiguration`, `PointsGeometry`, `ResetButtonProps`, `ShortId`, `DrawingId`, `Mutex`, `AnyExpressionRenderDefinition`, `BaseProvider`, `Range3d`, `ColumnMapping`, `Stash`, `ReacordInstance`, `DeleteTagsCommandInput`, `HalfEdgeGraph`, `FilteredPropertyData`, `TestProps`, `Airline`, `DBAccessQueryResult`, `ResolvedTypeReferenceDirective`, `SagaReturnType`, `AddSourceIdentifierToSubscriptionCommandInput`, `RegisterDomainCommandInput`, `GetOperationRequest`, `URLParse`, `EuiTheme`, 
`ErrorValue`, `MatOption`, `MapComponent`, `HttpHandler`, `FunctionBuilder`, `Preprocessors`, `d.BuildSourceGraph`, `EventActionHandlerMutationActionCallable`, `ReflectionKind`, `TimelineChartRange`, `am4maps.MapPolygon`, `BackendConfig`, `ScriptProcessorNode`, `ParentSpanPluginArgs`, `requests.ListWindowsUpdatesInstalledOnManagedInstanceRequest`, `IpRangeKey`, `ISavedSearch`, `OverlapRect`, `GetProjectResponse`, `VisibilityVertex`, `IRenderService`, `InjectionValues`, `BaseStruct`, `UseGenerateGQtyOptions`, `ICoreService`, `ResOrMessage`, `Scoreboard`, `JsonBuilder`, `MultiChannelAssociationCCSet`, `IDateGrouper`, `LayerEdge`, `SessionStorageService`, `PolicyBuilderElement`, `ArrowHelper`, `FetchAPI`, `JQueryEventObject`, `TKeyboardShortcut`, `ISite`, `WrappedProperties`, `IOidcOptions`, `ora.Ora`, `CourseUser`, `ProgressionAtDayRow`, `TsProject`, `IHubRequestOptions`, `FzfResultItem`, `RowType`, `DocumentManager`, `RedisClientType`, `GPUTextureView`, `CmsModelFieldToElasticsearchPlugin`, `IUserProfileViewState`, `TDynamicObj`, `Annotations`, `GetAuthorizersCommandInput`, `CharCategory`, `MergeBlock`, `SchemaGenerator`, `ReferenceDirection`, `AggregateCommit`, `DocumentedError`, `AppStateSelectedCells`, `Model.Book`, `IconifyJSON`, `Angle`, `middlewareSingle`, `VideoStreamIdSet`, `PermissionStatus`, `GraphQLInputFieldMap`, `MDCBaseTextField`, `PopoverStateReturn`, `HotModuleReplacement`, `AnalyticsProvider`, `AsyncSubscription`, `NumberTuple`, `AccountSetOpts`, `CoinPayments`, `SeriesType`, `W3`, `AsyncStream`, `Appointments.AppointmentProps`, `TooltipContextValue`, `TAccessQueryParameter`, `Iam`, `Moon`, `InternalHttpServiceSetup`, `PipeConnection`, `TabStripItem`, `IImageAsset`, `DescribeScheduledActionsCommandInput`, `$ResponseExtend`, `BuildingState`, `PairingTypes.Proposal`, `ResourceTypes`, `ReaderTaskEither`, `GridEntry`, `EventAdapter`, `CreateProgram`, `RowList`, `Electron.WebContents`, `FixedDepositsService`, `ConnectableObservable`, `RunOptions`, `Session.IOptions`, `gcp.Account`, `requests.ListModelDeploymentsRequest`, `ContentType`, `DaffCountry`, `NonPayableTx`, `Node.NodeMessage`, `IComponents`, `ExtractorResult`, `CategorySummary`, `MatchingDirection`, `SearchExpressionGroup`, `ParsedRepo`, `FormReturn`, `FormBuilderService`, `ECPair.ECPairInterface`, `Realm.ObjectSchema`, `IReducer`, `Dryad`, `PositionType`, `Tmpfs`, `ThyTransferSelectEvent`, `DecompileResult`, `LongNum`, `WaitForScript`, `DigitalNode`, `ts.ExportDeclaration`, `ISnapshotTreeEx`, `Firestore`, `CreateIndexNode`, `NodeRequest`, `TSTypeParameterInstantiation`, `CachedImportResults`, `SerializedError`, `TypeFacts`, `SampleProduct`, `TypedEmitter`, `AstNodeParser`, `EntityActionPayload`, `PreviewDataApp`, `LinearScale`, `ContentTypeProps`, `requests.ListWindowsUpdatesRequest`, `LineType`, `RTCRtpSender`, `TypeVarType`, `OsuSkinTextures`, `Membership`, `Challenge`, `ConfigIntelServer`, `FabricObject`, `GeoCoordinates`, `egret.Event`, `ICloneableRepositoryListItem`, `TransitionPreset`, `ExcludedEdges`, `Instance_t`, `RefreshAccessTokenAccountsRequestMessage`, `ITreeItem`, `Closeable`, `OrExpression`, `DiscordMessageActionRow`, `ArgStmtDecl`, `Arity2`, `CellStyle`, `MiniNode`, `Timeline.State`, `StoredEvent`, `RoomEntity`, `LRU.Options`, `StartJobCommandInput`, `PortInfo`, `ClipboardJS`, `ActivityItem`, `NucleusChannel`, `IHotKeyConfig`, `IValueFormatter`, `THREE.Path`, `MdxModel`, `OpenAPI.PathItem`, `AlertTitleProps`, `requests.ListSecretsRequest`, `VisualizeEditorCommonProps`, `EnumMember`, `DBMethod`, 
`RecordsRefs`, `FormFieldsProps`, `AccountCustom`, `ESLCarousel`, `EmailOptions`, `ListRecommendationsRequest`, `ContractDefinition`, `SignDocWrapper`, `IPackageRegistryEntry`, `NetInfoState`, `STWidgetRegistry`, `LocalActions`, `SentryRequestType`, `Indy.LedgerRequest`, `Address6`, `EventCallback`, `CorrelationIdGenerator`, `WordcloudPoint`, `EditorConfig`, `FcException`, `ShapeField`, `MatSidenav`, `LineString`, `CheckedObserver`, `FeatureValue`, `DefinedSmartContract`, `PointStyle`, `IProgressReporter`, `UnionOrIntersectionTypeNode`, `ServiceDiscoveryPlugin`, `AudioDescription`, `CreateScriptCommandInput`, `CPU`, `AuthenticationDetailsProvider`, `InteractiveStateChange`, `CreateEndpointCommandInput`, `OverridePreferenceName`, `PrefFilterRule`, `FastifyPluginCallback`, `OpticType`, `SceneItem`, `MockMessage`, `Technical`, `formField`, `MasternodeBlock`, `HTMLIonAccordionElement`, `RemoteAction`, `IKeyIterator`, `ConnectionInformations`, `ComputedStyle`, `ContinueNode`, `DeleteInputCommandInput`, `AlertDetails`, `IPlotState`, `RealtimeUsersWidgetData`, `GraphQLCompositeType`, `Web3Provider`, `HSD_Archive`, `MergeTree`, `SagaConfig`, `Fig.Option`, `TextDocumentWillSaveEvent`, `WindowRef`, `RequireNode`, `LambdaMetricChange`, `ChannelCredentials`, `TileTestData`, `MatchPathAsyncCallback`, `EntityKey`, `IMagickImage`, `AssociationValue`, `GalleryItem`, `TReference`, `GetTagsCommandInput`, `ImportsAnalyzerResult`, `BaseClosure`, `AkimaCurve3dOptions`, `AType`, `BatteryStateEntity`, `SetValueOptions`, `PropertyType`, `ListAppInstanceUsersCommandInput`, `ParseEvent`, `CompositeDisposable`, `Fraction`, `CspDirectives`, `PublicUser`, `ElementModels.IElementWrapper`, `cc.Vec3`, `CustomReporterResult`, `SignCallback`, `GfxBufferUsage`, `CertificateFilter`, `DetectedCronJob`, `INumberFilter`, `TextStyle`, `ISwaggerizedRouter`, `IRemote`, `StepGenerator`, `SampleCartProduct`, `StatusResult`, `SvgIconProps`, `FileStatWithMetadata`, `WriteValueOptions`, `ConeLeftSide`, `ModelPrivate`, `IElementStyle`, `Navigation`, `IVisualizerVertex`, `MutationResolvers`, `IPQueueState`, `HighRollerAction`, `ts.CommentRange`, `HybridConnection`, `ParameterName`, `CreateConnectionCommandInput`, `PostSummary`, `UserName`, `AppManager`, `TypedComponent`, `RuleActionChange`, `RuleResult`, `EditDialogData`, `d.HostRuleHeader`, `GlyphData`, `ServerWalletAPI`, `IMessageValidator`, `ASSymbol`, `LinkOpts`, `ServiceRequest`, `IEntityAction`, `SplineRouter`, `ResponseTiming`, `PolyIDAndShares`, `NodeRef`, `CharsetNameContext`, `TopologyObjectId`, `DevcenterService`, `ActiveOverlay`, `TreeStructure`, `MonthViewProps`, `PerformanceResourceTiming`, `IceTransport`, `DiagnosticSeveritySetting`, `NowBuildError`, `LogAnalyticsLabelDefinition`, `GitHubRepoData`, `UpdateEmailTemplateCommandInput`, `StateStorageService`, `BzlConfiguration`, `BindingInputBase`, `LayoutActor`, `JointConfig`, `AxeResults`, `IDataFilterResult`, `Kernel.IKernel`, `iElementInfo`, `ElementStylesModifier`, `IToolbarItemProps`, `ILoginResult`, `ClassLike`, `Toppy`, `SubscriptionsClientOptions`, `CharStream`, `React.Navigator`, `SaleorClient`, `UpptimeConfig`, `UserAccount`, `DataflowAnalyzer`, `Next`, `P.Logger`, `ITelemetryErrorEvent`, `StoreClass`, `ParsedSelector`, `DefaultRootState`, `CliApiObject`, `AnimationRange`, `ScopeNamer`, `ValidationService`, `PredicateContext`, `FirmwareUpdateMetaDataCC`, `InputActionMeta`, `PDFCatalog`, `AlertOptions`, `BSPBoxActor`, `ProviderRegistry`, `ICurrentWeather`, `ClrHistoryModel`, `ISimpleType`, `TableNS.CellProps`, 
`MentionSuggestionsProps`, `ComponentCompilerState`, `ChatParticipant`, `TwingTemplate`, `RolesService`, `ShapeT`, `Interview`, `TransactionFunction`, `MetricUnit`, `CustomPropertyGetUsage`, `ModuleContext`, `CardData`, `QueryByBucketMethod`, `GraphReceipt`, `IUserIdentity`, `KibanaRequest`, `MortalityService`, `IStorageUtility`, `CallClientProviderProps`, `ImageStretchType`, `CollectDeclarations`, `XYChartScrollbar`, `IGenerateReleaseNotesOptions`, `DocumentPositionStateContext`, `PropsWithUse`, `SketchLayer`, `By2`, `BaseVerifiableClaim`, `DataTypeContext`, `IntrospectionWarnings`, `Pong`, `ConstantSourceNode`, `Handle`, `UpdateAlbumDto`, `MenuInner`, `TransitionConfig`, `ReducerAction`, `SceneControllerConfigurationCCSet`, `SplashScreen`, `DecoratorDef`, `BuilderOptions`, `ValidationEngine`, `InterpreterOptions`, `BracketType`, `SlotValue`, `MenuEvent`, `FixableProblem`, `EditableHippodrome`, `IEdgeRouter`, `InitUI`, `ReplayTabState`, `SpatialAudioSeat`, `CustomWindow`, `DefaultEditorAggParamProps`, `MangoQuerySelector`, `LetAst`, `AggsCommonStart`, `EventListenerHandle`, `Uni.Node`, `MockTemplateElement`, `ArrayServiceGetKeysByTreeNodeOptions`, `Postfixes`, `TrackOptions`, `GroupedTask`, `ThyPopover`, `ScmResource`, `StylableFile`, `IMediaQueryCondition`, `PyteaService`, `BrowserWindowRef`, `SchemaValidatorFunction`, `OptionsInterface`, `BlockAtom`, `DocString`, `IScriptSnapshot`, `LitecoinAddressFormat.Modern`, `TableColumn`, `actionTypes`, `ActivityTimeSeriesMetrics`, `LeakyReLULayerArgs`, `altair.LightClientUpdate`, `AuthorisationService`, `ISuperBlock`, `CreateUserService`, `SavedVisInstance`, `Responses.IViewContentItemResponse`, `Lines`, `Obstacle`, `CdkStepper`, `TypeDecorator`, `AuthorizationError`, `PutEventsCommandInput`, `SQLNode`, `TestProvider`, `Aggregate`, `ReadonlyAtom`, `ActionOptions`, `CreateWorkflowCommandInput`, `LoginSession`, `Hmi`, `FakeUsersRepository`, `ParsedTranslation`, `TextStringLiteralContext`, `Consola`, `Kind`, `QueryHelperService`, `SHA512`, `ReadModelStoreImpl`, `IMusicDifficultyInfo`, `BulkUnregistration`, `LogFn`, `UsePaginatedQueryMergeParams`, `DocumentDataExt`, `PushpinUrl`, `SavedObjectsImportRetry`, `TypeNameContext`, `TriggerConfig`, `ClassMethodDefinition`, `RemoteData`, `GoToOperation`, `FileOptions`, `PDFPageTree`, `FunctionConfig`, `ActionListItem`, `NavLocation`, `Draw`, `IDatabaseDataDocument`, `ViewService`, `AppInfo`, `ODataClient`, `ServerCertificateRequest`, `BasicPizzasProvider`, `PlacementStrategy`, `TestReference`, `Blending`, `AnyEvent`, `d.PrintLine`, `CountService`, `GaxCall`, `UpdateEvent`, `DeploymentTable`, `chokidar.FSWatcher`, `ModelArtifactsInfo`, `MigrateEngineLogLine`, `HmiService`, `ModelEvaluateArgs`, `PackedTag`, `Collator`, `Organisation`, `Contents`, `Wei`, `LambdaService`, `BlockCompiler`, `T.Component`, `TaxonomicFilterGroup`, `PersonalAccessTokenCredentialHandler`, `THREE.Event`, `SeekRange`, `NumberFilterFunction`, `ComponentSID`, `btVector3`, `ParamSpecValue`, `TouchBar`, `ICellModel`, `EnvsRaw`, `TestEvent`, `SavedKeymapData`, `FlexLength`, `actions.Args`, `CDPTarget`, `PQLS.Analysis`, `ConcreteRequest`, `CullMode`, `TransactionOutput`, `StringTableEntry`, `ControlActivity`, `common.EventData`, `QuestionSelectBase`, `SectionVM`, `PropertyEditorProps`, `ImGui.U32`, `RedirectTask`, `GLTFLoader`, `NumberMap`, `VisualizeAppState`, `DmChannelDTO`, `TempDir`, `KeyRange`, `ActionTypeConfigType`, `InitialValues`, `lgQuery`, `AdminJS`, `PopupPositionConfig`, `TSVBTables`, `DiagnosticReporter`, `GltfAsset`, `CMB`, 
`TypeReferenceNode`, `DefaultPrivacyLevel`, `WaterInfo`, `AsyncThunkAction`, `IPlayer`, `TableListItem`, `RuleWithFlags`, `ng.IAttributes`, `ThyOptionSelectionChangeEvent`, `Mail`, `AnimationCurveKeyframe`, `Globals`, `LocalContext`, `StreamMetricReport`, `J`, `FixedTermLoanAgency`, `ListImportsCommandInput`, `PageHeaderProps`, `IPanel`, `IDirective`, `VertexDescriptor`, `IdentityClient`, `LspDocument`, `HotkeySetting`, `BarColorerStyle`, `VirtualNetworkGateway`, `ProjectProperties`, `GsTeam`, `HiNGConfig`, `Gauge`, `ISerializedRequest`, `IPortal`, `ActionName`, `MatrixUser`, `SquireType`, `HdErc20PaymentsConfig`, `DataColumnDef`, `OnLoadArgs`, `requests.ListInstanceAgentCommandExecutionsRequest`, `pxtc.CompileResult`, `SwaggerDocument`, `IEventHandlerData`, `TEvents`, `IResizedProps`, `SelectionModelConfig`, `CountryState`, `ConstraintSet`, `ArgsOf`, `DataTypeNoArgs`, `vscode.TreeItem`, `Number`, `QCBacktest`, `IAddMemberContext`, `FieldAppearanceOptions`, `ShorthandFieldMapObject`, `P2PInternalState`, `FileAvailability`, `TempFlags`, `Cypress.cy`, `SimpleState`, `IPointPosition`, `AttributeMask`, `TrackingService`, `JsonRpcResponse`, `WebDriver2`, `TransferData`, `PiNamedElement`, `Geopoint`, `CommandArgument`, `UnaryOperator`, `UInt8`, `FilterExpression`, `VocabularySortType`, `RelayerRequest`, `AureliaProjects`, `Drawer`, `ConnectorProperty`, `SettingsV11`, `InternalCoreStart`, `NoncondexpressionContext`, `SearchPredicate`, `EqualityFunc`, `GeometryCollection`, `EncoderOptionsBuilder`, `DatabaseBundle`, `AnalysisConfig`, `next.Group`, `Prando`, `NFT721V2`, `AbstractLogger`, `PaymentTester`, `d.SourceTarget`, `WorkRequestCollection`, `InstrumentedStorageTokenFetcher`, `ts.PropertySignature`, `LESSParser`, `Models.Exchange`, `SavedObjectsExportError`, `ItemsList`, `VKFParamMap`, `AbstractKeymapData`, `DescribeVpcPeeringConnectionsCommandInput`, `MaterialMap`, `LinkModel`, `IBinding`, `ActionStatusResolverService`, `WorkDoneProgressServerReporter`, `ArrayServiceArrToTreeNodeOptions`, `AuthenticationTemplate`, `SVError`, `TokenValue`, `PopulateOptions`, `ContentControl`, `BaseDataOptionType`, `AccordionItemComponent`, `ResolveType`, `apid.VideoFile`, `SatObject`, `Fn0`, `requests.SearchSoftwarePackagesRequest`, `ListDataViewsCommandInput`, `Slab`, `ObjectMakr`, `ToolbarButton`, `Expansion`, `CiaoService`, `GitStore`, `DataTable.ColumnCollection`, `TraceSpan`, `VNodeProps`, `FormFieldMetadataValueObject`, `UserSchema`, `SQLiteDb`, `Port`, `Cycle`, `IProposalCreateInput`, `createAction.Action`, `ApplicationState`, `IHsl`, `RoughRenderer`, `QualifierSpec`, `ExcerptToken`, `TocLink`, `PromptResult`, `PropertyPreview`, `Dexie`, `ImportAsNode`, `ThyAutocompleteContainerComponent`, `NowRequest`, `INgWidgetPosition`, `StyleGenerator`, `DesignTimeProperty`, `Basset`, `Response.Wrapper`, `MatRadioButton`, `Validation`, `CeramicCommit`, `MessageBoxReturnValue`, `zmq.Pair`, `ChannelTypeEnum`, `DeepPath`, `RouterNavigatedAction`, `FieldJSON`, `InferTypeNode`, `IVertex`, `Level`, `CompletionProvider`, `SExpressionTemplateFn`, `DeleteEmailTemplateCommandInput`, `CounterDriver`, `Props`, `MetricsPublisher`, `T0`, `RouterConfiguration`, `ClipsState`, `Fish`, `FormattedBuilderEntry`, `Calendar_Contracts.IEventCategory`, `A6`, `JwtPayload`, `MenuStateBuilder`, `SmdDataRowModel`, `Exhibition`, `TextOp`, `SyncOptions`, `ErrorThrower`, `IModelTemplate`, `ProgressEvent`, `VariableAST`, `ConnTypeIds`, `OutputSchemaField`, `CreateAttendeeCommandInput`, `CopyAsOrgModeOptions`, `ConverterFunction`, 
`HealthpointLocationsResult`, `ResolverOptions`, `InstructionWithText`, `DownloadedImage`, `DataRecognizer`, `TransformerDiagnostics`, `S2Options`, `RequestPayload`, `TuxedoControlCenterDaemon`, `DashboardType`, `BinaryOperator`, `JsonaValue`, `TestNodeProvider`, `MatchDSL`, `NzResizeEvent`, `three.Mesh`, `TranslationState`, `MemberId`, `Eof`, `ExtraDataModel`, `BlogActions`, `DbList`, `tf.LayersModel`, `Highcharts.NetworkgraphLayout`, `DiffedURIs`, `CallHierarchyOutgoingCall`, `GeneralStorageType`, `SliderProps`, `ExcaliburGraphicsContextOptions`, `TimelineKeyframe`, `SigningKey`, `UseRefetchOptions`, `RpcRouter`, `IChannelDB`, `TokensList`, `RenderableStylesheet`, `WalletProviderInfo`, `SetupModeProps`, `RegionTag`, `WebElementPromise`, `DownloadItem`, `DocumentInitialProps`, `ShortChannelId`, `BinaryShape`, `Choice`, `ProductOptionGroup`, `RedisCommandArguments`, `EdgeAttributes`, `fs.Stats`, `requests.ListVnicAttachmentsRequest`, `MergeDomainsFn`, `ExpShapeConst`, `ContentLayoutDef`, `CPS`, `CommandData`, `CommandResult`, `TypePredicateNode`, `ResourceRequirement`, `LinkedAttachment`, `vue.ComponentOptions`, `EmitterContext`, `MDCTabDimensions`, `BankAccount`, `TouchGestureEventData`, `GitJSONDSL`, `IKeyboardInput`, `requests.ListSendersRequest`, `IMoveFocusedSettings`, `SortValue`, `TemplateLiteralTypeSpan`, `dia.Element`, `SpringFn`, `JsonRpcProxy`, `TagItem`, `EditorOpenerOptions`, `BitbucketAuthTokenRepository`, `ExternalProps`, `ParameterValueList`, `ICancellable`, `SelectionConstructorArgs`, `SubnetAlreadyInUse`, `MaestroTipoModel`, `Quantity.OPTIONAL`, `ParsedCode`, `GfxImplP_GL`, `NzTreeBaseService`, `IMock`, `GameOptions`, `CommentAttrs`, `PadCalculator`, `SchemaValidator`, `jsPDF`, `InnerClientState`, `ListWorkRequestsRequest`, `TMenuOption`, `FormHook`, `SimNet`, `SearchError`, `RequestBodyMatcher`, `FormatDefinition`, `LoadMany`, `HintID`, `App.webRequest.IRequestMemory`, `GridNode`, `FullCertificationRequestDTO`, `ClaimedMilestone`, `WebResourceLike`, `FoldingRangeParams`, `schema.Entity`, `GToasterOptions`, `TypeConstructionContext`, `RemoveFromGlobalClusterCommandInput`, `RefForwardingComponent`, `AccessTokenProvider`, `models.RegEx`, `CustomerAddress`, `IEmployee`, `CreateUserCommandInput`, `AirGapWallet`, `ast.LiteralNode`, `identity.IdentityClient`, `THREE.WebGLRenderer`, `shareComicFace`, `IKibanaMigrator`, `CheckpointWithHex`, `GroupId`, `AxesProps`, `PlanetaryTrack`, `OverloadedFunctionType`, `IPublish`, `IterationState`, `CapabilitiesProvider`, `ArithmeticInput`, `SQS`, `React.ChangeEventHandler`, `EntireGame`, `HookContext`, `PBRCustomMaterial`, `RippleGlobalOptions`, `SVGDefsElement`, `RequestTemplateDef`, `SuiModalService`, `FormikHelpers`, `RunnerInfo`, `Traced`, `t.SourceLocation`, `AnyEventObject`, `Bool`, `ast.NodeAttributes`, `YearAggregations`, `Priority`, `MissingFilter`, `LoggingConfigType`, `sbvrUtils.PinejsClient`, `SGDOptimizer`, `Comonad1`, `PassNode`, `ExactPackage`, `DharmaMultiSigWalletContract`, `VirtualDirectory`, `HttpPipelineLogLevel`, `_ZonePrivate`, `IPointUnit`, `UberChoice`, `AppendBlobClient`, `types.DocumentedType`, `IBlocksFeature`, `ModelViewer`, `TraverseOptions`, `FrequentLicences`, `CommandPacker`, `IEventContext`, `NotificationService`, `React.ComponentPropsWithoutRef`, `DispatcherEmitter`, `TransactionRequest`, `MatTab`, `iff.IChunkHeader`, `IBundle`, `ODataPathSegments`, `LView`, `SocketOptions`, `CSSTemplate`, `GitConfig`, `IFormFieldValue`, `SyncRule`, `SavedObjectsImportFailure`, `EventItem`, `xmlModule.ParserEvent`, 
`WindowsManager`, `ShurikenParticleRenderer`, `LogLevelValues`, `DbUser`, `ToastyService`, `ImageType`, `GUIController`, `Title`, `World`, `BufferMap`, `ElectronEvent`, `PitchName`, `AllowsNull`, `RuleOptions`, `UpdateRegexPatternSetCommandInput`, `EmbedToken`, `Interceptor`, `ts.ResolvedModule`, `LinkedListChild`, `MockCustomElementRegistry`, `GetTestDestinationOptions`, `PluginDiscoveryError`, `EPeopleRegistryState`, `ProtocolEventMessage`, `MalFunc`, `MergeFsResult`, `WebGLProgram`, `IOSProjectConfig`, `AnyExpressionFunctionDefinition`, `ReplacementBuilder`, `TreeNodeItem`, `UnknownType`, `GenericLayout`, `WebApi.JsonPatchDocument`, `ISubject`, `SnakeheadDataTable`, `BalmError`, `Vector4`, `btCollisionObject`, `UnixTime`, `Float32BufferAttribute`, `JSONFormatter`, `ApplyWorkspaceEditParams`, `ReplyChannelRangeMessage`, `CollectionFn`, `XPCOM.nsIDOMWindow`, `DataResult`, `HTMLFormElement`, `NgbDate`, `ViewItem`, `DecodeError`, `FeeOption`, `ActionParams`, `Phaser.Input.Pointer`, `FacepaintStyleSheetObject`, `AuthAccessCallback`, `PowerShellScriptGenerator`, `DefaultTextStyle`, `TooltipPayload`, `SystemInfo`, `DisconnectReason`, `paper.ToolEvent`, `InvoiceItemService`, `TemplateTransformerData`, `FileAsset`, `ServerlessResourceConfig`, `VisualizationListItem`, `AsyncFactory`, `VAIndent`, `ChromeApi`, `CollectionWithId`, `QueryCacheResult`, `ListQueuesRequest`, `StatusUnfollow`, `ReactComponent`, `SubscriptionCategoryNotFoundFault`, `ActionResult`, `BaseRowDef`, `MultiAPIMerger`, `ProxyInfo`, `QueryOrderOptions`, `LocIdentifier`, `LicenseType`, `Shim`, `IWhitelistUserModel`, `QObject`, `ExpandableTreeNode`, `AggregationMap`, `TestStepResultStatus`, `messages.Examples`, `ExpressRoutePort`, `GtConfigField`, `LegendOptions`, `ExpressionReturnResult`, `Common.ILog`, `ILoggedProxyService`, `NormalizedPath`, `PgType`, `pulumi.ResourceOptions`, `RenderTreeEdit`, `u16`, `RequestTypes`, `SfxData`, `SharedRoleMapping`, `MasterNodeRegTestContainer`, `PortSet`, `WebSocket.CloseEvent`, `IdeaDocument`, `sourceT`, `V3RouteWithValidQuote`, `ContextOptions`, `GraphSnapshot`, `eui.Image`, `DAL.DEVICE_ID_COMPONENT`, `ExecOpts`, `AEADCipher`, `BaseIncrementOptions`, `StartCallOptions`, `USampler2DTerm`, `NamedImportBindings`, `ICheckAnalysisResult`, `SharedElementSceneData`, `URIAttributes`, `TextAreaTextApi`, `TmpfileOptions`, `VinVout`, `GenericRetryStrategyOptions`, `DebugProtocol.ConfigurationDoneResponse`, `RequireStatementContext`, `CanaryScope`, `FlexItemStyleProps`, `UrbitVisorConsumerTab`, `XorShift`, `BlockReference`, `requests.ListDhcpOptionsRequest`, `XMLAttribute`, `LocalMigration`, `PlanPreviewPanel`, `TaskRoutine`, `RtorrentTorrent`, `ListPermissionsCommandInput`, `IErrorPayload`, `SearchOption`, `EditPhotoDto`, `ZipOptions`, `CardContextOptions`, `RegisterInstanceCommandInput`, `requests.ListNatGatewaysRequest`, `AiService`, `RneFunctionComponent`, `SuiComponentFactory`, `WebAppCreateStack`, `VcsInfo`, `Debug`, `SizeWithAspect`, `TSSeq`, `App.services.IHttpChannelService`, `SocketMeta`, `Diffs`, `GX.AttenuationFunction`, `ProcessedCDPMessage`, `DropLogFile`, `ImagePipeline`, `IEnvironmentRead`, `FactoryResult`, `StartRecordingRequest`, `AtomGetter`, `ISaxParser`, `DateRangeMatch`, `TestStruct`, `TextToSpeechClient`, `IStorageProvider`, `BigNumberFive`, `NonFungiblePostCondition`, `IndexField`, `ScenarioData`, `PatternRecognizer`, `ListRoomsResponse`, `QueryCache`, `FlowLabel`, `ITriggerPayload`, `PageTitleService`, `ArrayContent`, `Measurement`, `NgModuleTransitiveScopes`, 
`interfaces.ServiceIdentifier`, `HTMLScStatusTimelineOverlayRowElement`, `EditValidationResult`, `GroupList`, `KeyframeNodeOwner`, `CertificateManager`, `Tab`, `SelectedGroups`, `requests.ListAgreementsRequest`, `ESCalendarInterval`, `I18NService`, `HandlerAction`, `Broker`, `RenderingDeviceId`, `FinderPattern`, `ZodType`, `ViewerPreferences`, `TooltipInitialState`, `ODataStructuredTypeFieldParser`, `ZoomOptions`, `Reflection`, `MockGuild`, `Primitives.Point`, `Fauna.Expr`, `ListReservationsCommandInput`, `AccessibilityOptions`, `QueryInput`, `HighlightResult`, `androidx.fragment.app.FragmentManager`, `L13Element`, `SymbolDisplayPart`, `BaseRedirectParams`, `languages.Language`, `IFS`, `MappingObject`, `TEnumValue`, `PostgresTestEntity`, `IntFormat`, `AnimatedSprite`, `ProvidersInfoService`, `SpeechConfigImpl`, `DayKey`, `CSSResult`, `RecurringBillId`, `ITransform`, `InlineConfig`, `RelationsService`, `GoToTextInputProps`, `AuthError`, `integer`, `CodeGenModel`, `ConditionsArray`, `TreeProps`, `TensorListMap`, `MarketCurrency`, `MsgType`, `SfdxFalconErrorRenderOptions`, `PvsProofCommand`, `FMAT_RenderInfo`, `IKernel`, `IStreamApiModel`, `GroupedFields`, `PerfGroupEntry`, `MongoManager`, `Conflict`, `JwtToken`, `IDatabaseDriver`, `MDCBottomSheetController`, `OrgDataSource`, `ColumnState`, `SearchResultsPage`, `Enumerator`, `CallableConfig`, `iam.Role`, `TransformerContext`, `DateProfile`, `InvalidRestoreFault`, `IDocumentAttributes`, `TFLiteWebModelRunner`, `StakingData`, `Alg`, `ApigatewayMetricChange`, `BidirectionalLayerArgs`, `EmitterSubscription`, `SidenavMenu`, `SecurityPluginSetup`, `CreateDirectoryCommandInput`, `CSSResultGroup`, `LoadEvent`, `InterfaceWithCallSignatureReturn`, `TreePath`, `BlockPath`, `NgGridItemSize`, `UIntArray`, `Bound`, `TweenMax`, `IHost`, `Builders`, `TimeFilterServiceDependencies`, `CreateForgotPasswordDto`, `OPCUAClientOptions`, `TypeAliasInfo`, `SourceRenderContext`, `GraphQLService`, `NgModuleFactory`, `WebGLVertexArrayObject`, `RectGraphicsOptions`, `SavedObjectsMappingProperties`, `MaybeCurrency`, `IOrganizationSprint`, `BrowserNode`, `WidgetsRegister`, `AccessKeyRepository`, `MapStateProps`, `RtmpResult`, `MarkSizeOptions`, `UiSettingsDefaultsClient`, `LabExecutionService`, `BotFilterFunction`, `Matrix4d`, `FIRVisionImage`, `Monad1`, `EntityCollectionServiceElementsFactory`, `ListAlertsCommandInput`, `ParsedQueryWithVariables`, `DebtRegistryEntry`, `UpdateCategoryDto`, `CalendarPatterns`, `DirectionDOMRenderer`, `StorageCacheService`, `Gradient`, `TileLayer`, `MIREntityTypeDecl`, `MenuMapEntry`, `ScrollDispatcher`, `TeamSpaceMembershipProps`, `SequencePatternInfo`, `AppStoreReplay`, `StringShape`, `PixivParams`, `mongoose.FilterQuery`, `IHeaders`, `TextEditorHelperReturnType`, `QueryDeploymentRequest`, `JSONSchemaSettings`, `RawRow`, `NativePlaceTradeChainParams`, `AbstractSqlPlatform`, `WorkspaceOptions`, `Identification`, `EscrowedPayment`, `BcryptAdapter`, `BlogEntry`, `ArgPathOrRolesOrOpt`, `TooltipItem`, `CourseTask`, `ICommandDefinition`, `SafeVersion`, `InitializeHandlerOptions`, `OperatorSummary`, `Generate`, `ContentInfo`, `KeplrSignOptions`, `UniqPrimitiveCollection`, `GetParameters`, `DictionaryEntryNode`, `DataHolder`, `SentimentAspect`, `AxisPositions`, `MinimalTransaction`, `GenerateTypeOptions`, `EzBackendOpts`, `IPod`, `RegionType`, `TransformPluginContext`, `MdcTextField`, `ObservableMedia`, `BaseParser`, `MVideoFile`, `TimeRangeBounds`, `PullRequestReference`, `TiledMapResource`, `EncodingType`, `Balances`, `RpcMessageBuilder`, 
`EntityMap`, `ICallsGetByContactState`, `LinePointItem`, `ErrorState`, `validateTrigger`, `SpatialDropout1DLayerConfig`, `TGetStaticProps`, `OptionsInit`, `AbiCoder`, `IThrottlerResponse`, `UtilityService`, `IStorageSyncOptions`, `DIALOG`, `TreeNodeState`, `GoalTimeFrame`, `Continue`, `interfaces.Bind`, `InMenuEvent`, `Radius`, `UIRouter`, `TaskLifecycleEvent`, `LiftedStore`, `SetupOptions`, `RuleModule`, `WeaveNode`, `UnsignedMessage`, `SVGPathElement`, `StringNode`, `TestReader`, `Nullable`, `XSDXMLNode`, `DbTx`, `QueryList`, `GetConnectionCommandInput`, `ASTCodeCompatibilityReport`, `SyntaxTree`, `FnU4`, `PadId`, `ElementSize`, `IMessageResponse`, `SocketGraphicsItem`, `LogicalQueryPlan`, `ExportedNamePath`, `Dashboard`, `InjectedQuery`, `CompositeDraftDecorator`, `AzureCustomVisionProvider`, `EquipmentSharingPolicyService`, `ExpShape`, `requests.ListJobRunsRequest`, `IResults`, `ClassVisitor`, `BasicAcceptedElems`, `utils.BigNumberish`, `AggParams`, `TableModel`, `MetaValue`, `IFormat`, `Routes`, `babel.ObjectExpression`, `ts.Modifier`, `WebSiteManagementModels.SiteConfig`, `IHawkularAlertsManager`, `StoreMap`, `TypeMapper`, `NSApplicator`, `MockEnv`, `RunEveryFn`, `MonoTypeOperatorAsyncFunction`, `SerializableConstructor`, `FunctionImportRequestBuilder`, `ImageInspectInfo`, `EventStream`, `ml.Element`, `IndexPattern`, `u64`, `TranslationFacade`, `ZoneWindowResizeSettings`, `IgnoreQueryParamsInResponseCommandInput`, `IBookmarks`, `ImplicitParjser`, `GitLabFixtureClient`, `ClusterCreateSettings`, `ArgsDescriptions`, `TypedColor`, `NonRelativeModuleNameResolutionCache`, `d.SerializeImportData`, `Decimal`, `SequenceNode`, `OrderedComparator`, `PictureGroup`, `VNodeProperties`, `NodeClass`, `IGradGraphs`, `BaseRequestOptions`, `UiCalculator`, `Konva.Stage`, `PathStyleProps`, `MetronomeBeam`, `RowLevel`, `WaveShaperNode`, `SyncStore`, `ISvgMapIconConsumerProps`, `ShapePair`, `ApiItem`, `ConstantQueryStringCommandInput`, `EmitParameters`, `TExtra`, `ITokenInfo`, `QRCode`, `IPartitionLambdaFactory`, `PartialEmotionCanvasTheme`, `Geoset`, `LoginDto`, `DebugProtocol.OutputEvent`, `DebugProtocol.NextArguments`, `KeyAlgorithm`, `YAMLMapping`, `RequireContext`, `MessageKeys`, `SelectFileModel`, `Mob`, `BatchType`, `IChildNodesMap`, `PCode`, `ChildMessage`, `Order3Bezier`, `FilterPredicate`, `JapaneseDate`, `QueryTuple`, `LatexAtom`, `SafeHtml`, `MenuItemProps`, `TinaCloudCollection`, `GlobalAveragePooling1D`, `ThyGuiderRef`, `CommentDto`, `CSSLength`, `QuickInputButton`, `FileLock`, `Pose2DMap`, `ExportSummary`, `MyNode`, `DeferIterable`, `Quat`, `FormattedStatus`, `WeightsManifestGroupConfig`, `SolidDashedDottedWavy`, `TItemsListWithActionsItem`, `IPerfMinMax`, `TextObject`, `PullRequestNumber`, `perftools.profiles.IProfile`, `CodeGeneratorContext`, `TileContentRequestProps`, `B4`, `next.Sketch`, `InternalServerException`, `MatchedItem`, `NonlocalNode`, `RnM2Node`, `requests.ListDedicatedVmHostShapesRequest`, `Scatterplot`, `ComponentFileItem`, `SQLiteTableDefinition`, `WaitForOptions`, `Toolkit.IPluginExports`, `UnbindFn`, `RouteInfo`, `Credentials`, `SortType`, `DatasetResource`, `ObjectProperty`, `ControllerProps`, `Tsoa.Type`, `AssetId`, `Caller`, `ProgramInput`, `WebResponse`, `TestGroup`, `IFullItemState`, `requests.ListNotebookSessionsRequest`, `ActionPlugin`, `Snackbar`, `ResultT`, `NodeOptions`, `ITiledObject`, `PartialDeep`, `DefaultState`, `NineStar`, `RailRider`, `CarouselInternalState`, `DefaultSession`, `BleepGeneric`, `IEvent`, `SourceFileEntry`, `BoundAction`, 
`ethers.ethers.EventFilter`, `AudioVideoControllerState`, `InstanceState`, `ResourceCollection`, `TimeoutOptions`, `CoreStart`, `RawBlockHeader`, `FullIconifyIcon`, `FailedShard`, `RemoteUpdateListener`, `WalletEventType`, `PutEmailIdentityMailFromAttributesCommandInput`, `IntType`, `SweepEvent`, `ChatItemSet`, `DtlsServer`, `SensorType`, `WasmTensor`, `BabelTarget`, `EmailActionConnector`, `ProcessStageEnum`, `X509CertificateSupplier`, `GrpcEventEmitter`, `RegisteredClient`, `USB`, `DescribeEventsResponse`, `RelationExt`, `VirtualApplication`, `JSDocPropertyTag`, `W4`, `MessageKind`, `CompanionData`, `Group.Scalar`, `AvatarProps`, `ChannelPermissionOverwrite`, `CreatePhotoDto`, `OracleConfig`, `GLsizei`, `ValidationProfileExt`, `FileTextChanges`, `NotificationHandler0`, `IGarbageCollectionState`, `PluginContext`, `AngularFireObject`, `Frontmatter`, `BodyPixOperatipnParams`, `EntityTypeT`, `UpdateUserInput`, `StreamingClientInfo`, `WasmSceneNode`, `SearchComponent`, `PointerEventHandler`, `SentPacket`, `MongoCommand`, `ClassSymbol`, `YogaNode`, `ListShapesRequest`, `DelNode`, `SolidityValueType`, `ResponsePath`, `RequiresRuntimeResult`, `Denque`, `CodeRange`, `ElementMeta`, `Rule.Node`, `StreamAddOutgoingParam`, `vscode.SymbolInformation`, `FoldCb`, `ITimeline`, `PureTransitionsToTransitions`, `SetMap`, `IEmployeeStatisticsHistoryFindInput`, `MinifyOutput`, `BlockElement`, `OperationData`, `ConfigurationOptions`, `UserButton`, `NatGateway`, `IFloatV`, `AutoSubscription`, `GitBlameCommit`, `GraphState`, `GetDataSourceCommandInput`, `DeployStepID`, `CommentPattern`, `ComplexPluginOutput`, `OpenSearchConfig`, `ActionStatus`, `ListTagsForResourceCommand`, `ControlsService`, `AstIdGetter`, `IFrameAttachment`, `cc.Event.EventKeyboard`, `SeriesItemsIndexesRange`, `AsyncSystem`, `MomentDateAdapter`, `AsyncSettings`, `SvgIconConfig`, `CreatePipelineCommandInput`, `RemoveBuffEvent`, `IFieldCustomizerCellEventParameters`, `AbstractValue`, `DeleteDirectoryCommandInput`, `ContinuousDomainFocus`, `VisEditorOptionsProps`, `Refs`, `GeneratorConfig`, `JWKStore`, `IAssetComponentItem`, `CSVMappingParameters`, `CSSObjectWithLabel`, `PositionOptions`, `TimelineTrackSpecification`, `NewsItem`, `FocusableElement`, `pulumi.ComponentResourceOptions`, `BadRequestErrorInfo`, `CacheSnapshot`, `Bamboo`, `PumpCircuit`, `AlgWithIssues`, `DeployHelper`, `d.E2EProcessEnv`, `ICircuitGroup`, `GenericObject`, `UnoGenerator`, `CookieJar`, `VertexBuffer`, `DistributionData`, `IDisposable`, `StableRange`, `CacheType`, `DefaultPass`, `JSDocState`, `DelegateTransactionUnsigned`, `ActionHandler`, `ObjectCacheState`, `TokenBucket`, `IEntries`, `RsRefForwardingComponent`, `IBatteryCollectionItem`, `ColumnsContextProps`, `PSTDescriptorItem`, `CompositeMapper`, `ReturnT`, `RTCPeerConnectionIceEvent`, `SteamDeviceReport`, `BlockTransactionString`, `IInvoiceUpdateInput`, `SignIn`, `CompilerTargetHandler`, `Phone`, `CallExpressionArgument`, `ProofAttributeInfo`, `HoverTarget`, `ContractWrapperFactory`, `NumberLiteralContext`, `VersionVector`, `StoredTransaction`, `StatBlock`, `OutPacketBase`, `UseCaseLike`, `RotationOrder`, `Transcoder`, `ProtobufValue`, `Computation`, `ValidatedPurchase`, `ReadableOptions`, `TsOptionEngineContext`, `AppStackMinorVersion`, `MockContext`, `TaskPool`, `Renderer2`, `DescribeDBParametersCommandInput`, `SettingModel`, `HttpPayloadTraitsCommandInput`, `ElkNode`, `ConnDataType`, `UpgradeConfigsParams`, `BSPRenderer`, `FileSystemAccess`, `GetUpgradeHistoryCommandInput`, `IDatArchive`, `CreatureType`, 
`ConsoleSidebarLink`, `UUIDMetadataObject`, `messages.PickleStep`, `ListModel`, `SwitchApplicationCommand`, `SavedObjectsType`, `INavigationFeature`, `ListFindingsRequest`, `IPosition`, `SpheroMini`, `HeaderActionIconProps`, `GraphcisElement`, `Rtcp`, `SubjectInfo`, `requests.ListAutoScalingPoliciesRequest`, `MIRBasicBlock`, `IChangeEvent`, `JsonHttp`, `AnyBody`, `DMMFPAS_Directives`, `SeriesTypePlotOptions`, `d.FsWriteOptions`, `BucketSegment`, `EventIded`, `RobotsTxtOpts`, `SourceLoc`, `ApiMethodScheme`, `QuestWithMetadata`, `All`, `i.Node`, `GluegunToolbox`, `PasswordGenerationService`, `ChatClient`, `BuildFeatures`, `DynamicValue`, `CreateSubscriberCommand`, `drive_v3.Drive`, `Generator`, `GfxProgramDescriptor`, `PowerlevelCCReport`, `IamStatement`, `ErrorController`, `ITelemetryLoggerPropertyBags`, `firebase.FirebaseError`, `MigrateReset`, `CountryCode`, `FaunaIndexOptions`, `ToJsonOutput`, `CreateDeliverabilityTestReportCommandInput`, `InsertPosition`, `IMenuItemProps`, `MutationEvent`, `CandidateCriterionsRating`, `IRegisteredPlugin`, `GPGPUProgram`, `GraphQLClient`, `ComponentRegistry`, `CreateAppCommandInput`, `QueryResolvers`, `MockFluidDataStoreRuntime`, `LoadedConfigSelectors`, `JsonDocsProp`, `BillingInfo`, `RouteParams`, `Datatype`, `SchemaComposer`, `TradeStrategy`, `Addon`, `ExtendedOptions`, `PropTypesMapping`, `TreeNodeGroupViewModel`, `CSSSource`, `ComponentPortal`, `MealTicketRemoval`, `Geom`, `SCN0_AmbLight`, `ObjectPool`, `OrderForm`, `Comparable`, `ImageSourceType`, `StraightCurved`, `AuthenticationType`, `Execution`, `IAppContainer`, `SubInterface`, `LineWidth`, `IVFSMount`, `UntypedProduct`, `ApplyGlobalFilterActionContext`, `HasShape`, `PickerColumn`, `StacksMainnet`, `GridSize`, `ComponentRuntimeMeta`, `Padawan`, `FeatureSettings`, `TConfig`, `FileBlock`, `SecurityManager2`, `ARCamera`, `UpdateAppCommandInput`, `NodeFlags`, `ConvertOptions`, `Exprs`, `FunctionInfo`, `RumPerformanceResourceTiming`, `SerializedValue`, `ExtensionOptions`, `These`, `RestGitService`, `RequestedCredentials`, `BatchSerialization`, `NumberSystemName`, `SubscriptionService`, `DescribeAssetCommandInput`, `ReferenceResult`, `AuthMetadata`, `SessionRequest`, `CartoonOperatipnParams`, `MockStore`, `FireCMSContext`, `ListTournamentsRequest`, `requests.GetProjectRequest`, `DSpaceObject`, `ResolvedEphemeralListType`, `TranslationString`, `Framebuffer2D`, `ChatPlugin`, `MediaDevices`, `DataTypes`, `ActionsType`, `ScriptParameter`, `TreeCursor`, `UserForm`, `ISavedVis`, `SearchParamsMock`, `HierarchyChildren`, `DebugSession`, `IpPermission`, `ISortOptions`, `KafkaSettings`, `BoxBuffer`, `RTCRtpSimulcastParameters`, `EsmpackOptions`, `ListrBaseClassOptions`, `ParsedSchema`, `CodeEditorMode`, `HelpfulIterator`, `AnchorMode.Any`, `requests.ListCrossConnectLocationsRequest`, `SceneFrame`, `AvailableProjectConfig`, `ODataEntitySetResource`, `Bitstream`, `ChangeAnnotationIdentifier`, `BazelOptions`, `IGitApi`, `LinkParticle`, `Electron.OpenDialogReturnValue`, `IonRouterOutlet`, `ProgressOptions`, `TemplateClient`, `BibtexAst`, `SettingContext`, `TSubfactionArmy`, `GoEngineConfig`, `i18n`, `MediatorMapper`, `Yendor.TickResultEnum`, `BoxCollisionShape`, `IFolder`, `TmdbMovieDetails`, `convict.Schema`, `MetricTypes`, `MActorLight`, `DebugProtocol.PauseResponse`, `LanguageServiceDefaults`, `TableAliasContext`, `CustomPaletteParams`, `DOMElement`, `IDiscordPuppet`, `ERC721`, `ApiDef`, `PositionPlacement`, `DefaultRes`, `RelativeFunction`, `BoxSlider`, `requests.ListMigrationsRequest`, `ApolloLink`, 
`UserTenantRepository`, `ArticleList`, `ArgParser`, `BigDecimal`, `PatternEnumProperty`, `Launch`, `AuthClient`, `ResolveNameByValue`, `SlotTreeItemBase`, `NoteModel`, `GeoLocation`, `TransactionReducerResult`, `GetEnvironmentCommandInput`, `TSigner`, `Quota`, `GunValue`, `TemplateStruct`, `HttpSetup`, `JsonFormsAngularService`, `Entity.List`, `DecodedOffset`, `TelemetryReporter`, `IpcRenderer`, `UpdateLaunchConfigurationCommandInput`, `RuntimeDatabase`, `ParenthesizedExpression`, `ConstInterface`, `SubscriptionEmit`, `ViewTemplate`, `HttpResponseMessage`, `ActivePoint`, `Viewport`, `Analyzer`, `ISerializedResponse`, `BoomTheme`, `ColorComponent`, `SupCore.Data.ProjectManifestPub`, `BaseConverter`, `BigComplex`, `FetchResponse`, `HTMLChar`, `ArianeeWallet`, `ApplicationMetadata`, `TreeSet`, `VdmServiceMetadata`, `ast.AbstractElement`, `MonitoringContext`, `InputProps`, `IMoonData`, `GetDeploymentResponse`, `ComponentLoaderFactory`, `SubMesh`, `InputState`, `DialogueTest`, `EmitNode`, `types.MouseData`, `VerifyCallback`, `Knex.CreateTableBuilder`, `IssueIdentifier`, `TestService`, `KeyboardEventArgs`, `PopStateEvent`, `IFormGroup`, `LogFunctions`, `DependencySpecifier`, `Vec3Term`, `SubmissionJsonPatchOperationsService`, `ServiceName`, `IParameterTypeDefinition`, `inferHandlerInput`, `ActionResultComboCtx`, `HStatus`, `IPythonVenvWizardContext`, `IconifyIconBuildResult`, `CreateOperation`, `InternalProvider`, `LiteralNode`, `... 15 more ...`, `ResolverContext`, `ParseError`, `CorsRule`, `RouteRule`, `WritePayload`, `Fiber`, `InputEvent`, `BiquadFilter`, `Cartesian3`, `GrammarToken`, `ESLintClass`, `Friend`, `WarningLevel`, `StatsChunk`, `CustomerModel`, `AddressService`, `StagePanelManager`, `FilterCriteria`, `CmdletParameters`, `ModuleRpcServer.ServiceHandlerFor`, `ChannelModel`, `messages.Step`, `ITag`, `NFT1155V3`, `ClassMethod`, `GetConnectionResponse`, `ts.TypeAssertion`, `BatchConfig`, `SeriesList`, `PresentationManagerProps`, `StablePlace`, `IFactor`, `ContractCalls`, `AuxBot3D`, `ManagedDatabaseSummary`, `Ray`, `TreeMap`, `ContractAddresses`, `Crdp.Runtime.StackTrace`, `RowHashArray`, `TechniqueDescriptor`, `AsyncComponent`, `IAddressSpace`, `Coda`, `MarkdownContributions`, `FavoriteTreeItem`, `TString`, `LangiumSharedServices`, `WorkerServiceProtocol.RequestMessage`, `LazyExoticComponent`, `LiveAtlasWorldDefinition`, `EventTypes`, `Types.EventName`, `ChartBase`, `EvaluationResult`, `SimpleOrder`, `p5.Color`, `TLSSocket`, `PageCloseOptions`, `GoogleTagManagerService`, `ConfigurationCCGet`, `CameraCullInfo`, `StateUpdater`, `FIRDocumentSnapshot`, `BalmConfig`, `FindSubscriptionsDto`, `ClrQuickListValue`, `MethodWriter`, `WrappingCode`, `NSString`, `PrivateStyle`, `IThyDropContainerDirective`, `ExpNumIndex`, `Automerge.Diff`, `RadarrSettings`, `PopupService`, `ExpressionNode`, `BackgroundPageStyles`, `TranslateContainerConfig`, `NgGridItem`, `UnlinkFromLibraryAction`, `Enable`, `OutputTargetCustom`, `IProviderInfo`, `MeasureUnitType`, `IRouteMatch`, `DistributeArgs`, `ESLNote`, `ErrorReporter`, `OAuthService`, `AccessorConfig`, `AxesTicksDimensions`, `Prepared`, `TfCommand`, `SessionKey`, `StartChannelCommandInput`, `XYZProps`, `MnemonicSecret`, `Viewer`, `GridMaterial`, `FaunaId`, `CompletionParams`, `X12SerializationOptions`, `EncryptionContext`, `FunctionCallNode`, `sinon.SinonStatic`, `GdalCommand`, `requests.ListTaskRunLogsRequest`, `HttpClientRequest`, `Die`, `TypeQueryNode`, `OptionKind`, `JsonRPC.Response`, `M3Model`, `MgtFileUploadItem`, `AttributeContainer`, 
`NbToastrService`, `ChannelOptions`, `UserState`, `CatsService`, `FutureWalletStore`, `USBEndpoint`, `ISortCriteria`, `RequestSigner`, `CreateAuthorizerCommandInput`, `ConfirmationService`, `PrimitiveTypeKind`, `InstructionType`, `RunningGameInfo`, `tsc.Type`, `AlainConfigService`, `OperandType`, `RollupSingleFileBuild`, `Inspection`, `ResourceGroupXML`, `SyntaxDefinition`, `RouteAnimationType`, `PdfSolidBrush`, `TypeBuilder`, `ExpressionRendererRegistry`, `EmojiCategory`, `Translate`, `ITokenizer`, `ListBuffer`, `IKeyCombo`, `Solar`, `SingleProvider`, `TokenParams`, `AppVersion`, `RequiredOrOptional`, `ClientRequestSucceededEventArgs`, `ItemDefinition`, `Statements`, `Undefinable`, `PlayerProps`, `SuiModal`, `HttpClientRequestConfig`, `TemplatePieces`, `CtxLike`, `CustomAction`, `BuildrootAction`, `ReadOnlyFunctionOptions`, `Blok`, `PublicationRepository`, `ISiteScript`, `Events.entertrigger`, `ImageSource`, `SettingActionTypes`, `StandardTask`, `AnalysisRequest`, `EzBackend`, `Invitation`, `IModelBaseHandler`, `IUploadedFile`, `VcsItemRef`, `AggTypeState`, `KeyIndex`, `VennDiagramProps`, `JumpState`, `SearchResult`, `SearchCondition`, `FastifyError`, `xLuceneTypeConfig`, `DehydratedState`, `CustomRequest`, `JsonAst`, `AttrMap`, `RouteComp`, `TreeSelectItem`, `NightwatchBrowser`, `requests.ListMetastoresRequest`, `TransportTime`, `MaterialCache`, `ObjectDetectorOptions`, `React.RefCallback`, `FileEmbedder`, `ExecutionPureTransitions`, `IUserOrganization`, `instantiation.IConstructorSignature6`, `forceBridgeRole`, `FixtureFunc`, `TexCoordsFunction`, `Library`, `internal`, `V1CustomResourceDefinition`, `SetupApp`, `DropIndexNode`, `BorderStyle`, `IndexGroups`, `ParsedOrderEventLog`, `InstantiatedContractTreeItem`, `NamedDeclaration`, `ProgramAccount`, `ListenerFn`, `ImplDeployment`, `AppUpdater`, `EPersonDataService`, `SecureTrie`, `FileAccessor`, `UnionOrFaux`, `VisualizerInteractionTypes`, `DeleteIntegrationResponseCommandInput`, `TileMap`, `TitleVisibility`, `PlayerOptions`, `EncodeOutput`, `requests.ListInstanceagentAvailablePluginsRequest`, `K.ExpressionKind`, `Solution`, `AccountType`, `GetByEmailAccountsRequestMessage`, `YRange`, `HeaderColumnChain`, `KintoRequest`, `LatLngLiteral`, `HTTPHeader`, `AccessorCreators`, `StudioVersion`, `WebElement`, `ToastMessage`, `TexMtx`, `LogicalCpuController`, `ToastButton`, `IQuizFull`, `requests.ListTaggingWorkRequestsRequest`, `XData`, `TeamsMembersState`, `WFSerialization`, `MonacoEditor`, `Flow`, `LanguageVariant`, `QueryCapture`, `I`, `ResourceSettings`, `Color.RGBA`, `INotificationDocument`, `DataModel`, `ButtonSize`, `Plyr`, `NotificationsState`, `Bus`, `PlayerPageSimulation3D`, `ErrorHttpResponseOptions`, `IPC.IFilePickerFileInfo`, `HostComponent`, `ControllerOptions`, `RouterUrlState`, `PolicyFromES`, `MessageToWorker`, `Commutator`, `page`, `FilterHeadersStatusValues`, `Files`, `OpenBladeInfo`, `DynamodbMetricChange`, `ActionTypeExecutorResult`, `HeatmapDataSets`, `ImportMap`, `SpineBone`, `CompletionRecord`, `ReflectedValue`, `EventDeclaration`, `ConditionExpressionDefinitionFunction`, `EntityCollection`, `HiddenProps`, `ListConfigurationsCommandInput`, `AggTypeAction`, `TestUseCase`, `VisitorInvocation`, `CancellablePromise`, `PrettierConfig`, `IMusicInfo`, `MeshRenderer`, `child.ChildProcess`, `numVector`, `BreakpointState`, `CompressedEmojiData`, `FabricGatewayRegistry`, `MIRPCode`, `IconifyIcon`, `LessParser`, `SetupFn`, `vscode.TerminalDimensions`, `OneOrMore`, `CollisionShape`, `LogSummary`, `GetModelsCommandInput`, 
`SqrlExecutionState`, `AbstractToolbarProps`, `TxPayload`, `TestingProject`, `ExtensionProps`, `CloudFrontResponse`, `Vue.CreateElement`, `IDom`, `CompletionBatch`, `Cidr32Block`, `Browser`, `StackSeriesData`, `Shared.SubscriberFactory`, `requests.ListIamWorkRequestLogsRequest`, `btVector3Array`, `ArenaAttribute`, `ConfigurableEnumConfig`, `RolandV60HDConfiguration`, `ExpoConfigFacebook`, `ExpressionServiceParams`, `IHttpResponse`, `requests.ListWorkspacesRequest`, `JPattern`, `IMiddleware`, `GroupParameterMethod`, `IPnpmShrinkwrapDependencyYaml`, `FamilyPage`, `UsernamePassword`, `HTMLPreElement`, `CodeItem`, `Environment`, `ContainerOS`, `RegisteredServiceUsernameAttributeProvider`, `IChangelog`, `DocumentGenerator`, `SegmentedControlProps`, `GVBox`, `UnionType`, `Truncate`, `AcornNode`, `VSnipContext`, `IndexerManagementClient`, `DrawerProps`, `AudioInputDevice`, `DomApi`, `DescribeSubnetGroupsCommandInput`, `BuildConfigs`, `UserPasswordEntity`, `poller.IPollConfig`, `FormatValue`, `VariableDefinitionNode`, `Aurelia`, `B2`, `Http3FrameParser`, `CompilerOptions`, `NavigableSet`, `MediaSlotInfo`, `CreateChildSummarizerNodeParam`, `NSURLSession`, `IndexProperty`, `Applicative2C`, `ModalPage`, `PersistentState`, `ModelName`, `CustomerVm`, `MoonbeamCall`, `IRendererOptions`, `CoapServer`, `GlyphCacheEntry`, `ExpenseCategoriesService`, `FileSystemUpdater`, `RarePack`, `ClientErrorResponse`, `CurveExtendMode`, `MockApiClient`, `InterfaceTypeWithDeclaredMembers`, `HypermergeUrl`, `StoreDefinition`, `RecurringBillPeriod`, `TransactResult`, `IDependenciesSection`, `ListChildComponentProps`, `d.VNode`, `ChangeEventHandler`, `AttributePub`, `LookupByPath`, `DataSharingService`, `Vars`, `MsgPauseGroup`, `ThreadConnection`, `ObjectContext`, `IQueryParameters`, `FormulaBuilder`, `AssembledTopicGraphics`, `PartialVersionResolver`, `ComputedRef`, `WithKeyGeneric`, `BaseCallback`, `NetworkRequestId`, `GSConfiguration`, `ConditionGroup`, `AwsShapes`, `QueryResults`, `ReflectionObject`, `AccountBalancesResult`, `WhereCondition`, `CartoonConfig`, `Events.pointerleave`, `GraphQLEnumType`, `BareFetcher`, `TimeoutID`, `HelperOptions`, `ProblemDimension`, `UniqueObject`, `Electron.IpcMainInvokeEvent`, `MimeType`, `IRemindersGetByContactState`, `UniswapV2Pair`, `DotDotDotToken`, `AESJsonWebKey`, `PrimitiveField`, `GetMyOrganizationCommand`, `ILinkInfo`, `FMAT`, `TestingModuleBuilder`, `BandViewModel`, `TokenMarker`, `MouseUpAction`, `PadplusRoomInvitationPayload`, `ITimeToSampleToken`, `UserMentionEntity`, `Form`, `EntityCacheSelector`, `QueryParameterBag`, `DeleteDBClusterSnapshotCommandInput`, `PlatformRepositoryService`, `ThresholdedReLULayerArgs`, `BuiltRule`, `Vertice`, `RedocNormalizedOptions`, `StoreModule`, `ISdkBitrateFrame`, `TemplateCompiler`, `UnwrapRowsComputed`, `FormattingRequestKind`, `ASTVisitor`, `GetFindingsCommandInput`, `TranslationWidth`, `CanvasFontSizes`, `InternalVariation`, `VerifyErrors`, `ConfigMetaFormat`, `IGBPackage`, `SystemVerilogIndexer`, `ThemeObject`, `Histogram`, `WorkspaceFileWatcher`, `CreateErrorReportInput`, `IBinaryTreeNode`, `RuleFix`, `IDynoCollectionKeyValue`, `RegInfo`, `BalanceTransferPayload`, `TableComponentProps`, `PolyDrawing`, `TConfiguration`, `RemoveOutputRequest`, `AdbSocket`, `EditService`, `yubo.MessageService`, `ConfigRoot`, `StreamFrame`, `Ordering`, `BriefcaseDbArg`, `Place`, `DebtTransaction`, `DescriptorTypeResult`, `NullableDateLimit`, `AttrNode`, `CkbMintRecord`, `Aai20SchemaDefinition`, `AccessorFn`, `DraggableList`, `FilePropertyReader`, 
`CallMessage`, `CandleData`, `DataRecordValue`, `UiPlugins`, `MatchedFlow`, `VerifiableClaim`, `IResourceEntity`, `ParsedOptions`, `FilterList`, `DemoItem`, `NFT721V1`, `UpdateVpcLinkCommandInput`, `RenderSource`, `WebviewEvent`, `SupportContact`, `MetadataORM`, `SavedObjectsCreatePointInTimeFinderDependencies`, `d.JsonDocsEvent`, `CompositeDisposible`, `IndexedReadWriteXYZCollection`, `NormalizedFilter`, `IConcatFile`, `WrappedFunction`, `TestBackendTimer`, `Projector`, `LaunchContext`, `AirSchema`, `GX.Attr`, `StepperState`, `ChildRule`, `GasComputation`, `DecryptionMaterial`, `ListDeploymentStrategiesCommandInput`, `MixedIdType`, `filterSymbols`, `GroupAction`, `RemoteParticipantState`, `MagickColor`, `LaunchConfig`, `ParticipantListParticipant`, `FolderWithId`, `DataEntity`, `FoldersService`, `Maybe`, `IDataContext`, `IStaticFile`, `RpcKernel`, `MapOfType`, `ConcreteClass`, `ts.TypeLiteralNode`, `EslintConfig`, `VisualizationChartProps`, `TargetList`, `PongMessage`, `SelectionService`, `ClassList`, `DescribeDomainCommandInput`, `RenderInput`, `IInstantiationService`, `CdkDialogContainer`, `ExpressionOperand`, `PQLS.Library.TLibraryDefinition`, `ClipPrimitive`, `StoryProps`, `StoryFile`, `core.BTCAccountPath`, `MarkdownTreeNode`, `Secured`, `QueueData`, `WithStringLiteralProperties`, `AnySchemeForm`, `Heightfield`, `DeployedCodePackage`, `DaffCartStorageService`, `WorkSheet`, `BlockData`, `GetPublicKeyCommandInput`, `SModelIndex`, `Buckets`, `AppWithCounterAction`, `VerificationClient`, `FTP`, `Secp256k1`, `TNSImageAssetSaveFormat`, `DocumentationContext`, `TRouter`, `AlterTableAddColumnBuilder`, `IApiSnapshot`, `ServiceClient`, `ContainerItem`, `TsChipComponent`, `MigrationFeedback`, `FileSystemFileHandle`, `ArgumentCategory`, `FirmwareWriterProgressListener`, `CloningRepository`, `Survey`, `BabelConfigOptions`, `TilePath`, `MockCanvas`, `NativeSystemService`, `FileEntity`, `InvalidInputException`, `SymbolOr`, `d.LogLevel`, `Discipline`, `obj`, `IndigoOptions`, `IdempotentParameterMismatchException`, `DaffCountryFactory`, `Prop`, `ClassTransformOptions`, `DropTargetOptions`, `TextureBlock`, `DescribeLoadBalancersCommandInput`, `UpdateClusterRequest`, `BungieGroupMember`, `TypescriptServiceClient`, `Graphql`, `SuiCalendarItem`, `CurriedFunction5`, `ProjectItemImpl`, `PluginPackage`, `TECall`, `BoneAnimator`, `core.BIP32Path`, `ModelNode`, `NextStep`, `ArchiveEntry`, `VoidExpression`, `TabularLoaderOptions`, `VerdaccioConfig`, `Extrinsic`, `CallSite`, `Promise`, `AsyncCallback`, `Hookable`, `ExecutionContract`, `JsxFragment`, `MaterialAccentColor`, `ExprVisitor`, `HTMLTableElement`, `GraphQLInterfaceType`, `IMouseEventTrigger`, `StreamAction`, `NzSafeAny`, `EventActionHandlerMeta`, `DecodedDeviceType`, `Tracing`, `SortCriteria`, `ChannelSigner`, `FormFieldConfig`, `BuildVideosListQueryOptions`, `PluginVersionsClient`, `ProblemFileType`, `CodeAction`, `LockState`, `DeclineInvitationsCommandInput`, `VscodeWrapper`, `LifecycleState`, `DeploymentTemplateDoc`, `JWKInterface`, `SummaryCollection`, `SwitcherItem`, `NodeSourceType`, `EventToPrevent`, `DecoratorOption`, `BuiltAction`, `SeekProcessor`, `VirtualDevice`, `OrganizationalUnitConfig`, `LayoutBase`, `DeleteLifecyclePolicyCommandInput`, `AddressNonces`, `ListRenderItemInfo`, `DataSetupDependencies`, `CheckpointProps`, `IKsyTypes`, `ImageRequest`, `PreKeyBundle`, `DomElementGetter`, `TestDatum`, `React.ComponentType`, `Delayed`, `DependencyList`, `EngineArgs.ListMigrationDirectoriesInput`, `CheckIdTaskDto`, 
`requests.DeleteProjectRequest`, `SubscribeEvents`, `InsertOptions`, `Globber`, `DalgonaState`, `ModifyDBClusterCommandInput`, `FullPageScreenshotDataOptions`, `TypeErrors`, `common.ConfigFileAuthenticationDetailsProvider`, `AdapterGuesser`, `UpdateTagDto`, `BastionHost`, `ChannelMessageSend`, `TinyColor`, `L2Creature`, `TmGrammar`, `AVRInterruptConfig`, `MetaKey`, `Slur`, `ParsedHtmlDocument`, `Spatial`, `specificity.Specificity`, `ARNodeInteraction`, `CardCollection`, `ResolveModuleIdResults`, `GX.AlphaOp`, `SortedPatchList`, `DecodeData`, `DOn`, `GlTfId`, `PairsType`, `DocNode`, `SqlManagementClient`, `ClassLikeDeclaration`, `IconElement`, `RequestDetailsState`, `PrismaClientRustPanicError`, `CustomFile`, `TSubscribeHandler`, `SplitLayoutNode`, `JobLogOption`, `TemplateData`, `APIPost`, `BizResponse`, `DiagnosticWithLocation`, `BlockInfo`, `HttpConfig`, `requests.ListCrossConnectGroupsRequest`, `i128`, `TLMessage`, `ResultProps`, `CarouselProps`, `ConnectionInfo`, `JobRunLifecycleState`, `DebugInfo`, `Namespace`, `ConditionType`, `TypeAllocator`, `S3.PutObjectRequest`, `Physics2DServer`, `requests.ListDbNodesRequest`, `d.CopyTask`, `Explorer`, `ISqlRow`, `Figure`, `PackageMeta`, `STPCardBrand`, `ScannedPolymerProperty`, `DockerGlobalOptions`, `NZBAddOptions`, `FSMCtx`, `ITestEntity`, `EdgeLabels`, `AutoRestExtension`, `PackagePolicyInput`, `ComponentProps`, `vault`, `StepVariable`, `ExtrusionFeatureParameters`, `CoordinateExtent`, `DappInfo`, `CachedMetadata`, `ArDriveAnonymous`, `LoopNode`, `HandlerStateChangeEvent`, `MatchJoin`, `AggsStart`, `CombatLogParser`, `Seeder`, `NavigationEnd`, `Points`, `CardInterface`, `ICollectionOptions`, `StubXhr`, `GestureEventData`, `ClientKeyExchange`, `TPayload`, `PortRange`, `AdjacentList`, `FieldToMatch`, `CommandBase`, `HTMLScLoadingSpinnerElement`, `ImageCache`, `NodeIndex`, `StreamEvent`, `SingleBar`, `CollectorFetchContext`, `FullConfig`, `VideoFormat`, `PayoutNumeratorValue`, `TableBuilderComponent`, `Study`, `QueueServiceClient`, `ByteStream`, `TestServerHost`, `LayouterService`, `TangentType`, `ListLeaderboardRecordsRequest`, `ImageIdentifier`, `RepositoryEntity`, `ISlope`, `AnimationEntry`, `ContextValue`, `CommandLineArguments`, `PluginConstructor`, `TDiscord.GuildMember`, `CesiumProperties`, `SlashCommandStringOption`, `MemoryNavigator`, `Op2`, `theia.DocumentSelector`, `MetricsServiceSetup`, `RenderNode`, `IWebPartContext`, `Contracts`, `LocationObject`, `IStrapiModelExtended`, `Appender`, `RenderableOption`, `DeleteUtterancesCommandInput`, `OwnerItemT`, `MembersActions`, `ISliderProps`, `TabbedAggColumn`, `ConfigAction`, `arc_result`, `NetworkDefinition`, `Builtins`, `SolidityListener`, `IAmazonServerGroupDetailsSectionProps`, `BoundingBox`, `Events.visible`, `FormDataEntryValue`, `WithSubGenericInverted`, `FrameworkInfo`, `GlobalUserData`, `UpdateJobDetails`, `AppService`, `EdgeConfig`, `objType`, `google.maps.Polygon`, `azureBlobStorage.Container`, `BufferState`, `AddRepositoryPayload`, `IPropertyListDescriptor`, `dia.Paper`, `FabSettings`, `VisualizeAppStateTransitions`, `ICkbMint`, `SecureClientQuery`, `Type_List`, `KeyRingStore`, `StackLine`, `CustomClientMetadata`, `LifecycleFlags`, `PlayerInputModel`, `PathOrFileDescriptor`, `HSL`, `Tools`, `iTunesMusicMetaProvider`, `TransactionDetail`, `IPluginTimes`, `HashMap.Instance`, `FormMethods`, `E2EPageInternal`, `CompositeType`, `RouterConfig`, `IRawStyle`, `NSAttributedString`, `VariantHandler`, `firestore.QueryDocumentSnapshot`, `IconType`, `IAccount`, `ISetting`, 
`ISampleDescription`, `IStaticMeshComponentState`, `WebPhoneSession`, `RepositoryStateCache`, `SecurityGroup`, `TypesModule`, `TagsObject`, `RobotsByNameMap`, `Events.pointerup`, `CBCharacteristic`, `PartSymbol`, `TreeChanges`, `LocationOffset`, `MerchantOrderGoodsEntity`, `STColumnButton`, `TranslateList`, `SyncedDataObject`, `QRPolynomial`, `ElementStyle`, `ReferencedFields`, `LoadingOptions`, `SigningRequest`, `Webview`, `DocumentFragment`, `DescribeVolumesCommandInput`, `NodeInfo`, `IISystemProto`, `DialogflowMiddleware`, `Endianness`, `postcss.LazyResult`, `BaseInternalProps`, `UserSummary`, `ALL_POSSIBLE_CHART_TABS`, `JSONValue`, `Vfs`, `monaco.editor.IModel`, `PageTemplate`, `CanvasView`, `Opcode`, `msRest.HttpRequestBody`, `CreateConfigurationCommandInput`, `SerializedRootCID`, `NestedStagePanelsManager`, `Material_t`, `IToolchian`, `PasswordHistoryView`, `ISelectProps`, `Gallery`, `WorkingService`, `vile.IssueList`, `ShareStore`, `CreateMeetingCommandInput`, `GeoBounds`, `FileTypeResult`, `GameBits`, `ResourceCount`, `SourceLocation`, `ItemDescription`, `AnimGroupData_Shape`, `IFramework`, `ServerHello`, `Applicative1`, `CountableExpectation`, `CausalRepoIndex`, `CompactInt`, `BaThemeConfigProvider`, `ListPoliciesCommandInput`, `S3Service`, `TxHash`, `IFilterItemProps`, `Inventory`, `PathState`, `PersistedLogOptions`, `OptionEquipped`, `AnalyzeOptions`, `SerializeCssOptions`, `ValueMap`, `RealFileSystem`, `requests.ListDatabaseUpgradeHistoryEntriesRequest`, `jest.SpyInstance`, `IFeatureFlag`, `reminderInterface`, `VirtualContestItem`, `IApplicationHealthStateFilter`, `ApiProps`, `NPCActor`, `DefaultTallyConfiguration`, `C51BaseCompileData`, `ProxyController`, `DebugProtocol.PauseArguments`, `IQuiz`, `BasicCredentialHandler`, `DatatableVisualizationState`, `Graphic`, `DayPlannerSettings`, `ServerHelloDone`, `TagnameValue`, `ISwidget`, `ILoggerColorParams`, `SnapshotAction`, `TAtrule`, `Compute`, `ethereum.TransactionReceipt`, `AbstractCamera`, `ICardInfo`, `Pubnub`, `ProgressModel`, `ObjectTypeDefinitionNode`, `TwitchBadgesList`, `TransformCssToEsmInput`, `Tester`, `ComponentMetaData`, `ZoneSpec`, `SystemManagerImpl`, `mdast.Root`, `GridGraphNode`, `Apply3`, `i0.ɵViewDefinition`, `requests.ListDrgRouteDistributionsRequest`, `DropoutMasks`, `DisposableObservable`, `StudentEntity`, `OperationCallback`, `IProps`, `SessionsConfigSchema`, `ForgotPasswordAccountsValidationResult`, `PickPoint`, `GenericClientConfiguration`, `ProjectRepository`, `FieldFormatsStart`, `AtomFamily`, `ClassFacts`, `DeleteTagsRequest`, `GenericMerkleIntervalTreeNode`, `BuildRequestOptions`, `InteractionModel`, `TransactionVersion.Mainnet`, `SerialAPICommandMachineParams`, `ConfigArgs`, `ColumnDefs`, `MenuNode`, `ReifiedType`, `ProgressAtDayModel`, `TagMapping`, `SubnetGroup`, `AnyFunction`, `AlertInputOptions`, `CookieManager`, `THREE.Light`, `HtmlTag`, `AsyncGenerator`, `GlobalStateT`, `StateAccessingOptions`, `PureIdentifierContext`, `WorkRequestLog`, `PagingOptions`, `Lint`, `ReplyPackage`, `DropDown`, `ScrollbarOptions`, `Portal`, `TestingWindow`, `DataStream`, `OnboardingOpType`, `XAxis`, `BluetoothServiceUUID`, `domain.Domain`, `CustomHtmlDivFormatter`, `ResponseWrapper`, `UserInfoOidc`, `files.Location`, `S2`, `WithGenericsSub`, `RequestValues`, `requests.ListModelsRequest`, `InputTypeComposer`, `RoadmapProps`, `ChangeCipherSpec`, `MxObject`, `IPatchList`, `CommandRegistryImpl`, `OutputCollector`, `ThyDragHandleDirective`, `LanguageIdentifier`, `MessageServiceInterface`, `ConnectionProfile`, `FileStat`, 
`StatusCodeCallback`, `PackedBubbleChart`, `StatBuff`, `NzCellFixedDirective`, `t.AST`, `ThemeColorState`, `DynamicGrammarBuilder`, `TrackedBuffEvent`, `response`, `LayerName`, `SpreadAnalysisResult`, `JobHandler`, `PersistedState`, `IGitExtension`, `jasmine.SpyObj`, `AutocompleteRenderInputParams`, `_IType`, `ScaleObject`, `StreamLabsMock`, `PageNode`, `IAmazonApplicationLoadBalancer`, `MonthAndYear`, `FlattenSimpleInterpolation`, `FlowView`, `LexerContext`, `EntityDTO`, `CatDTO`, `GetAllAccountsRequestMessage`, `FsUri`, `RemoveTagsFromResourceMessage`, `UseQueryStringProps`, `QuestionFormatter`, `NativeSyntheticEvent`, `LocaleProps`, `CidConfig`, `AccordionComponent`, `TypeESMapping`, `SerializedCrdtWithId`, `FilterSettings`, `AnyTable`, `CorsOptions`, `EdmxEntityTypeV4`, `SBDraft2CommandLineToolModel`, `IAzExtOutputChannel`, `BIterator`, `NgModuleData`, `AppInsightsQueryResultTable`, `BytesReader`, `Regions`, `cp.ForkOptions`, `HappeningsValidationOutcome`, `AuthorizedClientRequestContext`, `DebeBackend`, `_TypedArrayConstructor`, `requests.ListPublicationPackagesRequest`, `DebuggerMessage`, `XActorRef`, `ts.LiteralType`, `MarkdownParsedData`, `ShadowRootInit`, `DeployBuilderOptions`, `ColumnExtension`, `CardFooterProps`, `Entire`, `requests.ListThreatFeedsRequest`, `SearchCommand`, `IDomainEntry`, `Sector`, `HyperlinkProps`, `Inbound`, `CustomFilterArgs`, `NotebookModel`, `Enemy`, `UrlGeneratorsStart`, `NgxSmartLoaderService`, `ExternalStyleCompiler`, `RnM2Material`, `QuestionCollection`, `WebCryptoDecryptionMaterial`, `DaffCategoryFilterRequestRangeNumericFactory`, `EnvFile`, `HeadersFunction`, `OasParameter`, `DroppableStateSnapshot`, `ContractEntryDefinition`, `HashedItemStore`, `CIMap`, `ProviderCallback`, `ace.Editor`, `ex.Input.PointerEvent`, `RehypeNode`, `MakiObject`, `BatchPutMessageCommandInput`, `ISpriteMeta`, `VideoProps`, `DejaSnackbarComponent`, `V1WorkflowOutputParameterModel`, `ThyFlexibleTextComponent`, `GridItem`, `DeleteBucketPolicyCommandInput`, `PeriodModel`, `ApiResourceReference`, `B.JsNode`, `EdgeCollider`, `BannerProps`, `ResolveReferenceFn`, `JsonRpcProvider`, `Examples`, `TableImpl`, `VcsAuthenticationInfo`, `CliGlobalInfo`, `ComponentStatus`, `HardhatRuntimeEnvironment`, `AccountProps`, `EnumLiteralType`, `Darknode`, `ProjectConfigChangedEvent`, `ControllerMethods`, `VirtualContestProblem`, `Printable`, `DatabaseCredentials`, `EquipmentSharing`, `ReadableByteStreamOptions`, `K.TSTypeKind`, `BLE`, `StaticRegion`, `AggTypeFieldFilter`, `TitleService`, `Bag`, `ChangeBundle`, `RawOperation`, `SettlementEncoder`, `CompositeSubscription`, `HsLaymanService`, `LinkSession`, `IDatePickerModifiers`, `TestExtension`, `ConfigurationCCBulkSet`, `Labels`, `StableTokenWrapper`, `vscode.WebviewView`, `BindingAddress`, `ReadonlyUint8Array`, `XYPosition`, `ControlPointView`, `Ethereum.Network`, `EmitterWebhookEvent`, `LineTypes.MessageOptions`, `SingleSelectionSet`, `AcceptCallbacks`, `Spell`, `MaxPooling1D`, `HTMLAttributes`, `Observer`, `PropertyChangedEventArgs`, `BeaconProxy`, `Array3`, `CreateTagsCommandInput`, `DescribeMLModelsCommandInput`, `ServiceDefinitionPaths`, `ListSession`, `theia.SemanticTokensLegend`, `MapStateToPropsParam`, `DynamoDB`, `Fun`, `ISettingRegistry`, `SpeakerActions`, `Destroyable`, `FluentRuleCustomizer`, `SurveyLogicAction`, `VariableDeclarationList`, `CursorBuffer`, `TileCoords3D`, `nock.Scope`, `DecimalSource`, `BEMHelper`, `ICandidateFeedbackCreateInput`, `ICompileOptions`, `KeyFrame`, `AsyncBlockingQueue`, `AuctionManager`, 
`DeleteRoomResponse`, `IManifest`, `ListChannelMessagesCommandInput`, `Model.Project`, `CompiledComponent`, `FungiblePostCondition`, `TypeSelectionProps`, `PublicKeyInfo`, `SQLTransaction`, `AnyClass`, `JStretch`, `ComponentCompilerListener`, `ClassAndStylePlayerBuilder`, `requests.ListUpcomingScheduledJobsRequest`, `ServerEventEmitter`, `CloudFrontWebDistribution`, `CacheNotifyResult`, `Procedure`, `P2`, `GameSettings`, `pingResponse`, `CueSet`, `Scan`, `AnimatableElement`, `DropdownState`, `IListQueryInput`, `StackNavigationProp`, `FunctionFallback`, `ast.UnaryNode`, `TkeyStoreItemType`, `TAttributes`, `AutocapitalizationInputType`, `CreateGrantCommandInput`, `PositionedTickValue`, `DescribeFileSystemsCommandInput`, `Categories`, `PrismaClientErrorInfo`, `ParserRuleContext`, `TestData`, `YColumnsMeta`, `NoteEditorState`, `InjectedAccountWithMeta`, `Watcher`, `_resolve.AsyncOpts`, `ArtColumn`, `Pairing`, `ResultView`, `FolderWithSubFolders`, `BaseWeb3Client`, `WalletObjective`, `ICtx`, `LogsEvent`, `CollectionManifest`, `ListOrganizationAdminAccountsCommandInput`, `QueryParam`, `ReactPDF.Style`, `IFriend`, `FontWeight`, `KeycloakAdminClient`, `Types.OutputPreset`, `RosettaOperation`, `d.SitemapXmpOpts`, `FuseNavigationService`, `ILineGeometry`, `FailoverDBClusterCommandInput`, `Trigger`, `GraphWorkspaceSavedObject`, `gameObject.Bullet`, `CompileContext`, `IHostedZone`, `DebugProtocol.ThreadsResponse`, `OmvFeatureFilterDescriptionBuilder.FeatureOption`, `GaiaHubErrorResponse`, `SyntaxInterpreter`, `OffchainDataWrapper`, `IStep`, `ClassPartObject`, `SSRMiddleware`, `Participant`, `DeprecationsClient`, `TwingFunction`, `DescribeServiceUpdatesCommandInput`, `SearchBoxProps`, `AdminUserEntity`, `FormModel`, `KeyboardNavigationAction`, `Pool`, `ListrContextFinalizeGit`, `DragSourceMonitor`, `thrift.Int64`, `ExpandResult`, `PanelActionParams`, `RequestExt`, `CardSpace`, `GeometrySector`, `MethodName`, `SearchResultPage`, `constructor`, `EngineType`, `postcss.Rule`, `IModelConnection`, `VideoCreateResult`, `ClassName`, `GraphQLFieldConfig`, `WFDictionaryFieldValueItem`, `requests.GetConnectionRequest`, `BitcoinPaymentsUtils`, `DouyuPackage`, `Announcement`, `ChoiceSupportOption`, `TSunCardConfig`, `GetRegexPatternSetCommandInput`, `d.CompilerCtx`, `google.maps.Marker`, `UseHydrateCache`, `TleParseResult`, `VisDefaultEditor`, `Equalizer`, `PriceSpecInput`, `IBranch`, `OpenerOptions`, `TimeWidget`, `HelperService`, `ResolvedEntityAtomType`, `FindTaskQuery`, `IInternalEvent`, `Faction`, `Observation`, `RelativeTimeFormatOptions`, `PreferenceInspection`, `EffectPreRenderContext`, `RefactorContext`, `GraphQLConnectionDefinitions`, `VpcSubnetArgs`, `ImmutableMap`, `$E.IBaseEdge`, `ISchemaCollector`, `Todos`, `GLTF`, `IEventEmitter`, `MDCTopAppBarAdapter`, `SandboxContext`, `VideoTexture`, `ResponseObject`, `TickResultEnum`, `CombinedScanResults`, `TypeGenerator`, `ObservableMap`, `SASQueryParameters`, `CodeWriter`, `ITypedNode`, `CoreType`, `GfxRenderTargetP_GL`, `VdmActionImport`, `ReaderFragment`, `Separate`, `DocHeader`, `SendEventCommandInput`, `TSupportedFaction`, `DataTransferEvent`, `EvaluatorUsage`, `CallState`, `MutationFn`, `MIMEType`, `HashPair`, `Texture`, `ThyAbstractOverlayOptions`, `OutputBundle`, `LoginStatusChecker`, `PredictionContextCache`, `SchemaContext`, `RxSocketioSubjectConfig`, `SelectorMap`, `IncomingWalletConfig`, `SharedCLI`, `NamedArrayBufferSlice`, `DebugVariable`, `TranslateResult`, `VirtualNetworkPeering`, `TmdbTvResult`, `StoreTypes`, `Ray3d`, `CLR0`, `GLRenderHash`, 
`SuiTabHeader`, `p5ex.CleanableSpriteArray`, `TextMarker`, `FloatValue`, `CdkDrag`, `Break`, `ExplorerExtender`, `FramePin`, `PiLangExp`, `DurableOrchestrationStatus`, `CoreTypes.LengthType`, `ExpandPanelAction`, `MiLayerData`, `SelectedScript`, `DocumentQuery`, `SchemeObject`, `BinaryHeap`, `ICircle`, `TypeEnv`, `AnimationKeyframe`, `K.FlowTypeKind`, `DiContainer`, `HotkeysService`, `RouterInfo`, `ShowNewVisModalParams`, `IonRouter`, `CephAngle`, `IdentityNameValidityError`, `RootSpan`, `DAL.KEYMAP_ALL_KEYS_UP_POS`, `ts.NewExpression`, `CdkDropList`, `InferableComponentEnhancer`, `ModuleSpec`, `FirestoreConnectorModel`, `FieldValue`, `MockedRequest`, `DoneCallback`, `CoreTypes.VisibilityType`, `MDCShapeScheme`, `CLIArgumentType`, `FeedbackId`, `DevtoolsPluginApi`, `CloudAssembly`, `ColumnData`, `AccountStatus`, `PiTypeStatement`, `StructuredError`, `VirtualConfig`, `IVisibilityJobPostInput`, `DescribeEventSubscriptionsCommandInput`, `ComponentConfig`, `CompositeLocale`, `PDFDocument`, `RecordPatternItem`, `ChromeMessage`, `OneOfAssertion`, `com.mapbox.pb.Tile.IFeature`, `HomePluginStartDependencies`, `VConsoleNetworkRequestItem`, `MatSnackBarConfig`, `ParsedMessagePartICUMessageRef`, `TValue`, `TestChannelArgs`, `Vec3`, `LayoutOptions`, `SchemaService`, `GetAssessmentCommandInput`, `PrefixUnaryExpression`, `IDescriptor`, `InlineControl`, `BooleanInt`, `DefaultVideoTransformDeviceObserver`, `IInstruction`, `ElementRunArguments`, `IRootReducer`, `TaskRepository`, `EthereumNetwork`, `MyUnion`, `IDocString`, `ContractsSection`, `KeyValType`, `IFluidHandle`, `_Connection`, `DAL.DEVICE_ID_BUTTON_A`, `Intent`, `ts.EntityName`, `ScrollBar`, `CreateBundle`, `PDFWidgetAnnotation`, `UniqueSelectionDispatcher`, `CapabilitiesResolver`, `esbuild.OnResolveResult`, `TableStringWriter`, `MatchmakerAdd_NumericPropertiesEntry`, `ObjectSelector`, `ListTableColumnsCommandInput`, `GX.IndTexWrap`, `IAssetState`, `PrevoteMessage`, `OpenSearchDashboardsConfig`, `PopoverContextOptions`, `IDispatchProps`, `EncryptedData`, `TypeaheadState`, `Token`, `NodeBuilderFlags`, `BadgeButtonWidget`, `PhysicsComponent`, `BezierPoint`, `GUILocation`, `Byte`, `KVStorageBackend`, `DataRange`, `DebugProtocol.Variable`, `RailsFile`, `ControllerConfig`, `InputModalityDetectorOptions`, `TupleAssignmentContext`, `IConnectionProfile`, `KeyResultTemplate`, `StringOrNull`, `DColorButton`, `ISPListItem`, `AnimationFrame`, `MDCSelectFoundation`, `DatePickerValue`, `Abi`, `IssuePayload`, `ComplexSelector`, `RewardVaultItem`, `ColumnDefinitionBuilder`, `GraphQLScalarTypeConfig`, `IdTokenResult`, `RGBAColor`, `CreateIndexBuilder`, `JsonRpcRequest`, `WeChatInstance`, `ExpansionResult`, `HLTVPageElement`, `EdmxFunction`, `NextApiResponse`, `IGatewayRoom`, `RuleTransition`, `UserCredential`, `ButtonText`, `InputType.StyleAttributes`, `KeyModifierModel`, `ServiceRoom`, `CallAgentProviderProps`, `Getter`, `FormOptions`, `RgbaTuple`, `CornerMap`, `OperationRequest`, `SVGLabel`, `ComponentTemplate`, `RedBlackTreeIterator`, `InternalOptions`, `RegisterInput`, `RoverInitialState`, `VaultAdapterMock`, `PublisherSummary`, `DefinitionInfoAndBoundSpan`, `AlgPartDecoration`, `ISideEffectsPayload`, `Ref`, `NgModel`, `Vector3_`, `RendererMock`, `AlertExecutionStatus`, `RequestTemplate`, `HTMLButtonElement`, `TileAniSprite`, `DocOptArgs`, `runtime.HTTPHeaders`, `KeyConnectorService`, `TriggerEngine`, `OcticonSymbol`, `PromiseRejectedResult`, `OmitsNullSerializesEmptyStringCommandInput`, `NodeMaterialConnectionPoint`, `SubgraphDeploymentID`, 
`DeleteFriendsRequest`, `IGradient`, `MemberFieldDecl`, `CommandLineOptionOfListType`, `AnimatorClassSettings`, `ArtifactSizes`, `TableDataSet`, `EndPointService`, `StatefulChatClientArgs`, `IndexedClassRewrite`, `NvRouteObject`, `CSSShadow`, `Minion`, `MdcDialogPortal`, `ActOptions`, `i64`, `ModelValue`, `InjectorIndexes`, `DidExecutedPayload`, `SymbolWithScope`, `EvaluatorOptions`, `OctreeObject`, `ReactFrameworkOutput`, `ResultMeta`, `requests.ListClusterNetworksRequest`, `IterationTypes`, `LobbyOverlayProps`, `ForgotPasswordRepository`, `NamedNode`, `Etcd3`, `DocumentRef`, `ShareTransferStorePointer`, `PropertyDecorator`, `SymbolTickerOrder`, `TT.Level`, `UseHydrateCacheOptions`, `ChartDataItem`, `IntervalJobOptions`, `AdditionalPropsMember`, `Fillers`, `StateAccessor`, `MiddlewareCreator`, `IDragEvent`, `BinarySet`, `CreateExtensionPlugin`, `CompareMessage`, `ShaderData`, `QuestService`, `ICommandBarProps`, `RxFormGroup`, `NSData`, `DefinitionYAMLExistence`, `CloudService`, `NSRange`, `CustomGradientFunc`, `K.IdentifierKind`, `RelationAttrInfo`, `OpenPGPKey`, `ViewOptions`, `SceneTreeTimer`, `DataArray`, `android.content.Context`, `SystemUserApi`, `interfaces.Request`, `BlenderPathData`, `DataWithPosition`, `UnitTypes`, `UseTransactionQueryOptions`, `AddEventListenerOptions`, `TabContainerPanelComponent`, `PartyService`, `CommandsCache`, `OnPushList`, `NormalizedScalarsMap`, `RecordProvide`, `Episode`, `DayElement`, `FrameManager`, `requests.ListWafRequestsRequest`, `DeleteTemplateCommandInput`, `PropTypeConfig`, `ParquetBuffer`, `HDWalletInfo`, `SupportedBodyLanguage`, `UsePreparedQueryOptions`, `PathDescription`, `ESLPanel`, `CertificateConfigType`, `MockSelector`, `TsCohortDateRangeComponent`, `PrefixUnaryOperator`, `GoogleActionsV2AppRequest`, `UpdateCheckResult`, `PositionStyleProps`, `Response.Response`, `VRMFirstPerson`, `ClientTools`, `PackageDependencies`, `ShorthandRequestMatcher`, `WebSiteManagementModels.StringDictionary`, `ISearchOptions`, `StarknetWindowObject`, `Matrix4x4`, `RateLimitState`, `IndexSignatureDeclaration`, `CmbData`, `NT`, `Verifiable`, `IsInstance`, `RefreshInterval`, `DialogActions`, `MediaStream`, `Sema`, `Rule`, `IOSInput`, `ParserTreeValue`, `NumberValidator`, `SignalRService`, `CreateRawTxOut`, `ResponserFunction`, `OurOptions`, `TemplateType`, `Override`, `BuildData`, `JWTPayload`, `ParityRegion`, `IPlayerState`, `ScriptTask`, `SubnetDescription`, `Animate`, `GraphQLRequestContextDidResolveOperation`, `ParamContext`, `DataPositionInterface`, `SelectContainerProps`, `AppleTV`, `Kwargs`, `ProgramInfo`, `InfoPlist`, `DeleteProjectCommand`, `LineAnnotationDatum`, `AggregatedColumn`, `RemirrorJSON`, `HTMLIonPopoverElement`, `LabelChanges`, `TraceSet`, `ArtifactFrom`, `K`, `IWorkerModel`, `Package`, `IJsonResourceInfo`, `FirebaseListObservable`, `IMethodHandler`, `SpecQuery`, `GridModel`, `TypedClassDecorator`, `PlayingState`, `ToolTipProps`, `ComplexBinaryKernelImpl`, `Cross`, `SubmitFnType`, `requests.ListUsersRequest`, `BaseResource`, `BaseData`, `ConnectionPositionPair`, `BrowserDownloads`, `androidx.fragment.app.Fragment`, `TimePickerComponentState`, `TableDifference`, `DidKey`, `ISessionRequest`, `InternalTimeScalePoint`, `IRouteItem`, `PieSectorDataItem`, `UserGeoLocations`, `IEntityOptions`, `AwsS3PutObjectOptions`, `DescribeGlobalClustersCommandInput`, `AnimationAction`, `RawShaderMaterial`, `EventPayload`, `ElectionMetadata`, `requests.ListGrantsRequest`, `AccountFacebookInstantGame`, `ts.server.PluginCreateInfo`, `LRParser`, `SIGN_TYPE`, 
`TestEnvironmentConfig`, `GetPromiseInvocationContext`, `TagResourceOutput`, `ListRevisionAssetsCommandInput`, `TypeElementBase`, `MDCRipple`, `Board`, `ServerSecurityAlertPolicy`, `ParsedTsconfig`, `Nightmare`, `WillExecutedPayload`, `ConfigurableEnumValue`, `MerchantEntity`, `Publisher`, `AdminCacheData`, `RenderFlex`, `OpenCVConfig`, `GX.KonstColorSel`, `ShortcutsTypes`, `CommanderOptionParams`, `RouteResponse`, `CasCommand`, `SubscriptionHolder`, `TransliterationFlashcardFields`, `GetAccountCommandInput`, `Moods`, `debug.IDebugger`, `ESMap`, `DisassociateServiceRoleFromAccountCommandInput`, `AnyRecord`, `TypedUseSelectorHook`, `SpectrogramData`, `SidebarItem`, `IAccountDataStore`, `ToneAudioNode`, `EthAsset`, `ConvertIdentifier`, `TimeHolder`, `IWhitelistUser`, `OptionProps`, `PluginResult`, `GetInsightSummariesCommandInput`, `NodeCheckFlags`, `TypeVarMap`, `ResponseDataAccessor`, `ModuleDependencies`, `RequestChunk`, `EngineResults.EvaluateDataLossOutput`, `ActionEffectPayload`, `InfluntEngine`, `point`, `GPGPUBinary`, `Executable`, `ClassStruct`, `ConceptInstance`, `V0RulesService`, `ISearchResultState`, `InvestDateSnapshot`, `sdk.CancellationDetails`, `RouterEvent`, `StatusCodes`, `CompiledHierarchyEntry`, `TradeFetchAnalyzeResult`, `RichTextProps`, `Valid`, `QueryResponse`, `StackOutput`, `OperationObject`, `WorkflowDto`, `InstanceLocator`, `MessageWithoutId`, `AvailableFeature`, `SitePropsIndex`, `AnalyzerState`, `CreateProcedureWithoutInput`, `IPointData`, `KeyofC`, `ArmFunctionDescriptor`, `Targets`, `IAmazonS3Credentials`, `Outbound`, `SimpleChartDataType`, `BuildWatchEmitter`, `FieldInfo`, `ICoverageCollection`, `RegistryKey`, `CommentState`, `TestEntity`, `SLabel`, `SearchEmbeddableFactory`, `FileWatcherEventHandler`, `CssFile`, `SEErrorRefresh`, `cc.RenderTexture`, `StorageObjectAck`, `JsxChild`, `ISubsObject`, `HashType`, `OutputFile`, `SignedState`, `CreateConnectionRequest`, `CheckoutState`, `ICkbBurn`, `FirebaseUser`, `CreateVpcPeeringConnectionCommandInput`, `TargetDiezComponent`, `QueryBeginContext`, `AttachmentInfo`, `GeneratedQuote`, `PluginState`, `ConstructorFuncWithSchema`, `ISharedFunction`, `IMaterialPbrMetallicRoughness`, `IGameObject`, `CreateTokenCommandInput`, `SerialAPIVersion`, `AbiOutput`, `SelectPopoverOption`, `HttpServerType`, `RivenMod`, `DetailsState`, `PropConfigCollection`, `HaredoChain`, `TSerDeOptions`, `SimpleContext`, `IImport`, `VectorSource`, `Demand`, `Buffer`, `WayPoint`, `Networks`, `HashData`, `CreateResult`, `UpdateUserCommandInput`, `StyleRules`, `SMTPServerSession`, `WindiPluginUtils`, `AnyToken`, `NormalMod`, `LayoutPane`, `IAssignment`, `HierarchyProvider`, `Cubelet`, `ModelerFourOptions`, `ObservableSet`, `DescribeServicesCommandInput`, `Converter`, `LoadBalancerListenerContextProviderPlugin`, `DashboardListingPage`, `DragDropData`, `ZWaveLogContainer`, `FirebaseApp`, `DiscordBridgeConfigAuth`, `CallOptions`, `ElementNames`, `ThrowAsyncIterable`, `MethodGetRemainingTime`, `Segment`, `CrawlerRunOptions`, `FlowNode`, `SendPayload`, `KeySchemaElement`, `LegacyCallAPIOptions`, `PhotoService`, `IDiff`, `ImageGallerySource`, `IBatteryEntityConfig`, `BaseField`, `Civil`, `WebSocket.MessageEvent`, `CANNON.Body`, `ScratchOrg`, `StandaloneDb`, `Octokit`, `ActivityHeight`, `language`, `MDCRippleAdapter`, `requests.GetZoneRecordsRequest`, `Artwork`, `StatusAction`, `ChangeDatabaseParameterDetails`, `MetricsModel`, `Interface`, `WebRequestMethod`, `OrgMember`, `Clef`, `CellArgs`, `HtmlTagObject`, `QueryDeploymentsRequest`, `Expr`, 
`ParsedCronItem`, `IndexPatternSelectProps`, `TiledMap`, `HsDimensionTimeService`, `requests.ListVolumeBackupsRequest`, `DeployProviders`, `TextEditorViewColumnChangeEvent`, `LossOrMetricFn`, `messages.DataTable`, `HTMLCollectionOf`, `RequirementBaseModel`, `ApplyResult`, `requests.ListTagNamespacesRequest`, `BezierCurveBase`, `Reffer`, `StorageOptions`, `InternalContext`, `MapPolygon`, `DNode`, `FrameContainer`, `ListAnalyzedResourcesCommandInput`, `Champions`, `TrackingOptions`, `NotificationTemplateRepository`, `FirebaseFirestore`, `JsonPatchOperationPathCombiner`, `TargetGroup`, `AuthorisationStore`, `BasicDataPropertyForAdvice`, `DiscordStore`, `DeploymentExtended`, `LocationDescriptorObject`, `UnionBuilder`, `vscode.ShellExecutionOptions`, `TypeToMock`, `ArticleStateTree`, `MediaDeviceInfo`, `HostPort`, `TokenConfigs`, `ObjectPage`, `Swarm`, `GregorianDate`, `ReboostPlugin`, `AccountStellarPayments`, `CompilerEventFsChange`, `TextGeometry`, `Enzyme.ShallowWrapper`, `EventSubscriptionsMessage`, `ArtworkData`, `Profile`, `CCValueOptions`, `JupyterMessage`, `IOpenSearchSearchResponse`, `Masset`, `BasicResumeNode`, `ThresholdCreateSchema`, `Identifier`, `StorageState`, `ITestWizardContext`, `Instruction`, `QlogWrapper`, `CompletionItemKind`, `PropertyOptions`, `Ignore`, `NodeCache`, `ReplacePanelAction`, `Jb2Adapter`, `Molecule`, `TLE.NumberValue`, `MdcDefaultTooltipConfiguration`, `WorkflowStepOutputModel`, `IPeacockSettings`, `UnaryOperationNode`, `CalendarObject`, `SNode`, `ExprVis`, `Tx.Info`, `TileKeyEntry`, `AggregatedApiCall`, `ThemeMode`, `EditableDocumentData`, `aws.iam.Role`, `EquipmentSharingService`, `Vertices`, `providers.Log`, `HTMLHtmlElement`, `IResolvedIDs`, `ListEntitiesCommandInput`, `RumPublicApi`, `ValidateFn`, `IOrganizationContactCreateInput`, `SerializedData`, `FactoryProvider`, `MaskingArgs`, `PathTransformer`, `Edges`, `ForkName`, `DeltaInsertOp`, `PreviewService`, `OrderDirection`, `ProjectInitializerConfig`, `StringScannerOutput`, `AxisLabelOptions`, `CashScriptListener`, `ISearchDataTemplate`, `VariableInfo`, `TextEditorElement`, `NEOONEDataProvider`, `ScopeContext`, `AggFilter`, `UploadItem`, `PublicAppInfo`, `PropertyConverterInfo`, `ts.InterfaceDeclaration`, `FnN4`, `RESTResponseDataType`, `StatedBeanContainer`, `UnpackNestedValue`, `BotAdapter`, `SpaceID`, `PackageJsonChange`, `ProfileService`, `PolarData`, `CdkHeaderRowDef`, `ForwardTsnChunk`, `LibraryComponentImpl`, `IconName`, `VideoStreamIndex`, `SampleInfo`, `MigrateAction`, `CommonAlertParams`, `SendDataMessage`, `ProjectMode`, `SpecializedFunctionTypes`, `OutHostPacket`, `DescribeImageVersionCommandInput`, `JoinNode`, `IMatrixConsumer`, `Sid`, `ReactDataGridFilter`, `IVSCServerManagerEventsHandler`, `ReaderTask`, `CALayer`, `TAbstractControl`, `Package.Target`, `IEsSearchResponse`, `DataTypeConfig`, `Dense`, `NetworkManager`, `ScenarioState`, `ZoneFileObject`, `BasicInfo`, `AssertStatic`, `Publications`, `YoonitCamera`, `ResourceConfiguration`, `RaycasterService`, `CustomSmtpService`, `TComponentConfig`, `BodyParser`, `GqlExecutionContext`, `NameValuePair`, `JassTimer`, `DefineComponent`, `DefaultAttributeDefinition`, `SampleView`, `OnFailure`, `TrackEntry`, `estypes.SearchRequest`, `Object`, `requests.ListApmDomainWorkRequestsRequest`, `Sigma`, `ParsedUrl`, `PackageLockPackage`, `AAAARecord`, `GatewayToConceptRequest`, `MakeHookTestStep`, `d.CompilerSystem`, `FieldFormatter`, `SchemaMatchType`, `TimeoutTask`, `ExpressionFunctionDefinition`, `FieldFormatId`, 
`PutConfigurationSetTrackingOptionsCommandInput`, `CompilerWorkerContext`, `MatchCreate`, `PropMap`, `LineStyle`, `SchemaTypes`, `AliasDeclaration`, `SelectorT`, `MetaIndexState`, `PublicKeySection`, `SearchFilterState`, `BezierCurve3dH`, `ITimelionFunction`, `OrganizationTeamEmployee`, `WexBimProduct`, `TodoList`, `StacksPrivateKey`, `ConeTwistConstraint`, `UIPreparationStorage`, `WordcloudViewModel`, `PaintServer`, `ZoneModel`, `AddressRecord`, `ng.IPromise`, `ModelTemplate`, `VersionBag`, `ArgumentListInfo`, `StandardizedFilePath`, `ListModelsResponse`, `ContextMenuItem`, `IIssue`, `Configs`, `Media`, `SimpleReloaderPlugin`, `GetOrganizationParams`, `GameStateRecord`, `ResManager`, `UpdateResourceCommandInput`, `PromisifiedStorage`, `viewEngine_ViewRef`, `StripeService`, `SyntaxErrorConstructor`, `NodeExtensionSpec`, `IPCMessage`, `Express.NextFunction`, `CurrencyCNYOptions`, `MockContractFactory`, `GetItemOutput`, `LogFilter`, `PlayerBattle`, `Bip32Options`, `AaiOperationTraitDefinition`, `CopyTask`, `ChainableTransform`, `RoomManager`, `ExpirableKeyV1`, `OneToManyOptions`, `TopMiddleBottomBaseline`, `ClientEngineType.Library`, `CredentialPreviewAttribute`, `SessionClient`, `SearchSessionsMgmtAPI`, `ProjectorPerformanceLogger`, `DiagnosticCategory`, `AutocompleteSettings`, `LayoutState`, `Sprite`, `InstanceTarget`, `GetStagesCommandInput`, `requests.ListKeyStoresRequest`, `PackageChangelogRenderInfo`, `Surface`, `IZosmfTsoResponse`, `PCancelable`, `AcceptTokenRequest`, `AttributeWeights`, `CreateApplicationVersionCommandInput`, `PDFAcroRadioButton`, `Appearance`, `ClaimData`, `TagEdit`, `V1PodList`, `LoggerTask`, `StateUpdate`, `StoredTx`, `LogConfig`, `PatternStringProperty`, `FbFormPermission`, `Define`, `AxisScale`, `AuthController`, `SlashParams`, `ValidateKeyboardDefinitionSchemaResult`, `ListProjectsResponse`, `ValueTransformer`, `IDimension`, `PutPublicAccessBlockCommandInput`, `IterableChangeRecord_`, `IndexPatternField`, `AuthSettings`, `OrderData`, `PriceHistoryMap`, `StringifiedUtil`, `CollectionData`, `ShardFailure`, `OutputTargetCopy`, `ExprNode`, `AssetReferenceArray`, `HomePublicPluginSetup`, `JSXElementConstructor`, `RemoveTagsFromResourceCommandInput`, `ListRenderItem`, `AttributionInfo`, `IObjectWithKey`, `PathValue`, `UpgradePlugin`, `LogRecord`, `BezierCurve`, `IDatasource`, `CrochetRelation`, `NumericRange`, `ScrollDirection`, `SxChar`, `LedMatrixInstance`, `UserPool`, `UserDataPropertyAPI`, `Credential`, `ProtocolVersionFile`, `CoinTransfer`, `AuthorizationContext`, `WpResourceConfig`, `PiCommand`, `PostItem`, `FinalizeHandler`, `NSDatabase.ITable`, `KeyboardEventToPrevent`, `HistoryAction`, `TxnIdString`, `sdk.SpeechSynthesizer`, `SavedObjectsClient`, `ImageAssetService`, `MessagesBag`, `InitializerMetadata`, `TagEntity`, `IFunctionAppWizardContext`, `RenderColorTexture`, `TensorInfo`, `uint32`, `PipelineStage`, `UsersEntity`, `DMMF.TypeInfo`, `PoolFactory`, `PreConfiguredAction`, `DaffBestSellersReducerState`, `ChaincodeResponse`, `ShaderAssetPub`, `EthereumTransactionTypeExtended`, `NetplayPlayer`, `Pixels`, `IndicatorCCGet`, `CipherService`, `Entries`, `AlertDialog`, `TruncatablesState`, `IValidatorConfig`, `InputControlVisDependencies`, `AudioOptions`, `DIDDocument`, `DeleteManyInput`, `Generic`, `SnippetSession`, `JobId`, `AssemblyData`, `ReplicationRule`, `ts.NamedDeclaration`, `CookiecordClient`, `PieChartData`, `PlatformContext`, `Knuckle`, `AuthHelper`, `NavigationButton`, `DecorationFileMap`, `KeywordErrorDefinition`, 
`ChatModule.chatRoom.ChatPubSubMessage`, `Indexed`, `JsonResponse`, `DelayedRemovable`, `JRPCEngineNextCallback`, `TextmateSnippet`, `AlarmSensorType`, `IncomeService`, `FetchGroup`, `HierarchyIterable`, `StartImportCommandInput`, `ExpressionFunctionVarSet`, `LockFileConfigV1`, `Screenview`, `Attitude`, `QAction`, `RollupCommonJSOptions`, `TSVideoTrack`, `mssql.config`, `ListVodSourcesCommandInput`, `PackagerAsset`, `RoosterCommandBarProps`, `ChartKnowledgeBaseJSON`, `HandlerCallback`, `Wallet`, `PropertySchema`, `HapiAdapter`, `CompletionPrefix`, `GetDeploymentCommandInput`, `HandlerResourceData`, `Icu`, `Vidi`, `IQueryParamsResult`, `MessageCreateOptions`, `VIdentifier`, `UrlGeneratorContract`, `MigrationOpenSearchClient`, `PoseNetOperatipnParams`, `ServerArgs`, `ISystemActions`, `ExpressionRegexBuilder`, `IConfigurable`, `MongoPagination`, `PathObject`, `IUserGroupOptions`, `ContractDefinitionContext`, `QuotaSettings`, `DataConverter.Type`, `TransformConfigUnion`, `ColumnDefinition`, `LegendItemList`, `WithStatement`, `Unwatch`, `ConstructorOptions`, `NameValueDto`, `RecordSubType`, `Pool3DProgram`, `SourceOffset`, `NgGridItemConfig`, `CalendarRepository`, `TMouseEventOnButton`, `ConfiguredProject`, `RecordStringAny`, `IVConsoleNode`, `BarFile`, `HTMLPreviewManager`, `ModelList`, `IRenderParameters`, `CanaryMetricConfig`, `SideBarTabModel`, `DeleteResourcePolicyCommand`, `SectionMarker`, `Associative`, `InterpolationConfig`, `ProblemIndication`, `ColorOp`, `CapabilitiesService`, `POIDisputeAttributes`, `DeleteAppInstanceAdminCommandInput`, `requests.ListInstanceConsoleConnectionsRequest`, `TokenScanner`, `BlockAction`, `DescribeRecipeCommandInput`, `EntryList`, `ConnectorDeclaration`, `WalletInit`, `Play`, `SeekOutput`, `ConfigAccount`, `PackagedNode`, `MatchData`, `PurgeHistoryResult`, `ComponentTable`, `BaseClientOptions`, `ContextMember`, `typedb.DBMethod`, `MatStepperIntl`, `SuiLocalizationService`, `ICustomer`, `ITypedDump`, `EventBinderService`, `IWaterfallTransaction`, `ScaleData`, `cxapi.CloudFormationStackArtifact`, `apid.VideoFileId`, `Prize`, `URLSearchParamsInit`, `OOPTypeDecl`, `IFieldExpression`, `ForumAction`, `CipherRequest`, `Sender`, `IdentityIndex`, `Quest`, `TSpanStyleProps`, `TSelectActionOperation`, `HsDialogContainerService`, `sdk.RecognitionEventArgs`, `Code`, `Route`, `UserProps`, `ListWorkRequestErrorsRequest`, `PositionObject`, `FieldValidationResult`, `InterfaceAlias`, `ITypescriptServiceClient`, `BlogPost`, `TableConfig`, `UpdateValueExpression`, `TaskArguments`, `TileStyle`, `DAL.KEY_BACKSPACE`, `Yarguments`, `BoxPlotPoint`, `UserQueryTask`, `TreeService`, `Purchase`, `IManagementApiClient`, `IFluxAction`, `IInviteAcceptInput`, `IVariantCreateInput`, `PropEnhancers`, `ClientLocation`, `IStateTreeNode`, `IParsedPackageNameOrError`, `ExtProfile`, `Company`, `ShaderityObject`, `ParsedTemplate`, `SdkRemoteParticipant`, `ClientRemote`, `ArtifactVersion`, `Bodybuilder`, `next.SketchLayer`, `UsableDeclaration`, `VcsAccountDatabase`, `TransformNode`, `WalkState`, `ShapeBase`, `EncodedMessage`, `ts.ModuleDeclaration`, `HistoryLocation`, `NVMOperationsResponse`, `TimestampTrigger`, `SubscriptionLike`, `Easing`, `FakeData`, `BrowserView`, `SidebarMenu`, `OutputContext`, `TestHelpers`, `SetIconMode`, `...`, `TSTypeReference`, `IMouseEvent`, `LoggerFormat`, `IPackageDescriptorMap`, `NavLink`, `MapMarker`, `ISession`, `Puzzle`, `BinData`, `VisualizeEditorVisInstance`, `ITextFieldExpression`, `RootBank`, `CreditCard`, `TOCMenuEntry`, `HappeningBreakpoint`, 
`ResourceTimeGridWrapper`, `BannerState`, `Offsets`, `Model.Page`, `MapViewApp`, `DictMap`, `ImmutableObjectiveTag`, `ListEndpointOptions`, `IHTMLElement`, `MaskListProps`, `Models.Timestamped`, `LogAttributes`, `SerialPort`, `monaco.editor.ICodeEditor`, `ExpressionParseResult`, `StatelessComponent`, `PrebootDeps`, `DiagnosticMessageChain`, `ExecutableItem`, `ContentChange`, `ITextAreaProps`, `ServiceCatalogSummary`, `ng.ILocationService`, `A0`, `AnyOf`, `IsWith`, `SavedComments`, `ITileDecoder`, `TileKey`, `ComponentCompilerProperty`, `RecordModel`, `ValidationProblemSeverity`, `IHttpService`, `alt.RGBA`, `IThriftField`, `TabElement`, `ScopedDocument`, `ApiClientResponse`, `GetClientFunction`, `ArrayService`, `TaskInput`, `DescribeEventSubscriptionsMessage`, `ClientRequestResult`, `MUserWithNotificationSetting`, `TemplatesManager`, `RedPiece`, `PackagePolicyVars`, `AuxConfig`, `NextApiRequest`, `lsp.Diagnostic`, `DescribeReplicationInstancesCommandInput`, `StylableMeta`, `IMetadataStorage`, `ThingsPage`, `UserStorage`, `TelemetryRepository`, `Kind3`, `LocalDatabase`, `ParameterValue`, `BotonicEvent`, `EncryptedObject`, `ConstraintSolver`, `ResultFilter`, `Oni.Plugin.Api`, `SchemaRegistry`, `ArrayBuffer`, `ReactSource`, `ARUIViewOptions`, `QueryResultRowTypeSummary`, `EventHint`, `CompilerFileWatcherEvent`, `ActionDefinitions`, `BitcoinishPaymentTx`, `ColumnRow`, `HsAddDataCommonService`, `HDNode`, `VirtualMachineRunCommand`, `HelpRequest`, `RenderFunction`, `XmlMetadata`, `Cypress.ObjectLike`, `TrustIdHf`, `FieldDescription`, `ProductAction`, `EbsMetricChange`, `RTCConfiguration`, `DaffProductServiceInterface`, `CommandHandler`, `ErrorMessages`, `BuildOutput`, `TSTypeParameterDeclaration`, `TObj1`, `ComparisonOperand`, `LocalForage`, `SetToken`, `NodeJSKernelBackend`, `SemanticType`, `NgbModalRef`, `TimeChangeSource`, `NamedTypeNodeDef`, `ThemeData`, `d.HostElement`, `SectionConfig`, `ReAtom`, `DebugState`, `ImageUse`, `QueryStart`, `GraphQLInputType`, `ListInputsCommandInput`, `RetryStrategyOptions`, `DiscussionReplyEntity`, `MigrationDiff`, `IMappingsState`, `TrainingZone`, `AuthenticationConfiguration`, `DaffCartOrderReducerState`, `EquipmentSharingPolicy`, `SyncConfig`, `ResourceNotFoundException`, `ClaimantInfo`, `Vector2d`, `LongHeader`, `LoadingLastEvent`, `CalendarProps`, `RetrievedCredentials`, `RouteSegment`, `TodoStore`, `FormattingOptions`, `LanguageModes`, `QueryDefinition`, `CreateAliasCommandInput`, `KeycloakService`, `BehaviorTreeNodeInterface`, `IWarrior`, `GetUserSuccessPayload`, `CustomPriceLine`, `PermissionOverwrites`, `ACrudService`, `InternalRequestParams`, `SettingsProvider`, `StatGroup`, `GetConfigOptions`, `MerkleProof`, `Notebook`, `HelpObj`, `ElementInstance`, `TestSetupBuilder`, `Arithmetic`, `EmptyIterable`, `GitFileStatus`, `RelativeDateRange`, `SuiDropdownMenuItem`, `NodeWithId`, `TypedMessageRendererProps`, `ListenCallback`, `RouterLocation`, `AbiEvent`, `anyNode`, `RectangleNode`, `ComputerPlayer`, `ZWaveError`, `ElementStyles`, `AuthStateModel`, `ApiModel`, `HeaderSetter`, `CfnCondition`, `VideoSource`, `ComicDownloaderService`, `ConsumerParticipant`, `GunGraphConnector`, `AMock`, `TwitchChatMock`, `AsObject`, `IValidationContext`, `PS`, `IndyWallet`, `estypes.SearchResponse`, `WriteRequest`, `PostResult`, `InitialStatistics`, `PolicySummary`, `VisConfig`, `TypingGenerator`, `ESTree.MemberExpression`, `MiddlewareMetadata`, `ScrollItem`, `HandleElement`, `RolesFunc`, `ShoppingCartItem`, `TTurnAction`, `ForceGraphNode`, `ModifiersArray`, 
`PanResponderGestureState`, `BaseTranslatorService`, `SpaceProps`, `GroupedObservable`, `MfaOption`, `CompileState`, `FieldUpdates`, `HandlerMetadata`, `EntityEffects`, `ITokenPayload`, `CallbackObject`, `CoordinateConverter`, `P1`, `QueryLanguage`, `ContentReference`, `EtherscanClient`, `StatementAst`, `DeleteButtonProps`, `VarInfo`, `ReactText`, `GetUserSettingsReadModel`, `JSONProtocol`, `GreetingWithErrorsCommandInput`, `PadchatRpcRequest`, `ApiAdapter`, `RepeatVector`, `BackgroundReplacementOptions`, `DMMF.Datamodel`, `Developer`, `StrategyParameterType`, `UniqueKey`, `PolusBuffer`, `AstEditorProps`, `ViewType`, `MongoEntity`, `Change`, `TileCoordinator`, `ImportDeclarationStructure`, `GetDeviceCommandInput`, `DnsValidatedCertificate`, `DeleteClusterCommand`, `ToolsSymbolInformationRequest`, `x.ec2.Vpc`, `SortingOrder`, `TRPCError`, `HttpServiceBuilderWithMeta`, `LoggingConfig`, `RenderModel`, `ListChannelsCommandInput`, `DashboardData`, `ISPListItems`, `FixtureSetupDeps`, `PathAddress`, `XDate`, `ClaimItem`, `FModel.LoadSettings`, `ParsedResults`, `RendererFactory2`, `DataPromise`, `ScrollSpyService`, `RpcSocket`, `ValueState`, `IsSkipFeature`, `EdmxEntitySet`, `RowViewRef`, `Matrix4`, `IPropertyData`, `TestFrontstage`, `JsonExpr`, `DescribeDatasetImportJobCommandInput`, `AdminProps`, `JsonLocations`, `NormalBold`, `HeaderInfo`, `ValVersion`, `TEventType`, `Events.start`, `IZoweUSSTreeNode`, `ZimCreator`, `ChangePart`, `RichTextComponents`, `CreateClusterCommandOutput`, `Deno.Listener`, `RoundArray`, `AnyField`, `ChapterStatus`, `DirectoryInfo`, `PlayerViewState`, `IKeyValue`, `PlayerViewCombatantState`, `MiddlewareMap`, `GitFile`, `StoreAction`, `MountOptions`, `BaseUIElement`, `ElementGeometryResultOptions`, `TypeEvaluator`, `QuicTags`, `TokensService`, `NzDrawerRef`, `OnDemandPageScanResult`, `GlobalTag`, `TypeGraph`, `SpeechServiceConfig`, `StatusNotification.Status`, `CdkStep`, `ListApplicationsResponse`, `PathOptions`, `IRichPropertyValue`, `Regularizer`, `ProcessorInternal`, `ReLU`, `ExportMap`, `ContractCallReturnContext`, `FoundNodeFunction`, `River`, `ContentService`, `WatermarkedType`, `JasmineTestEnv`, `CanvasEngine`, `Persist`, `IBytes`, `BoxrecBasic`, `collectSignaturesParams`, `ts.PropertyName`, `PutSessionCommandInput`, `NextCallback`, `TimelineTheme`, `DefaultComponent`, `MDCLineRippleFoundation`, `UdpTransport`, `ColorAxis.Options`, `RootTestSuite`, `CollectionChangedEventArgs`, `CreditWords`, `Eris.Message`, `ResolvedLibrary`, `IStorageSchema`, `AccordionStore`, `ColumnPropertyInternal`, `RxCacheService`, `MessageReaction`, `IButton`, `IOptionSelectText`, `HapiHeaders`, `WearOsListView`, `DocView`, `ContractVerificationInput`, `WalletContextState`, `Workbench`, `ImagePicker`, `Portfolio`, `RequestCompleteEvent`, `UserBuildConditionals`, `EnvSection`, `Stanza`, `IPropertyComponentProps`, `AmbientLight`, `ComponentCompilerPropertyComplexType`, `HTMLHeadingElement`, `LabelValuePair`, `GetSymbolAccessibilityDiagnostic`, `SqlBuilder`, `IFormData`, `GitHubRepository`, `backend_util.Conv3DInfo`, `ReactionCanHandleOptions`, `MeetPortalAnchorPoint`, `CallbackFunction`, `FileRepositoryService`, `MessageChannel`, `ServerRegion`, `UpdateNote`, `OrderByStep`, `MagitRemote`, `VaultData`, `RegisteredConnector`, `CommandLineOption`, `SignatureHelpItems`, `TypeFlags`, `dayjs.ConfigType`, `ABLVariable`, `ChannelAnnouncementMessage`, `Organization`, `TabbedAggRow`, `DeploymentDocument`, `ContextInternal`, `CombineParams`, `GraphRbacManagementClient`, `ExtraButtonComponent`, 
`EntitySet`, `NonEmptyString`, `OPCUAServer`, `MaxSizeStringBuilder`, `ScrollPosition`, `ChatMessage`, `IFluidDataStoreChannel`, `Messages.BpmnEvents.TerminateEndEventReachedMessage`, `DaffSeoNameMetaDefinition`, `TSInterfaceDeclaration`, `PasswordSchema`, `WhitelistType`, `GfxTextureP_GL`, `FSA`, `RegisterX86`, `KeyIcon`, `Sequelize.Sequelize`, `OptionsObject`, `Widget.ChildMessage`, `Mnemonic`, `ISharedMap`, `ChildItem`, `Col`, `MapLayerSource`, `Renderer3`, `GlobOptions`, `ClientRule`, `DispatchFunc`, `CssNode`, `ColorService`, `UpdateManager`, `AppAndCount`, `ApiDefinition`, `Complex`, `MethodResponse`, `EndorsementPolicy`, `d.HttpRequest`, `TsGenerator.Factory.Type`, `SegmentId`, `MultiSliderProps`, `AppEventsState`, `IInterpreterRenderHandlers`, `ServerSocket`, `SwitchProps`, `EnvironmentVariable`, `RuleCatalog`, `CancelToken`, `SpriteArray`, `PrefetchOptions`, `ItemType`, `PgClient`, `V1ClusterRole`, `RemoveRoleFromDBClusterCommandInput`, `MdcChip`, `IReferenceType`, `ReactiveEffect`, `TSerializer`, `ITionPlatformConfig`, `TriangleOrientation`, `E2EElementInternal`, `CompoundStatementContext`, `MockedObjectDeep`, `ContentDimensions`, `FaunaData`, `DocumentWrapper`, `MXDartClass`, `DepthwiseConv2DLayerArgs`, `SocketProxy`, `IOpts`, `XmlComponent`, `TokenEndpointResponse`, `IOpenSearchDashboardsMigrator`, `SlidingWindow`, `RunnerOption`, `EntityRemote`, `SurveyForDesigner`, `CompareType`, `TableSchemaSpec`, `ProofRequest`, `AfterCombatHouseCardAbilitiesGameState`, `requests.ListSnapshotsRequest`, `Action`, `ColumnChartOptions`, `FactoryKey`, `IRCMessageData`, `BlockClassSelector`, `KeyLike`, `UpdateBotCommandInput`, `PrunerT`, `JoyCon`, `NextHandler`, `WorkspaceChange`, `NzConfigService`, `UserCourseModel`, `SystemMessage`, `NgScrollbarBase`, `ElectronLog`, `EsAssetReference`, `ButtonStyleProps`, `Level1`, `SecretVersion`, `KeyInKeyof`, `Checkout`, `CacheManagerContract`, `TemplateState`, `BookingState`, `StyledComponent`, `GroupSpec`, `INodeStatus`, `ListingDefinitionType`, `DragDropService`, `RecentlyClosedEditor`, `StackMode`, `ModifierType`, `AsyncThunk`, `PanelPlugin`, `ReadonlyVec`, `WriteOptions`, `IInstanceDefinition`, `TimeGridViewWrapper`, `ICommonHeader`, `K.BlockStatementKind`, `GX.TexFilter`, `MaterialColor`, `ExpressLikeStore`, `StringArray`, `Http3ReceivingControlStream`, `USSEntry`, `HeaderBag`, `VirtualNetwork`, `DmarcState`, `FontNames`, `WhereExpressionBuilder`, `PromiseAndCancel`, `SiteConfiguration`, `ResourceDayGridWrapper`, `FailureEventData`, `DbBlock`, `Either`, `CategoryStub`, `WidgetFactory`, `TransformStream`, `TileView`, `OpeningHours`, `ThyIconRegistry`, `ReflectType`, `ITransactionIdentifier`, `ConnectionsManagerService`, `mongoose.Error`, `Events.postdebugdraw`, `TexMatrixMode`, `AnySchemaObject`, `IOidcIdentity`, `ApiDefService`, `AnyProps`, `TReturn`, `CONTENT`, `MetaFunction`, `IAudioMetadata`, `NewPackagePolicy`, `UserInterests`, `SfdxFalconError`, `CustomersGroupState`, `Cartographic`, `OrganizationalUnitPath`, `MIRVirtualMethodKey`, `AwsRegion`, `EmbeddableStart`, `NamedExports`, `NavItem`, `SwaggerBaseConfig`, `React.HTMLAttributes`, `EntityCollectionReducer`, `CacheTransaction`, `JSDocTypeReference`, `UriLocation`, `A11ySettings`, `DeployedPlugin`, `DFS_Config`, `IMatrix33`, `Value`, `EntityDefinition`, `PatternLiteralNode`, `Pointer`, `Fp`, `_1.Operator.fλ.Stateless`, `HighPrecisionLineMaterial`, `ValueDescriptor`, `RecordSourceProxy`, `core.JSCodeshift`, `IUserSettings`, `WebClient`, `IFieldMap`, `CustomBinding`, `polymer.Element`, `LF`, 
`IRenderingContext`, `JsonPatchOperationsState`, `RegisteredDelegate`, `WordCloudDataPoint`, `ContextState`, `CompletionItemProvider`, `LibraryStoreItem`, `DeleteDestinationCommandInput`, `Assertions`, `PreferenceProvider`, `PanelsState`, `TheiaDockPanel`, `zowe.IDownloadOptions`, `ServiceQuotaExceededException`, `IssueTree`, `HalResource`, `StackCardInterpolationProps`, `TiledMapFeatureData`, `IExecutorHandler`, `ReactPortal`, `AnyValue`, `MDCTopAppBarBaseFoundation`, `Segment1d`, `Wine`, `ISearchResponse`, `KeysToCamelCase`, `StatusController`, `EdgeRouting`, `apid.GetRecordedOption`, `EnabledFeatureItem`, `EmbeddableChildPanelProps`, `GitReference`, `ServiceUnavailableException`, `IContextualMenuProps`, `WindowRefService`, `FileUri`, `ConfigurationProperty`, `IArrivalTimeByTransfers`, `SearchScope`, `ModOutput`, `MDCButton`, `JsonDocsMethod`, `TypeAttributeMapBuilder`, `ITimeLogFilters`, `Doctor`, `TData`, `GraphService`, `FilePickerProps`, `RPCDescriptor`, `Errback`, `StackedRNNCellsArgs`, `Retro`, `RuntimeCacheInterface`, `PromiseFast`, `StartPoint`, `BatchRequestSerializationOptions`, `CompilerEventBuildStart`, `EmergencyCoordinates`, `BaseAsset`, `FindSelector`, `UnparseIterator`, `ListTagsRequest`, `TestElementRefersToElements`, `CreateBundleDTO`, `ChokidarEvents`, `Operator.fλ.Stateless`, `TFLiteDataType`, `SFAAnimationController`, `IEmployeeJobsStatisticsResponse`, `findUp.Options`, `TransformHeadersAgent`, `PointProps`, `TCalendarData`, `DeleteUserCommandOutput`, `PoolMode`, `ConvertComponent`, `ComputedOptions`, `IParticleValueAnimation`, `Io.Reader`, `Transaction.Info`, `Field`, `IOption`, `MDCChipAdapter`, `IUriMap`, `IGetSurveyModelsResponse`, `Github`, `NormalExp`, `SortKey`, `IntrospectionResult`, `System_Object`, `TIn`, `WalkNext`, `IRestApiContext`, `BackgroundRepeatType`, `AliasEventType`, `LanguageTag`, `SubscriptionClient`, `IManifestArmor`, `Git.GitVersionDescriptor`, `requests.ListAutonomousVmClustersRequest`, `LayoutAction`, `GraphQLInputField`, `SignatureHelpParameter`, `CreateReplayDto`, `ErrorWithMetadata`, `IBlockchain`, `QueryContext`, `SummaryArticle`, `TrimmedDataNode`, `EC2Client`, `StoreItem`, `PopupModel`, `MsgAndExtras`, `CachedMapEntry`, `InternalLabConfiguration`, `DocumentLinkParams`, `ActionCreators`, `ITextAndBadge`, `ProgramOptionsList`, `DashboardViewportProps`, `NodeSourceOption`, `VisTypeTimeseriesRequestHandlerContext`, `AddressFormat`, `PairData`, `GUID`, `LocalButtonProps`, `CompileOptions`, `BooleanCB`, `ChainableComponent`, `IProposal`, `PrimitiveTarget`, `Newline`, `WalletEntry`, `SelectQuery`, `InstancePrincipalsAuthenticationDetailsProvider`, `DocumentGeneratorItem`, `ChapterRow`, `_BinaryWriter`, `MatchJoin_MetadataEntry`, `EditorDescription`, `Rgba`, `ContainerBindingEvent`, `LRUItem`, `V2`, `EmusakEmulatorsKind`, `ApolloResponse`, `PatchSource`, `QCNode`, `CrudRequestOptions`, `FILTERS.PHRASES`, `RadixTreeNode`, `IModDirection`, `datetime.DateTimeData`, `Dungeon`, `CodeVersions`, `XTheme`, `FilterLabelProps`, `TransferService`, `Resource`, `VideoComponent`, `NotificationDataOptions`, `IMigrator`, `TImportOptions`, `PurchaseInfo`, `IFormControlProps`, `OpenDateRange`, `ResourceState`, `InsertionType`, `InvoiceQuotation`, `ImportDeclaration`, `GetFieldsOptions`, `RuleWithId`, `Pooling2DLayerArgs`, `ShellResult`, `AppSocket`, `MutableCategorizedPrimitiveProperty`, `OPCUAServerEndPoint`, `Bm.Dest`, `ListTagsForResourceRequest`, `IAuthorizer`, `SVInt`, `ECB`, `GuardFunction`, `InstanceMember`, `ThemeProviderProps`, `PromiseFunction`, 
`NetworkRequest`, `IPeerLogic`, `GX.TevBias`, `ModuleBuilderFileInfo`, `SourceDetails`, `ServerMode`, `NextComponentType`, `ListFilesStatResult`, `IAPIFullRepository`, `ThemeService`, `ReadOptions`, `HistoryResponse`, `WType`, `SanityChecks`, `LookupSorter`, `PropertyChangedEvent`, `DAL.DEVICE_ID_ACCELEROMETER`, `TemplatePositionContext`, `ErrorLocation`, `Locatable`, `DrawEvent`, `AuthUserContext`, `ToneConstantSource`, `DirectionsType`, `BspSet`, `ResolvedFile`, `XYZStringValues`, `SpotLight`, `Slides`, `EvaluateMid`, `FsUtil`, `InputLayerArgs`, `CryptoKeyPair`, `CreateConnectorResponse`, `EvaluateCloudFormationTemplate`, `RawConfigFile`, `CurriedFunction2`, `EdgeImmut`, `ImportInfo`, `NavigationNavigator`, `RSS3List`, `ConceptMap`, `EdgeGeometry`, `CardPile`, `IndicesOptions`, `ProcessedTransaction`, `SynthesisContext`, `RailsWorkspace`, `Weather`, `WalletError`, `WindowRect`, `SortOrderType`, `EncryptedWalletHandler`, `OrderDoc`, `ExportFormat`, `TableOfContents`, `WebGLSync`, `IStopwatch`, `PerpMarketInfo`, `IDrawOption`, `dGlobals`, `DisplayStyleProps`, `CollectionTemplateable`, `HostLabelInput`, `ALong`, `ReleaseGoldConfig`, `VisTypeOptions`, `PutBucketTaggingCommandInput`, `DiagnosticCollection`, `PickerController`, `NotificationProperty`, `IBounds`, `IPaintEvent`, `IPolicy`, `RouteMeta`, `PropFunctionSignature`, `DataSource`, `PlanStep`, `ResolverFn`, `PlatformConfig`, `SqlTuningAdvisorTaskSummaryReportIndexFindingSummary`, `OpenFileFilter`, `AccountNetwork`, `RootOperationNode`, `BinaryEncoding`, `WorkspaceSetting`, `IHistorySettings`, `FaIconLibrary`, `LightGroup`, `StructCtor`, `FeaturesList`, `WavyLine`, `MdcCheckbox`, `SerializableResponse`, `AppContextType`, `TokenType`, `ConvexClipPlaneSet`, `unchanged.WithHandler`, `ObjectProvider`, `IAnswers`, `DownloadOptions`, `ChannelUpdateMessage`, `XI18nProperty`, `PortModel`, `ApplicationType`, `Datasources`, `InputPort`, `ExtendedSettingsDescriptionValueJson`, `IProtoTask`, `OrganizationalUnit`, `HandleResult`, `sdk.ConversationTranscriber`, `SchemaAttributeType`, `JSDocTupleType`, `UpSetJSSkeletonProps`, `Stringer`, `ApiPipelineVersion`, `Suite`, `android.app.Activity`, `SavedObjectsUpdateObjectsSpacesOptions`, `Expected`, `UpdateFileService`, `IGetToken`, `ViewerModel`, `XSession`, `DeleteProjectCommandInput`, `TypescriptAst`, `ChildrenType`, `PropertyData`, `IRedisOptions`, `ReaderIO`, `MessageAttributeValue`, `UISize`, `BlurState`, `ClientBase`, `AwsEsdkKMSInterface`, `JSDocTypeLiteral`, `EntitiesState`, `IOrganizationCreateInput`, `TestSuiteResult`, `Parse`, `serviceRequests.GetWorkRequestRequest`, `EscapedPath`, `ParseTreePatternMatcher`, `ShowProps`, `Searchable`, `OrganizationProp`, `DiffError`, `DelugeTorrent`, `Enum`, `CreateDomainResponse`, `DraggedItem`, `ICollections`, `BlogState`, `TopicsData`, `InternalCredentialManager`, `FilterMeta`, `ExploreBundleResult`, `ASN`, `ROLES`, `ActionFilter`, `CursorProps`, `BigFloat53`, `Reducer`, `ApiTableData`, `SocialTokens`, `AudioInterface`, `iAction`, `FileSystemCallback`, `InputSearchExpression`, `mongoVisitor`, `Themer`, `StoreCreator`, `ArgResult`, `VpnConnection`, `LogParse`, `MdcChipSet`, `kChar`, `Checkpoint`, `SplitAreaDirective`, `UserMetadata`, `UserProfileService`, `dStage_stageDt_c`, `ContainerBinding`, `mpapp.IPageProps`, `PhysicalModel`, `ClothingProps`, `Scheme`, `Model1`, `deployData`, `NodeJS.Process`, `AggregateRewriteData`, `ScriptLoaderService`, `Response`, `LSA`, `btRigidBody`, `MinHeap`, `Suggestions`, `MFARequest`, `HttpResponseException`, `CronExpression`, 
`CancelTokenStatic`, `KeyringPair`, `EditorGroup`, `TreeConfiguration`, `DatamodelEnum`, `BoxProps`, `BlockHash`, `RxFormControl`, `PaginationParams`, `MapperOptions`, `GroupDocument`, `ScrollEventData`, `TDest`, `AllTokens`, `SafeExpr`, `AvailabilityTestConfig`, `FakePromise`, `browser`, `DrgRouteDistributionMatchCriteria`, `net.Endpoint`, `CharacteristicSetCallback`, `LogAnalyticsParameter`, `OrthogonalArgs`, `InstalledClock`, `ExploreStateModel`, `SearchClient`, `ExtrudedPolygonTechnique`, `LocalStorageIndex`, `NormalizedEnvironment`, `RegularPacket`, `EdaPanel`, `PropertyResolveResult`, `VectorPosition`, `CirclineArc`, `YAMLDocument`, `AnalyzableProperty`, `VerificationInput`, `MessageTag`, `ActionSequence`, `pouchdb.api.methods.NewDoc`, `ChangeState`, `IAppState`, `ScopeSymbolInfo`, `MockOracleInstance`, `TabName`, `HdPublicNode`, `GroupOrOption`, `IMaterialAttributeOptions`, `AreaFormType`, `apid.ThumbnailId`, `PipelineStageUnitAction`, `CompositeContentBuilder`, `ActionLogger`, `EditorEvent`, `ControlPoint`, `PrepareOptions`, `CamlQuery`, `ISegSpan`, `ISafeFont`, `GeneralState`, `RequestForm`, `HTMLTableDataCellElement`, `BuilderReturnValue`, `ParsingContext`, `L.Property`, `TxCreate2Transfer`, `GetInsightsCommandInput`, `RuleDefinition`, `IFileWithMeta`, `IAlert`, `Alt`, `DSONameService`, `DiscordooError`, `IReserveApiModel`, `ElementRef`, `AccountStellarPaymentsConfig`, `types.CodeError`, `Migrate`, `SimplifiedType`, `ChartDimensions`, `PlayerStateService`, `AutorestDiagnostic`, `SchemaHelper`, `RPCMethodDescriptor`, `RequestEnvelope`, `GraphQLTaggedNode`, `VersionEdit`, `OasOperation`, `CSS`, `QueryValue`, `BuildParams`, `PeerSetup`, `ResultList`, `ServerRequestModel`, `MenuCardProps`, `MinMaxNormArgs`, `ValueFillDefinition`, `tf.io.WeightsManifestConfig`, `PackageInfo`, `DAL.KEY_DOT`, `InternalServiceErrorException`, `SentInfo`, `CalendarInput`, `Foam`, `FilesState`, `StructurePreviewProps`, `FunctionMap`, `NavigationOptions`, `SeriesDataType`, `GcpCloudProvider`, `TransferRequest`, `capnp.List`, `Decoder`, `SyntheticPerformanceMetrics`, `ApiParams`, `Measurements`, `TaskManager`, `NestedHooks`, `TestSandbox`, `AttributionsWithResources`, `QuantityFormatter`, `NSMutableURLRequest`, `SubmodelImage`, `FacetFaceData`, `SpreadSheetFacetCfg`, `IAuthStrategy`, `Vp9RtpPayload`, `cheerio.Cheerio`, `IBackendRequestData`, `TableHeader`, `EntityRepository`, `ScannedImport`, `CustomEditor`, `DOMQuery`, `RumEvent`, `ExecutionActivity`, `IArguments`, `ProjectControlFunction`, `MigrationTypes`, `ProtocolNotificationType`, `CheckBox`, `ValueTypes`, `SlideProps`, `UpdateBuilder`, `SphereColliderShape`, `BoardBase`, `History`, `IConfirmedTransaction`, `MatChipInputEvent`, `HitDetail`, `SavedVisState`, `IdentifierValue`, `BiquadFilterNode`, `EnhancedEmbeddableContext`, `ListSchema`, `ClickSource`, `AuthInfo`, `StringDictionary`, `DataPlanSObject`, `tf.Tensor`, `QueryAccess`, `IfStmt`, `ViewPortComponent`, `Aggregation`, `OrSchema`, `AlexaLambda`, `TrueSkill`, `TagResourceResult`, `Session`, `ClientReadableStream`, `PushDownOperation`, `AbstractFetcher`, `IClusterDefinition`, `ParticleEmitter2`, `SecurityQuestionStore`, `StepAdjustment`, `ConditionFilterType`, `WindowsJavaContainerSettings`, `WebPartContext`, `ColorKey`, `android.bluetooth.BluetoothGatt`, `PiTriggerType`, `FormState`, `ExtractGetters`, `Generatable`, `EqualsGreaterThanToken`, `ListenerOptions`, `SymlinkCache`, `Vout`, `CrossBridgeAsset`, `InvalidFieldError`, `NonNullTypeNode`, `PouchDB.Database`, `EyeglassOptions`, 
`EventInterface`, `TableRecord`, `CreateWalletFlow`, `StaticBuildOptions`, `CommonTokenStream`, `MeterCCGet`, `AppHandler`, `DOMParser`, `PianoService`, `FieldApi`, `MetricState`, `ColumnWidths`, `FieldsetContextProps`, `CliParam`, `ListViewEventData`, `LoanFactory2`, `IncomingStateType`, `ParsedTag`, `GeneratorManifest`, `TouchEventHandler`, `AnnotationTypeOptions`, `UpdateWriteOpResult`, `flatbuffers.ByteBuffer`, `AstExprState`, `TimelineItem`, `MapControlsUI`, `ChangeHandler`, `SyncEvent`, `GLsizeiptr`, `DotLayerArgs`, `HttpsFunction`, `ResolvedStyle`, `FuncKeywordDefinition`, `HardhatConfig`, `RenderElementProps`, `QueryFetcher`, `DataCallback`, `CategoryCollectionParserType`, `StatusIndicatorProps`, `RepositoryCommonSettingType`, `BlockHandle`, `DedicatedHost`, `IProjectCard`, `GenericRequestHandlerChain`, `NewBalanceFn`, `CAPIContent`, `StoryObj`, `DiezComponent`, `IWorkflowExecutionDataProcess`, `SourceBuffer`, `Boundary`, `MatTabChangeEvent`, `CheckReferenceOriginsParams`, `Selectors`, `TemplateScope`, `KeyResultService`, `GetState`, `AsyncSink`, `FixtureLoader`, `Pane`, `TouchEvent`, `BackwardIterator`, `IExpression`, `MyAudioContext`, `DriverSurface`, `CreateResolversArgs`, `_Identifiers`, `Monad3`, `CurrencyToValue`, `Bug`, `BaselineEvaluation`, `BlockService`, `ResolveContext`, `EntityWithEquipment`, `Ceramic`, `SiemResponseFactory`, `ConfiguredPluginResource`, `Planet`, `BroadcastChannel`, `StatusIndicatorGenericStyle`, `References`, `BuildListInstanceCreateOptions`, `FilterMap`, `ApplicationShell`, `UpToDateStatus`, `DispatchFunction`, `DataHandle`, `FilteredLayer`, `HTMLVideoElement`, `TemplateExpression`, `ITagInputProps`, `Flashcard`, `handleEvent`, `FkQuadTree`, `search.SearchState`, `YamlNode`, `UnpackNode`, `ITranslation`, `RowParser`, `TSDocConfiguration`, `MonoRepo`, `Mirror`, `UnsignedContractDeployOptions`, `IOneArgFunction`, `TaskResult`, `AboutComponent`, `NameExpression`, `SafeTransaction`, `V2BondDetails`, `ColorPickerService`, `PointGeometry`, `ReconnectionOptions`, `BVHNode`, `HapiRequest`, `AbiParameter`, `PartialMessageState`, `AbsolutePath`, `ReplayableAudioNode`, `CallLikeExpression`, `AppxEngineActionContext`, `QueueOptions`, `_m0.Writer`, `WorkspaceFolderContext`, `BaseMsg`, `ConfigDeprecation`, `NetworkModel`, `SQLError`, `Konva.Shape`, `None`, `SuggestionsService`, `IFibraNgRedux`, `IosBinding`, `IDataState`, `ITestScript`, `OpGraphPath`, `FilterRule`, `SelectColony`, `DeserializeWireBaseOptions`, `RumPerformanceEntry`, `Request`, `DaffCompositeProductItemOption`, `SinonFakeServer`, `MergeTree.TextSegment`, `IExecSyncResult`, `MappedTypeGuard`, `ServiceAnomalyTimeseries`, `DomainDropSet`, `TypeResolution`, `KeyResultUpdate`, `HandledEvent`, `DialogData`, `ActionReturn`, `FunctionalUtilities`, `GeoObject`, `VerticalAlignment`, `GenesisProtocolProposal`, `mjComponent`, `ClarityAbiType`, `CreateConnectionDTO`, `GeneralEventListener`, `DescribeAddonCommandInput`, `MemberDoc`, `SignaturePad`, `ModalOptions`, `CompilerOptionsValue`, `IMenuItemInfo`, `TestScheduler`, `ElementFinder`, `MessageAttachment`, `ShaderMaterial`, `DeleteQueryBuilder`, `MessageButton`, `NodeModule`, `SlugifyConfig`, `AuthVuexState`, `GraphQLHOCOptions`, `ClientEngineType`, `IndividualTestInfo`, `MetadataMap`, `IRequestQueryParams`, `DynamoDB.PutItemInput`, `BitcoinAPI`, `Mass`, `QueryLang`, `DataCardEffectPersonType`, `PageG2Type`, `Encryption`, `IUserOptions`, `ChainInfoWithEmbed`, `DeleteWorkRequestResponse`, `ActionsConfig`, `GridLineStyle`, `Resort`, `ClientHello`, `HALLink`, 
`ListModelsCommandInput`, `ExtensionContext`, `ReflectiveInjector`, `SelectableTreeNode`, `AbstractUserProxy`, `VerifiedCallback`, `ValidatedPassword`, `DatabaseStatus`, `Associativity`, `Geoposition`, `InjectedMetadataSetup`, `LinkedService`, `ChartParameters`, `InvocationArguments`, `PiElement`, `Uint16Array`, `ODataModelEntry`, `PrintLabel`, `RoomBridgeStoreEntry`, `OsmWay`, `IThunkAction`, `LinkResolverResponse`, `OdmsPhaseActions`, `UpdateEntry`, `TSESTree.Expression`, `IServiceContainer`, `Slate`, `AuthMachineContext`, `moq.IMock`, `BsModalRef`, `AutoFix`, `PrimitiveValue`, `TemplateLiteralType`, `InruptError`, `RepoBuilder`, `TrackList`, `TOCHeader`, `Order`, `ServiceConfiguration`, `WorkDoneProgressReporter`, `Dependency`, `EmployeeAppointmentService`, `KubeConfig`, `ContactModel`, `IPC.IFile`, `DomainInfoCache`, `coreClient.OperationSpec`, `PaginationState`, `VoiceProfile`, `ReactRef`, `ConcreteBatch`, `InspectTreeResult`, `ApplicationContainerState`, `JobExecution`, `Argv`, `LexoRank`, `UseGeneric`, `ScreenshotOptions`, `MapMeshStandardMaterial`, `FontSize`, `FlashSession`, `IHookStateSetAction`, `ISPField`, `WebGL`, `RtpPacket`, `GenerateOptions`, `Conic`, `ITableData`, `SystemErrorRetryPolicy`, `ChangeFlag`, `CategorizedPropertyMemberDoc`, `PlaylistWithLoadingState`, `ISegment`, `AzureDataTablesTestEntity`, `ComponentProp`, `Angulartics2IBMDigitalAnalytics`, `IPersonalizationSurveyAnswers`, `GitData`, `PlotArea`, `ICombiningOp`, `AngularFireList`, `ColumnBands`, `PromiseDelegate`, `UsersDetailPage`, `RnM2`, `RuntimeMappings`, `Breakpoint`, `Mp4BoxTree`, `TaskLibAnswers`, `SfdxCommandlet`, `ViewValue`, `RowProps`, `StringTypeMapping`, `ConnectedComponentClass`, `RTCSessionDescription`, `BufferReader`, `GLbitfield`, `PostQueryVarsType`, `BoxListEntity`, `DescribeReportDefinitionsCommandInput`, `PageActions`, `ConversionFunction`, `InboundMessage`, `TaskInstance`, `INixieControlPanel`, `K3`, `DebugProtocol.StepInArguments`, `TimeSpan`, `InsertContentDOM`, `NamedIdentityType`, `ImageProps`, `FirebaseMock`, `FnReturnType`, `NodeVisitor`, `HTMLVisualMediaElement`, `GQLType`, `CategoryAxis`, `ExpressClass`, `TeamsActions`, `SinonStub`, `SortingService`, `KeyboardShallowWrapper`, `STHConfiguration`, `ToggleGroupProps`, `SubstrateNetworkParams`, `GroupWithChildren`, `IOriginConfiguration`, `OnChangeValue`, `IRegionConfig`, `VirtualKey`, `IDataSourcePlugin`, `IAddGroupUsersOptions`, `DAL.KEY_COMMA`, `CognitoUser`, `WebGLTransformFeedback`, `RecursiveAnnotation`, `TDiscord.Message`, `DeleteDedicatedIpPoolCommandInput`, `OAuthCredential`, `ICalDateTimeValue`, `RowInfo`, `PlatformNode`, `FilterProps`, `ReferenceResolverState`, `ProblemType`, `LuaDebugVarInfo`, `NearestIntersection`, `DataProvider`, `ListResult`, `ParticipantsLeftListener`, `ProductV2`, `MockAddressBookInstance`, `ScopedDeployment`, `FluidObjectSymbolProvider`, `HttpRequestWithGreedyLabelInPathCommandInput`, `SitemapXmpOpts`, `RTMClient`, `TextureSourceOptions`, `ShippingEntity`, `QueryResultRow`, `EdgeMemento`, `Answers`, `Word`, `DocusaurusContext`, `ObservableHash`, `MLKitRecognizeTextResult`, `IOptions`, `BasicCCGet`, `TransactionOrKnex`, `PopperOptions`, `StyleBuildInfo`, `RequestResponder`, `NodeJS.Timer`, `VaultID`, `ReXer`, `SignatureHelpResults`, `TCallback`, `ProjectQuery`, `CompilerFileWatcherCallback`, `PointComposition`, `ITimelineItem`, `SocketAwareEvent`, `DeviceConfig`, `TimerType`, `XYZNumberValues`, `OrganizationMemberType`, `CanAssignFlags`, `Ability`, `SegSpan`, `GroupParameters`, `SearchBarProps`, 
`DBQuery`, `SchemaInput`, `PaymentOptions`, `LoopOutParameter`, `OutputsType`, `ObstaclePortEntrance`, `CreatedInstance`, `RemoteStoreRoom`, `ApiNotificationSender`, `InputSize`, `org`, `RootToken`, `UIColor`, `ListAutoScalingPoliciesRequest`, `Chai.ChaiStatic`, `BranchInfo`, `ast.LetNode`, `GameWorld`, `SxParserState`, `RepositoryCommonSettingEditWriteModel`, `SdkFunctionWrapper`, `JSONDocument`, `AsyncQueryable`, `sdk.TranslationRecognizer`, `_1.EventTargetLike.HasEventTargetAddRemove.Options`, `WatchableFunctionLogic`, `InstantiationContext`, `IKeyResultUpdate`, `ILibraryRootState`, `PossiblePromise`, `IAnyModelType`, `Shading`, `ChartPointSourceItem`, `CodeLocation`, `InstalledDetails`, `ComponentTag`, `TaskManagerConfig`, `SVFloat`, `HttpTerminator`, `IZoweNodeType`, `ListObjectsRequest`, `ObjectView`, `GeolocationService`, `IExtensionMessage`, `NOTIFICATIONS_STATUS`, `VisualUpdateOptions`, `LabelDoc`, `StorageEvent`, `PageG2`, `PaymentData`, `CredValues`, `IKeyQuery`, `SecureRandom`, `SyncStatus`, `SingleSampledTexture`, `ExpectationRepository`, `InitParams`, `ts.ModuleResolutionHost`, `EncodedManagedModel`, `RuleFailure`, `HttpResponseEncoding`, `ITranslationService`, `ITdDataTableSortChangeEvent`, `DragEventHandler`, `Parameter`, `IAutocompletionState`, `Typehole`, `Period`, `ApolloMutationElement`, `SimpleWallet`, `InternalSession`, `RegistryDataStream`, `StepperContext`, `LooseObject`, `Events.initialize`, `CacheStore`, `FileSystemHelper`, `AnyWire`, `IExtensionActivationResult`, `CombinationConstraint`, `EmusakEmulatorConfig`, `RPCMessage`, `TBook`, `FlattenLayerArgs`, `BoxSliderOptions`, `KarnaughMapProps`, `IndexedTrendResult`, `AppImages`, `TapeNode`, `AnimationReferenceMetadata`, `AndroidAction`, `ListFormat`, `DescribeNamespaceCommandInput`, `ComponentCompilerStaticEvent`, `IMatrixCell`, `PresentationManager`, `TextDocumentIdentifier`, `CombatantViewModel`, `UIAnalytics`, `PieceSet`, `GridsterItem`, `TimeBin`, `QuerySolution`, `FACE`, `vscode.TextDocumentContentChangeEvent`, `ImportKeyPairCommandInput`, `ApplicationConfig`, `RiskLevel`, `CallHierarchyDefinition`, `UrlTree`, `MXMirrorObjMethodCall`, `FunctionWithKey`, `InterfaceWithConstructSignature`, `LayoutConfig`, `PoolType`, `GadgetPropertyService`, `CreateManyDto`, `CollectionObj`, `AuthStateType`, `SVGO`, `TestHotObservable`, `RenderProps`, `SdkDataMessageFrame`, `TextRangeDiagnosticSink`, `WithString`, `FrameOffset`, `LocalSession`, `ParsedPacket`, `WatcherHelper`, `FSService`, `ByteWriter`, `DefaultAnchors`, `Records`, `IPlDocVariablesDef`, `SessionService`, `FramerAPI`, `JsxText`, `Discord.Channel`, `HistoryType`, `AppxEngineStepGroup`, `GetByIdAccountsValidationResult`, `XTransferNode`, `CalendarData`, `NamingStrategy`, `GXMaterial`, `Wah`, `GrantType`, `GenericEvent`, `SVGForeignObjectElement`, `ListSourceApplicationsRequest`, `FormControlConfig`, `ValueToken`, `KeyboardLayout`, `GridIndicator`, `PersistencyPageRange`, `OrganizationModel`, `props`, `SerializedChangeSet`, `Locations`, `Width`, `DynamoDB.BatchWriteItemInput`, `SubMiddlewareApi`, `Ident`, `CombinedPredicate`, `ClassifyService`, `SuggestionOperationType`, `RequestMethod`, `HashTable`, `Todo_todo`, `ArrayBufferSlice`, `ExecutableItemWrapper`, `GetRowIdFn`, `SettingsPropsShared`, `FormSection`, `EtcdOptions`, `IFieldPath`, `IntVoteInterfaceWrapper`, `WeConsoleScope`, `TransactionBase`, `models.ArtifactItem`, `BindingOrAssignmentPattern`, `Designer`, `RichRemoteProvider`, `Double`, `GenerateFn`, `FinalDomElement`, `ConversationState`, 
`ExplicitFoldingConfig`, `IMOSStoryAction`, `IIssuerConfig`, `MenuConfig`, `APIConfig`, `UnitNormArgs`, `GetReviewersStatisticsCollectionPayload`, `InsertQueryBuilder`, `WritableFilesystem`, `IPartitions`, `IntegerList`, `StartPlugin`, `RunnerGroup`, `ConditionResolution`, `TransmartCountItem`, `ProcessStorageService`, `Factory`, `PubSubListener`, `AdInfo`, `ObjExplorerObjDescriptor`, `RuntimeField`, `GroupProps`, `ModelMesh`, `MiscellaneousField`, `QueryBus`, `LoggerFactory`, `IdentityData`, `Line`, `ViewportRuler`, `DiagnosticSink`, `VisualizeAppStateContainer`, `BotTagMasks`, `puppeteer.ScreenshotOptions`, `IScheduler`, `IPayload`, `ComponentOpts`, `MessageRepository`, `PanelMode`, `types.UiState`, `LiteralShape`, `Vector2_`, `Awaiter`, `SchemaUnion`, `BigintIsh`, `TreeSelectionReplacementEventArgs`, `ModalDialogOptions`, `CustomOkResponse`, `GraphQLList`, `AjaxConfig`, `vscode.WorkspaceFolder`, `CausalRepoBranchSettings`, `WaitForSelectorOptions`, `RecursiveArray`, `XMessageService`, `SEGroup`, `RtcpHeader`, `PlayerController`, `IdDTO`, `FormikProps`, `InputStream`, `ISearchRequestParams`, `GetEmailIdentityCommandInput`, `CloneOptions`, `D3LinkNode`, `ArrayAccessValue`, `SettingRepository`, `ChannelContext`, `SnapshotProcessor`, `ElementType`, `InsightModel`, `PumpState`, `OpenApi.Document`, `React.Props`, `BottomBarItem`, `FormatOptions`, `RevisionValueCache`, `OriginAccessIdentity`, `CipherWithIds`, `StripePaymentMethod`, `TwistAction`, `WechatyVorpalConfig`, `IGLTFLoaderExtension`, `TableCellPosition`, `Listing_2`, `UIWindow`, `indexedStore.Store`, `TypeDictionaryInfo`, `TestEthersProvider`, `RectangleObject`, `ENDStatement`, `MessageRemoteImage`, `a.Expr`, `P2WPKHTransactionBuilder`, `EntityValidator`, `Tag`, `SavedObjectsBaseOptions`, `ICategoryBins`, `InputDataConfig`, `Stripe`, `Linter`, `DocService`, `ProgressUpdate`, `StatsModuleReason`, `LogoActionTypes`, `requests.ListConnectHarnessesRequest`, `UTXO`, `OptionalMaybe`, `NumberSet`, `GetProductSearchParams`, `Stripe.Event`, `SettingsNotify`, `SetSettingEntry`, `BrowserIndexedDBManager`, `LegacyReputationToken`, `CopyLongTermRetentionBackupParameters`, `IModalContent`, `GeoContext`, `ClassWithMethod`, `IValidationOptions`, `MediaModel`, `Other`, `SurveyQuestionEditorTabDefinition`, `TypeOrmHealthIndicator`, `MathfieldPrivate`, `DeleteDatasetCommandOutput`, `NzTreeNodeOptions`, `IG6GraphEvent`, `TinyDate`, `TrackModel`, `GetAccountSettingsCommandInput`, `IBook`, `ShimFactory`, `ResponderRecipeResponderRule`, `Sphere`, `MouseButtonMacroAction`, `ThemeTypes`, `Changes`, `Keypoint`, `KeyExchange`, `MarkerInfo`, `DecorateContext`, `ExpansionPanel`, `CSharpMethod`, `LoaderResource`, `BINModelSectorData`, `ERC721TokenDetailed`, `ServiceAccount`, `DidOpenTextDocumentParams`, `FlexLine`, `Interfaces.ViewEventArguments`, `NumericArray`, `RawLogEvent`, `GeoPolygonFilter`, `PostFrameUpdateType`, `NotesRange`, `XRReferenceSpace`, `IMessageFromBackground`, `AccountsInstance`, `SelectedItem`, `FramePublicAPI`, `TransportRequestOptionsWithOutMeta`, `ImmutableObjectiveGroup`, `IDataFilterConfiguration`, `TypeCache`, `Content`, `FieldFormat`, `protocol.FileLocationRequestArgs`, `MessageResolvable`, `ConcreteTaskInstance`, `ICredentialsDb`, `FogBlock`, `RadioProps`, `Q.Promise`, `ChildExecutor`, `TConstructor`, `JSDocFunctionType`, `RouteInitialization`, `CSSMotionProps`, `PluginRevertActionPayload`, `CreateScope`, `LineShape`, `ForwardingParams`, `HTMLDice`, `ZosJobsProvider`, `ProsemirrorAttributes`, `t_a25a17de`, 
`CloudWatchDimensionConfiguration`, `ActionContext`, `apid.GetReserveOption`, `DeleteGatewayCommandInput`, `Post`, `_DeepReadonlyObject`, `Test2`, `$mol_atom2`, `vscode.WorkspaceConfiguration`, `GetStateReturn`, `ListCertificatesRequest`, `InviteMemberCommand`, `HTMLOptGroupElement`, `Button`, `types.IDynamicOptions`, `ValidateFilterKueryNode`, `FbForm`, `Icons`, `ValueIterator`, `NzTreeNode`, `DescribeHomeRegionControlsCommandInput`, `VertexElement`, `TestAssertionStatus`, `SubscribedObject`, `InferenceContext`, `Jest26Config`, `vd.createProperties`, `IVector4`, `TemplateElement`, `PainterElement`, `APropInterface`, `IpcMessage`, `Decl`, `BuildVisConfigFunction`, `FindConditions`, `RunningState`, `SelectorMeta`, `CookieService`, `PotentialApiResult`, `InitData`, `Org`, `Reddit`, `TokenSet`, `Manifest`, `E.ErrorMessage`, `AddApplicationOutputCommandInput`, `StripeSetupIntent`, `IcalEventsConfig`, `SwaggerPathParameter`, `CalculatedTreeNode`, `IconifyBrowserCacheType`, `GPUBindGroup`, `BundleDataService`, `TokenDetailsService`, `IntrospectionInputObjectType`, `ABIReturn`, `ExperimentSnapshotDocument`, `StatusIcon`, `DiagramModel`, `RenderAtomic`, `HierarchyQuery`, `RpcRequest`, `MyButton`, `LoggerFunction`, `AndroidManifest`, `MemoryUsage`, `L2Data`, `ViewBase`, `FieldValues`, `LoginUser`, `DefaultChangeAnalyzer`, `ChangePasswordState`, `PoiBuffer`, `IInstance`, `CounterService`, `StringTypes`, `GraphStats`, `AccountBalance`, `SharedDirectory`, `ComponentPath`, `SessionRefreshRequest_VarsEntry`, `vBlock`, `RegisteredTopNavMenuData`, `Scenario_t`, `WithExtendsMethod`, `PacketRegistry`, `HistoryNodeEvent`, `TocState`, `EventInfo`, `USSTree`, `UsedSelectors`, `VirgilPublicKey`, `PrivateCollectionsRoutes`, `Swagger.Schema`, `PhysicalElementProps`, `MutationOptions`, `TRPCErrorResponse`, `Modifiable`, `PivotGroupByConfig`, `Q`, `MiddlewareFactory`, `roleMenuInterface`, `MainPageStateModel`, `ChildProcess`, `RootConnection`, `IFluidHandleContext`, `IApiParameter`, `CreateTableCommandInput`, `EventNote`, `NetworkOptions`, `DropdownOption`, `ReduxRootState`, `ReserveInstance`, `KeyframesMap`, `IOHandler`, `ViewConverter`, `ReputationToken`, `Event24Core`, `IAppointment`, `FormComponent`, `TRPCLink`, `RawSeries`, `VertexAnimationEffect`, `tf.GradSaveFunc`, `TableMap`, `GalleryApplicationVersion`, `ForNode`, `DialogInput`, `Border`, `DBOp`, `Rigidbody3D`, `ResolvedModuleFull`, `Doc`, `ScaleByBreakpoints`, `PropIndex`, `addedNodeMutation`, `IHawkularAlertRouterManager`, `FriendshipPayload`, `google.maps.GeocoderResult`, `Payment`, `TrackGroup`, `GenderRepartitionType`, `ElectrumNetworkProvider`, `requests.ListUserGroupMembershipsRequest`, `Gender`, `Load`, `CSSProps`, `ResourcePrincipalAuthenticationDetailsProvider`, `PeerTypeValues`, `TupleTypeNode`, `IORouterRegistry`, `AuthModeChanged`, `IModalProps`, `HTMLLinkElement`, `DescribeApplicationCommandInput`, `ToggleComponent`, `WidgetView`, `Keypair`, `CellClassParams`, `MotionChartData`, `MediaQuery`, `StatefulChatClientWithEventTrigger`, `QueryRunner`, `LContainer`, `ISummaryContext`, `XBus`, `FlagshipTypes.AndroidConfig`, `ComponentTestingConstructor`, `IRules`, `IProjectData`, `TEUopType`, `ChatThreadProperties`, `ViewManager`, `ReportParameter`, `ColumnMetadata`, `express.NextFunction`, `CountryService`, `AbstractCancellationTokenSource`, `ObjectWithId`, `ProgramCounter`, `MSITokenResponse`, `Angulartics2`, `ServiceCollection`, `ListImagesResponse`, `AggConfigSerialized`, `UserAccountID`, `UnionRegion`, `ITodoState`, `CreateMessageDto`, 
`RenderMethod`, `DeployUtil.ExecutableDeployItem`, `HitCircleVerdict`, `ScrollView`, `ObservableArray`, `ShareService`, `JwtService`, `GetResult`, `Jsonp`, `ILogService`, `GeometryCommand`, `LogContext`, `DAL.KEY_X`, `GuildResolvable`, `HubConfigInterface`, `ActionsSubject`, `BrowseEntrySearchOptions`, `ContainerFormData`, `OutcomeShortHand`, `EditorContext`, `SymbolInformation`, `LatLngExpression`, `AuthenticationFlowRepresentation`, `MediaStreamsImpl`, `ThyFormValidatorGlobalConfig`, `DecoratedModelElement`, `JsonParserContext`, `SpriteState`, `BlockFactory`, `MemberDescriptor`, `CDPSession`, `OpenApiSchema`, `AvatarConfig`, `IEnemy`, `requests.ListDbHomePatchesRequest`, `ShoppingCartContextValue`, `CountBadgeProps`, `SdkSignalFrame`, `PyteaWorkspaceInstance`, `Assertion`, `ClientFile`, `Buildkite`, `FragmentDefinition`, `tags.Table`, `DocumentMetadata`, `TAuthor`, `Models.Side`, `VideoGalleryRemoteParticipant`, `FieldTypeByEdmType`, `EngineTypes`, `TriggerData`, `ReferenceCallback`, `CatExpr`, `GetAppInstanceRetentionSettingsCommandInput`, `BaseExecutor`, `Unsub`, `t_44e31bac`, `DefaultMap`, `DocumentData`, `ParseString`, `GroupHoldr`, `HsEndpoint`, `PreprocessingData`, `FetchableType`, `InnerJoin`, `BaseVisTypeOptions`, `WesterosGameState`, `MagnetarInstance`, `DictionaryPlugin`, `PluginWriteAction`, `FlowPreFinallyGate`, `Yendor.Tick`, `NavigationStart`, `ListAccountsCommandInput`, `TError`, `CollectionView`, `AlterTableBuilder`, `RenderedItem`, `StatusPublisher`, `ExplorationInfo`, `AttributeInput`, `TokenMap`, `PropsType`, `ViewAction`, `ParseResults`, `InvalidParameterValueException`, `LogAnalyticsSourceFunction`, `VerificationInitiateContext`, `DiagnosticBuffer`, `IAuthState`, `HdEthereumPaymentsConfig`, `BSplineWrapMode`, `OrganizationSet`, `code.Position`, `CreateChannelRequest`, `ObserverNameHolder`, `ParseTreeListener`, `GameInput`, `ExtraInfoTemplateInput`, `AppStackOs`, `CandidateInterviewService`, `ITenantSetting`, `Phaser.Game`, `LogisticsRequest`, `DeleteEnvironmentCommandInput`, `Actual`, `SinonMatcher`, `LayerId`, `OmitFuncProps`, `NavigationPublicPluginStart`, `ContractDeployOptions`, `Mustering`, `ScaleQuantize`, `IReactComponentProps`, `ng.ICompileService`, `ErrorType`, `DiagnosticsLogger`, `P2P`, `ExceptionConverter`, `OasRef`, `RobotApiRequestMeta`, `TRANSFORM_STEP`, `DecoderFunction`, `StorageEntry`, `LROperation`, `InputResolution`, `UNK`, `ApiViewerTab`, `GX.Command`, `WeaponObj`, `MouseMoveEvent`, `ChatState`, `UpdateContactCommandInput`, `Y.Doc`, `AppRoot`, `DestinationHttpRequestConfig`, `Codeword`, `Git.VersionControlRecursionType`, `InjectedProps`, `UIAction`, `AlbumListItemType`, `HighlightRange`, `DescribePipelineCommandInput`, `ReacordTester`, `EC.KeyPair`, `FindRelationOptions`, `ModelFactory`, `RepositoryType`, `ClassDescriptor`, `DeflateWorker`, `EntityColumnDef`, `XPCOM.nsIComponentRegistrar`, `ApiItemMetadata`, `SyncService`, `IMapItem`, `VimMode`, `MacroMap`, `KanbanBoardState`, `IControllerAttributeExtended`, `TransportType`, `CSSDesignToken`, `CategorizationAnalyzer`, `KeyCode`, `RestServerConfig`, `ErrorRes`, `ScrollerAnimator`, `AsyncPipeline`, `StreamClient`, `PiPropertyInstance`, `LoginToken`, `HexMesh`, `_THREE.Vector3`, `DocumentSettings`, `SolflareWallet`, `RelativePosition`, `Magma`, `TestDtoFilter`, `TestSchemaProcessor`, `ScanSegment`, `TRawComponent`, `IndicesArray`, `TSocketPacket`, `KeyPairKeyObjectResult`, `TTK1AnimationEntry`, `loader.LoaderContext`, `Happening`, `SymBool`, `IntentSchema`, `OCSpan`, `GDQOmnibarListElement`, 
`ParsedLineType`, `ProjectedDataItem`, `CursorModelConfig`, `Sizes`, `PlainData`, `NormalisedSearchParams`, `TheBigFanStack`, `IDynamicGrammarGroup`, `FilesystemDirectoryNode`, `EventSubscriber`, `DecoratorObject`, `ActionTypeModel`, `GoStoneGroup`, `TSelected`, `TestLogger`, `IResultSetUpdate`, `BucketAggType`, `Piscina`, `AwsClientProps`, `JieQi`, `webpack.compilation.Compilation`, `FunnelStep`, `JIntersection`, `SBDraft2CommandInputParameterModel`, `EventActionHandler`, `SpanContext`, `IndependentDraggable`, `Models.GameState`, `IGuildMemberState`, `builders`, `AgentConfigOptions`, `IRuleOption`, `ActivityInterface`, `DescribeTaskCommandInput`, `ChangLogResult`, `PointCompositionOptions`, `api`, `ImgAsset`, `RequiredFieldError`, `ICustomerRepository`, `ButtonListenerCallback`, `QueueStorageContext`, `V1ConfigMap`, `EvaluateOperator`, `TranslationKeys`, `YAMLMap`, `K3dClusterNodeInfo`, `FunctionResult`, `HttpContextConstructorContract`, `PostsState`, `SEdge`, `ActionSource`, `iReduxState`, `ParsedResponseHeaders`, `BucketAggParam`, `DecodedLogEntryEvent`, `MetaService`, `PageRect`, `Writable`, `DeepLinkConfig`, `DescribeUserResponse`, `requests.ListOdaInstancesRequest`, `ColumnSchema`, `T.Action`, `SubmitFeedbackCommandInput`, `TemplateInput`, `LabelSet`, `MathjsBigNumber`, `PlanningRestriction`, `td.AppLogger`, `NavigatorRoute`, `SymbolResolutionStackEntry`, `WaitingThreadInfo`, `BaseDbFieldParams`, `AdonisApplication`, `Vector3Like`, `JsonObjectProperty`, `DeveloperExamplesSetup`, `IHelper`, `OnOptions`, `DescribeEventsCommand`, `CallHierarchyDataItem`, `StoryFn`, `StepInfo`, `InputBit`, `IHttpPostMessageResponse`, `EventType`, `ToolbarWrapper`, `SvgTag`, `TagConfig`, `BaseArtifactProvider`, `VariantOptionQualifier`, `CreateCertificateResponse`, `EventStatus`, `SprottyDiagramIdentifier`, `ManagedID`, `PropertyCategory`, `SignalingOfferMessageDataChannel`, `InterceptorManagerUseParams`, `ClassDecorator`, `JupyterKernel`, `StepProps`, `ModuleOptionsWithValidateTrue`, `ChangesType`, `NestedDict`, `IFieldInfo`, `GanttItemInternal`, `TransactionOp`, `STColumn`, `ArgVal`, `IServiceInjector`, `IAppInfo`, `App`, `DateConstructor`, `DeleteAssociationCommandInput`, `RComment`, `ClientGrpcProxy`, `IReversibleJsonPatch`, `DiffOptionsNormalized`, `NumOrElement`, `GClient`, `ApisService`, `MessageEmitter`, `DayPickerProps`, `ConfigurationFile`, `CentersService`, `BabelPlainChain`, `GridStackItemWidget`, `ViewFilesLayout`, `A8k`, `WorkloadType`, `CivilContextValue`, `DaffCategoryFactory`, `WebAppRuntimeSettings`, `lsp.Position`, `$IntentionalAny`, `RelativeInjectorLocation`, `HashLockTransferAppState`, `ImmutableObjective`, `OauthRequest`, `requests.ListAutonomousExadataInfrastructuresRequest`, `IRunData`, `Bleeps`, `MaterialParams`, `ValueFilterPlugin`, `V1Job`, `SelectDropdownOption`, `ModelState`, `IOrganizationContact`, `WorldBoundingBox`, `Replica`, `CallAdapterState`, `Destination`, `React.FC`, `ReconciliationPath`, `DebugSourceBreakpoint`, `SelectedScriptStub`, `ICustomValidatorResponse`, `TransferArgs`, `WebContainer`, `IHand`, `TimeseriesDataRecord`, `HouseCard`, `FaasKitHandler`, `NotebookWorkspaceName`, `ElementDataset`, `UsePaginatedQueryOptions`, `IABIMethod`, `Vault`, `QuestionModel`, `SaveFileReader`, `SVGTextElement`, `ObjectSize`, `ViewStore`, `RawNavigateToItem`, `IUsedState`, `IBoxPlot`, `OtherActionsButtonProps`, `AnnotationLevel`, `Features`, `MethodHandler`, `TimelineBuckets`, `CollectionValue`, `AbstractServiceOptions`, `IFabricGatewayConnection`, `ReindexState`, 
`OrganizationProjectService`, `PageService`, `PluginKey`, `DOMWindow`, `PBRStandardMaterial`, `ConvexPolygon2d`, `Reporter`, `AccountLeague`, `CompressedJSON`, `IncomingHttpHeaders`, `BrowserControllerReturn`, `Knowledge`, `CoverLetterService`, `JsonContact`, `StringBuilder`, `ErrorHandlingService`, `IdOrNull`, `Electron.App`, `CanvasDepth`, `InjectedConnector`, `HttpError`, `WorkerMessage`, `CacheListener`, `AnalyzeCommentResponse`, `CategoricalAggregate`, `UserResult`, `BasePlayer`, `ProviderType`, `TypeParameterDeclaration`, `AwsCloudProvider`, `ElementContent`, `GlobalEventName`, `UpdateDomainNameCommandInput`, `CallReturnContext`, `ObservableFromObject`, `requests.ListAlarmsRequest`, `PluginComponents`, `DocumentService`, `Wrapped`, `DeleteConnectionCommandInput`, `FileSystemHost`, `TokenObject`, `SerializedEntityNameAsExpression`, `TimelinePoint`, `GeometryType`, `IClock`, `AxisDataItem`, `ReconnectingWebSocket`, `ProxyHandler`, `ParsedGeneratorOptions`, `UseMutationReturn`, `coreRestPipeline.RequestBodyType`, `ShadowRoot`, `IObjectInspectorProps`, `SymbolVisibilityResult`, `TEntity`, `WrappedStep`, `ExecuteStatementCommandInput`, `OperatorLogPoint`, `ErrorPaths`, `MousePosition`, `Application`, `TestServiceContext`, `SignalMutation`, `TestClient`, `UniqueNameResolver`, `IRequestHeaders`, `TextTip`, `ReadonlyObjectKeyMap`, `CreateProps`, `Events.stop`, `MagentoCartFactory`, `DataLabelOptions`, `PerspectiveCamera`, `TContext`, `TimerInfo`, `DiffuseMaterial`, `BasicTypeDefinition`, `DataFactoryClient`, `ProjectLocale`, `ThrottlingException`, `OAuthRequest`, `HammerInputExt`, `postcss.Container`, `ProviderData`, `CoursesCounter`, `GeneratedIdentifierFlags`, `RoomPayload`, `NodeBuilderContext`, `RootComponentRegistry`, `MappingPatternInfo`, `FieldTypes`, `HierarchicalNode`, `MyObserver`, `QueryPaymentsRequest`, `ShaderParams`, `OnPostAuthToolkit`, `NgOption`, `INgWidgetSize`, `QueryMwRet`, `GX.IndTexAlphaSel`, `core.DescribePath`, `PoisonPayload`, `interfaces.Factory`, `MergedCrudOptions`, `RenderTag`, `XYZValuesArray`, `ISetLike`, `ActionListener`, `IServiceParams`, `DocumentSymbolProvider`, `SerializeOutput`, `SentinelType`, `XMLNode`, `VerificationCode`, `Chorus`, `CircleDatum`, `CardRenderItem`, `IMatrixFunc`, `RouteMatch`, `MDCListFoundation`, `React.ReactChild`, `QueryCommandOutput`, `Lang`, `RSAKey`, `Swap`, `PIXI.Sprite`, `WebDNNWebGLContext`, `git.ICreateBlobParams`, `TxBroadcastResult`, `ScreenReaderSummaryStateProps`, `FunctionNode`, `XYBrushEvent`, `DepList`, `ProfileStates`, `Equiv`, `BusinessAccount`, `ValidationQueue`, `LinkedHashSet`, `Meta`, `ANGLE_instanced_arrays`, `ControlService`, `ModuleBlock`, `StatePropsOfControl`, `Callable`, `postcss.Root`, `PointerCtor`, `CascaderOption`, `InlineDatasources`, `CartesianChart`, `PublishArgs`, `IRegularAttr`, `Flight`, `DAL.DEVICE_ID_MSC`, `PredicateWithIndex`, `ShoppingCartStore`, `TimelineBucketItem`, `okhttp3.WebSocket`, `DocSourceFile`, `Wildcard`, `CascadeTestResult`, `MIRInvokeDecl`, `ListRuleGroupsCommandInput`, `_Exporter`, `SelectedCriteriaType`, `SimpleList`, `TContainer`, `AuthConfig`, `ScaleHandle`, `FilePreviewDialogRef`, `LanguageCode`, `NotificationCallback`, `FieldsConfig`, `IStashEntry`, `FetchStore`, `requests.ListRunsRequest`, `CoreTracer`, `KeyLoader`, `DebugPluginConfiguration`, `TaskModel`, `SidePanelOpenDirection`, `IStepInfo`, `Percent`, `ActionMetadata`, `BehaviorSubject`, `Key1`, `DirectBuy`, `request.OptionsWithUri`, `TVEpisode`, `TransactionHash`, `Semigroupoid2`, `SystemVerilogExportInfo`, 
`SelectionState`, `GearService`, `Filename`, `Assembler`, `Distortion`, `ITracerBenchTraceResult`, `ItemValue`, `BottomNavigationItem`, `ts.DiagnosticCategory`, `IAggregateStructure`, `ParsedElement`, `TextDocumentChangeEvent`, `StateModel`, `MarkupContent`, `ECClass`, `ColumnDifference`, `GetServiceRoleForAccountCommandInput`, `WindowId`, `DecodedPixelMapTransaction`, `ITestObjectProvider`, `JwtHelperService`, `RenderTextureInfo`, `UninterpretedOption`, `AggConfigOptions`, `chalk.Chalk`, `GbBackendHttpService`, `ObservableQueryValidatorsInner`, `BigAmount`, `QueryCacheKey`, `GossipError`, `TypeAliasDeclaration`, `WorkItem`, `Generation`, `THREE.Quaternion`, `ConditionsType`, `SyncedRef`, `PrimitiveValueExpression`, `SequentialTaskQueue`, `InterceptedRequest`, `ModuleG`, `SQLResultSet`, `PartialLax`, `ImagePipe`, `UserInfoStore`, `Mysql`, `RadixParticleGroup`, `StatementContext`, `Kernel.IKernelConnection`, `EvaluatedStyle`, `F.Function`, `ts.CompletionInfo`, `GUITheme`, `GatherShape`, `IVideoFileDB`, `InputElement`, `STFilterComponent`, `TopicChangedListener`, `DataVariable`, `FeedFilter`, `AdaptElementOrNull`, `StructureLab`, `NAVTestObject`, `textViewModule.TextView`, `webpack.Compiler`, `ModelRenderer`, `FileSystemCache`, `AxisCoordinateObject`, `EventsFactory`, `NodeImpl`, `CipherContext`, `Seek`, `DataDocument`, `ImportKind`, `ActionTreeItem`, `ITodo`, `Arpeggiate`, `RadixTree`, `GraphQLResolverMap`, `UpdateOrganizationConfigurationCommandInput`, `PreferredContext`, `DeleteDBClusterParameterGroupCommandInput`, `PageCollection`, `ActionBinding`, `TypeModel`, `AggConfig`, `GetChannelMessageCommandInput`, `ModuleResolutionState`, `MemoryPartition`, `B10`, `HydrationContext`, `IndexService`, `ComponentFunction`, `Alias`, `CategoryResults`, `SelectDownshiftRenderProps`, `IApplication`, `MiddlewareCallback`, `WaitTaskOptions`, `DijkstraNode`, `PartialTheme`, `HttpResponseBase`, `SegmentType`, `JobDetails`, `CreateSchemaCustomizationArgs`, `SubscriptionClass`, `LoadEventData`, `Pickability`, `PermissionMetadata`, `MiBrushAttrs`, `UpdateIntegrationResponseCommandInput`, `LayerVariable`, `common.RegionProvider`, `GlobPattern`, `ListContext`, `NgZonePrivate`, `MdlOptionComponent`, `HttpMethods`, `IntegrationCalendar`, `FileNode`, `IKeyboardFeatures`, `GeoPointInput`, `HlsPackage`, `RepositoryFile`, `RendererContext`, `IProtonAccount`, `AtomList`, `SavedObjectsMigrationVersion`, `ViewportScrollPosition`, `CoreExtInfo`, `parse5.DefaultTreeElement`, `GenericRequestHandler`, `ScriptData`, `SubstrateEvent`, `WebGLContextWrapper`, `IEmployeePresetInput`, `IHttpRequestOptions`, `IPreset`, `handleParticipantEvent`, `SendMessageFn`, `UniformPub`, `HttpMetadata`, `CW20Currency`, `UseLazyQuery`, `Changelog`, `ConditionalDeviceConfig`, `UserQuery`, `FabricEnvironmentRegistryEntry`, `OffsetIndexItem`, `LogoState`, `TypeDefinitionParams`, `TSTopLevelDeclare`, `PermissionState`, `FontProps`, `TextElementFilter`, `UnitFactors`, `MetadataArgsStorage`, `DatabaseContainer`, `ts.Declaration`, `MagicLinkRequestReasons`, `UiActionsServiceEnhancements`, `CreateResourceCommandInput`, `PubGroup`, `ThemeSetup`, `ContentData`, `ProcessInfo`, `TimePointIndex`, `Singleton`, `coreClient.OperationArguments`, `JSDocVariadicType`, `PathResolver`, `ListCtor`, `PlayService`, `MockHashable`, `helper.PageOptions`, `interfaces.Target`, `Serie`, `UpdateRouteCommandInput`, `MIRInvokeKey`, `Log`, `GoToOptions`, `MultilevelSwitchCCReport`, `PositionNode`, `Adapter`, `CrudGlobalConfig`, `AnimatorSet`, `DatasetTree`, `Moc`, `Mountable`, 
`UIImageRenderingMode`, `BrowserInfo`, `S.Stream`, `GetRowLevelKeyFn`, `LabwareCalibrationAction`, `CodeMirror.EditorChange`, `IReducerMap`, `RepositoryModel`, `InstrumentationLibrarySpans`, `Mdast.Link`, `SaveDialogReturnValue`, `ContractDecoratorKind`, `SaveType`, `SpeakDelegate`, `StackProperties`, `MemBuffer`, `Deque`, `LocationChange`, `Diagnostics`, `ImageMatrix`, `MockRequest`, `SeriesPlotRow`, `ArmObj`, `ClassInterpreter`, `Embedding`, `DockPanel`, `DocBlockKeyValue`, `InternalCoreUsageDataSetup`, `IRequestInfo`, `Vnode`, `MountAppended`, `GenericModel`, `ServiceRoute`, `angular.ui.IStateService`, `PiEditProjectionLine`, `AzExtClientContext`, `When`, `Cobranca`, `SupportCodeExecutor`, `StepDetailsExposedState`, `WorkspaceField`, `CSymbol`, `MenuContext`, `SingleLayerStringMap`, `IFunctionTemplate`, `SharedService`, `RemoteConsole`, `Batcher`, `OnlineUserType`, `FormatDiagnosticsHost`, `DashboardId`, `PythonPathResult`, `ImportCacheRecord`, `DrawingNode`, `ReadableStreamDefaultReader`, `VisualizationsStartDeps`, `MockCSSStyleSheet`, `ISequencedDocumentAugmentedMessage`, `TextOrIdentifierContext`, `UpdateEnvironmentCommandInput`, `VectorEntry`, `TileObject`, `CdtSite`, `DeleteEndpointCommandInput`, `InterleavedBufferAttribute`, `PostMessage`, `TopicType`, `BackgroundReplacementVideoFrameProcessorObserver`, `OptimizedSubSetKey`, `NzMessageService`, `Ring`, `SfdxCliActionResultDetail`, `Progress.IChunk`, `GetTraceSummariesCommandInput`, `TodoState`, `ConfigFlags`, `ListSchemasCommandInput`, `ListNotebookSessionShapesRequest`, `Matched`, `SystemHealth`, `NSVElement`, `CausalObjectStore`, `SelectionChange`, `IMapSettings`, `WorkerProxy`, `HeroCollection`, `View1`, `InvalidVPCNetworkStateFault`, `DetailViewData`, `ListPageSettings`, `IKey`, `TextInputLayout`, `requests.ListPingMonitorsRequest`, `IJobConfig`, `StepRecoveryObject`, `HexcolorInfo`, `DatePipe`, `AccountWithAll`, `ODataOptions`, `StyleRecord`, `ListRequest`, `UpdateArgs`, `ConnectionListener`, `PathNode`, `Evidence`, `BuildLevel`, `HappeningsInfo`, `EquipmentInfo`, `Arrayable`, `BuildingColorTheme`, `GfxRenderPipeline`, `IWorkflowPersona`, `JSheet`, `IDataType`, `FrameParser`, `IJetView`, `FullDir`, `UserTask`, `LocalTag`, `PathItemObject`, `DeclarationBase`, `HsMapService`, `LangiumDocument`, `SVGStyle`, `ts.NumericLiteral`, `JsonAstObject`, `RequestController`, `FindOneOrFailOptions`, `UserRole`, `NullAction`, `PostInfo`, `EditableRow`, `TestFunctionImportComplexReturnTypeCollectionParameters`, `IpcSender`, `TimeGranularity`, `ContentItem`, `Dispatch`, `IPathResultItem`, `IBundleWithoutAssetsContent`, `Timeout`, `LockedDistricts`, `CreateRequest`, `SimpleChanges`, `VoiceFocusAudioWorkletNode`, `DiffParser`, `Tx.Options`, `AdjustNode`, `ContentsXmlService`, `DateTimeService`, `DateSchema`, `HttpManagementPayload`, `fromTimelineActions.GetTimeline`, `WalkMemberContext`, `SelectedScope`, `IStateCallback`, `InputOnChangeData`, `IncomingForm`, `Multiset`, `BeInspireTreeNode`, `DebtPareto`, `ImageryCommunicatorService`, `PolynomialID`, `CreateFleetCommandInput`, `ThyResizeEvent`, `SafeParseReturnType`, `YTMember`, `DisplayContext`, `ActionReducer`, `SessionContent`, `TransferBuilder`, `AtomChevronElement`, `QueryState`, `StakingTransactionList`, `Parser.ASTNode`, `JsonRpcError`, `IAllExecuteFunctions`, `RenderTarget_t`, `unicode.UnicodeRangeTable`, `UtilObject`, `ApiCallByIdProps`, `S2DataConfig`, `IGetDeviceResult`, `ResponseComment`, `Specie`, `EventRecord`, `LanguageCCSet`, `MediaFile`, `FixHandlerResultByPlugin`, `BaseText`, 
`ArmSiteDescriptor`, `MemoizedFn`, `SimpleRNNLayerArgs`, `NotifyMessageType`, `LED`, `IBuildTask`, `ComponentRendering`, `MessageSentListener`, `AxisOrientation`, `Constraint`, `ProxyRequest`, `TranslationDictionary`, `LinkLabelsViewModelSpec`, `Themes`, `TableEvent`, `ts.DocumentRegistry`, `TavernsI18nType`, `PublicParams.Swap`, `ElasticsearchError`, `StackCollection`, `MalRequest`, `RadixAddress`, `UnauthorizedErrorInfo`, `Tokens`, `ConnectionService`, `ng.IHttpPromiseCallbackArg`, `WorldCountry`, `TileMapLayerPub`, `JSONSchema`, `RepoOptions`, `MessageObject`, `Spacing`, `SearchRecord`, `ToRefs`, `FlowListener`, `EngineOptions`, `CreateParams`, `Interception`, `FaunaRoleOptions`, `HistoryEntry`, `MsgRevokeCertificate`, `PerformRenameArgs`, `GlobalToModuleMapping`, `TransportResult`, `StkTruToken`, `ArianeeHttpClient`, `SendTable`, `WinstonLogger`, `ApplyChangeSetOptions`, `CodeFlowReferenceExpressionNode`, `OvSettingsModel`, `Guild`, `IAureliaProjectSetting`, `ResolveablePayport`, `VMenuData`, `SourceNotFoundFault`, `VdmEntity`, `GXMaterialBuilder`, `TabItemSpec`, `SettingName`, `DataViewMetadataColumn`, `sodium.KeyPair`, `any`, `ListUsersCommandInput`, `ScheduledDomain`, `MouseDownAction`, `VectorType`, `DropedProps`, `IContextErrorData`, `MutationPayload`, `DotIdentifierContext`, `requests.ListHttpRedirectsRequest`, `TRPGAction`, `ITwin`, `FrameInfo`, `JoinStrategy`, `HTMLFieldSetElement`, `MemberType`, `EventRepository`, `DebugProtocol.VariablesArguments`, `TimePickerProps`, `types.FormatTransfer`, `CreateRepositoryResponse`, `MachineEvent`, `ClientSocket`, `IConnector`, `Reflecting`, `TreeView`, `MockModelRunner`, `ImageFiltering`, `ImportCodeAction`, `DeSerializers`, `HashAlgorithm`, `FunctionDataStub`, `Root`, `DataViewsService`, `NgModuleDefinition`, `SafetyNetConfig`, `StackingContext`, `Provide`, `DocSegmentKind`, `RegExpExecArray`, `CryptoWarsState`, `FormOutput`, `IOutputs`, `FileFlatNode`, `LinearProgress`, `ReadModelQuery`, `ChangesetIndex`, `RulesByType`, `CanvasEditorRenderer`, `SubjectSetConstraint`, `PanEvent`, `CircularQueue`, `Public`, `IdentifierDecorator`, `DownloadProgress`, `AbstractVector`, `vscode.Progress`, `AggObject`, `CounterfactualEvent`, `IResultGroup`, `IProjectConfig`, `RefedMixin`, `LocalizeParser`, `CreateApiKeyCommandInput`, `TemplatingEngine`, `Hotspot`, `ListOfRanges`, `ScraperArgs`, `ITransactionProps`, `TReducer`, `ProperLayeredGraph`, `AutoImportSymbol`, `GasTokenValidator`, `LoggingService`, `EventNames`, `DependencyTree`, `BluetoothDevice`, `AppExtensionService`, `Traversable3`, `ScenarioService`, `Gem`, `JointOptions`, `PouchDB.Core.Document`, `PartyMatchmakerAdd`, `Dimension`, `FocusTrapManager`, `TracklistActions`, `PluginManifest`, `ModalsState`, `DeployFunction`, `ExoticComponent`, `BSQRegex`, `LiveAnnouncerDefaultOptions`, `ITaskFolder`, `SyntaxKind`, `CombinedJob`, `ListIPSetsCommandInput`, `typescript.CompilerOptions`, `ThenableReference`, `DurationEvent`, `LockFile`, `TeliaMediaObject`, `Toaster`, `ExecutableCallRegular`, `MIRTypeOption`, `DOMMatrixInit`, `IDeclaration`, `BuildInfo`, `webpack.loader.LoaderContext`, `ThemableDecorationRenderOptions`, `later`, `ChangedElementsDb`, `THREE.Object3D`, `IColumnIndices`, `FeatureRegistry`, `MigrationSummary`, `GreetingStruct`, `UpdateApplicationDetails`, `PointGraphicsOptions`, `ServiceFlags`, `CompletionsProviderImpl`, `TurnTransport`, `AST.AST`, `TestObject`, `FloatBuffer`, `ASTNode`, `TypeList`, `VariableMap`, `DataTableDirective`, `DisjunctionSearchQuery`, `SRoutingHandle`, 
`GetGroupCommandInput`, `SpatialDropout1D`, `FleetStatusByCategory`, `KeyCombine`, `Tile`, `IdentityService`, `IApprovalPolicyCreateInput`, `RemoveArrayControlAction`, `MediaModule`, `Vec4Term`, `Syntax`, `ICompileService`, `AccountEntity`, `schema.Document`, `Ingredient`, `ImageHandler`, `MultiChannelAssociationCCRemove`, `LightSetting`, `ContractsService`, `Ex`, `Tray`, `d.WatcherCloseResults`, `btQuaternion`, `InputManager`, `ViewportService`, `DoubleLinkKVStore`, `jasmine.Spy`, `AppConfirmService`, `DrawCall`, `MockComment`, `FnN`, `LayoutComponent`, `SdkStreamDescriptor`, `IBranchListItem`, `DefaultClause`, `CoreRouteHandlerContext`, `TypedDocumentNode`, `Tsoa.Metadata`, `RestPositionsResponse`, `RateLimitArguments`, `AnnotationShape`, `IHttpResult`, `DateParser`, `PatternEnumPropertyOption`, `ImageConfig`, `GherkinException`, `CreateContextReturn`, `AsyncSnapshot`, `optionsInfo`, `FalsyPipe`, `IFormItemProps`, `HMACParams`, `DescribeDashboardCommandInput`, `IAnalyticsService`, `AxisLabelFormatter`, `SchemaObjCxt`, `CreateAccountParams`, `ReducerManager`, `CreateChannelMessage`, `ProductSet`, `IButtonStyles`, `EvaluatorFlags`, `WebappClient`, `IHandlebarsOptions`, `CompletionInfo`, `GameBase`, `CallHandler`, `WebdriverIO.Element`, `ts.LabeledStatement`, `GlobalStringInterface`, `CSharpResolversPluginRawConfig`, `Discord.Message`, `lsp.Range`, `VersionedTextDocumentIdentifier`, `DynamicsContext`, `FileResponse`, `HeadersJson`, `Base`, `pulumi.Output`, `JPAExtraShapeBlock`, `EntitySubject`, `RunSegment`, `RunnableTask`, `BackupService`, `IPluginModule`, `SlideDefinition`, `WechatyPlugin`, `CronConfig`, `CameraRigControls`, `ResourceAction`, `NodeImmut`, `FactionMember`, `AutoOption`, `TEElement`, `AuthMachineEvents`, `WritableStream`, `ApiItemContainerMixin`, `Invoice`, `SearchSourceDependencies`, `ItemIdToExpandedRowMap`, `DynamoDB.UpdateItemInput`, `IRouterSlot`, `Node3D`, `ISecurityGroup`, `UiSettingsCommon`, `Model`, `ClipRectAreaModel`, `WebGL1DisjointQueryTimerExtension`, `DestinationsByType`, `GraphQLNonInputType`, `ISavedObjectsRepository`, `IDateRange`, `IRequestResponse`, `HandPoseConfig`, `ThemedComponentThis`, `FuzzyLocale`, `SankeyDiagramLink`, `Inode`, `ArgumentDefinition`, `MeshBasicMaterial`, `Giveaway`, `IPdfBrick`, `ChromiumBrowserContext`, `JobChannelLink`, `StageInfo`, `CompareFn`, `FilterFunctionReturnType`, `GraphQLDatabaseLoader`, `MessageError`, `NodeObject`, `android.view.MotionEvent`, `ElementAnalysis`, `evaluate.Options`, `IMappingFieldInfo`, `NavigateToItem`, `CookieSettingsProps`, `ServiceProviderAdapterMongoService`, `IProjectNode`, `requests.ListNetworkSourcesRequest`, `CopyOptions`, `TripleIds`, `AnnotationChart`, `ClientAssessments`, `BVEmitter`, `TestElementProps`, `WordCloudSettings`, `StrategyOrStrategies`, `OrderedDictionary`, `Filesystem.ReadJsonAsync`, `T7`, `TransferHotspotV2`, `ClassNameType`, `DebugConfigurationModel`, `BezierSeg`, `CapDescriptor`, `MochaDone`, `UpdateInfoJSON`, `FirebaseFirestore.Firestore`, `EventArgDeclaration`, `PreviouslyResolved`, `Amount`, `GetEnvironmentTemplateVersionCommandInput`, `Fetcher.IEncrypted`, `TextInputType`, `LayoutProps`, `InfiniteLine`, `EdmxEnumMember`, `Failure`, `InfluxDB`, `RenderContext3D`, `QWidget`, `CarsService`, `TupleIndexOpNode`, `Phaser.Math.Vector2`, `ComponentStory`, `DriftConfig`, `ForwardingState`, `ComponentRuntimeMembers`, `CoverageFragment`, `PropType`, `AbstractSqlConnection`, `Tweet`, `IpcAPI`, `RecordingOptions`, `ts.GetAccessorDeclaration`, `AgentConnection`, `TypedArrayConstructor`, 
`SupervisionCCReport`, `GrantAccessData`, `GetShapeRowGeometry`, `IChannelAttributes`, `WithoutSheetInstance`, `EnrollmentAPIKey`, `IFilters`, `MeshPhysicalMaterial`, `InputConfig`, `SentryScopeAdapter`, `ActionParamException`, `LinkedList`, `OutdatedDocumentsTransform`, `TransactionAuthFieldContents`, `TestClock`, `HttpRequest`, `LoadableClassComponent`, `IHttpGetResult`, `CannonPhysicsComponent`, `AnonymousType`, `MappedSingleSourceQueryOperation`, `Predicate2`, `_`, `ListProjectsCommandInput`, `IApplicationShell`, `SimpleAllocationOutcome`, `DescribeResourcePolicyCommandInput`, `DemographicCounts`, `IAssets`, `GlobalDeclaration`, `ErrorBag`, `IExtensionElement`, `SendOptions`, `d.BuildTask`, `XPCOMObserverTopic`, `requests.ListCloudVmClustersRequest`, `ParsedSearchParams`, `MediaRule`, `AndroidActivityResultEventData`, `ValidationMetadata`, `Loading`, `ThemeColorable`, `IEditorController`, `Dree`, `RoleResolvable`, `CLLocationCoordinate2D`, `UnlockedWallet`, `PrivateKey`, `LogInfo`, `ts.PropertyAssignment`, `ResolverFactory`, `parseXml.Element`, `AppApp`, `VariableNames`, `FloatFormat`, `GroupArraySort`, `LinkData`, `TestHelper`, `Visualization`, `TaskProps`, `SModelElement`, `WeightsManifestEntry`, `SaveGame`, `LogViewer`, `CurrentState`, `Nature`, `GettersFor`, `TKey`, `DocumentUri`, `LaunchEventData`, `INodeMap`, `TermSet`, `PredicateProvider`, `TaskExecutionSchema`, `DataKeyTypes`, `IOnValidateFormResult`, `schema.Context`, `Approval`, `AppInstanceProposal`, `LineView`, `DbCall`, `EventType.onInit`, `IStrokeHandler`, `VoiceFocusTransformDeviceObserver`, `Hash`, `TreeItemCollapsibleState`, `HookData`, `BottomNavigationBar`, `CSharpProperty`, `TestCaseInfo`, `DeleteArchiveCommandInput`, `RatingStyleProps`, `AndroidSplashResourceConfig`, `IndentToken`, `ExpandedArgument`, `ActionProps`, `mmLooseObject`, `ExecController`, `FleetStartServices`, `IImage`, `SubType`, `IStoreService`, `Package.ResolvedFile`, `AttestationsWrapper`, `MathContext`, `VercelClientOptions`, `E.Either`, `DashboardPanelState`, `AwsCallback`, `X12Transaction`, `ThroughputSettingsGetResults`, `GlyphVertices`, `RtcpSrPacket`, `DataTable.Row`, `YAMLWorker`, `DropdownMenuItemLinkProps`, `EnteFile`, `CachedQuery`, `ListColumn`, `Map4d`, `DiffResult`, `QuotePreference`, `ItemRepository`, `TSESTree.Decorator`, `UserInfoResource`, `ProjectReflection`, `OmvFeatureFilter`, `RawBuilder`, `SwitchAst`, `ClientFileSearchItem`, `ManagementAgentPluginGroupBy`, `DashPackage`, `ITestReporter`, `LookupResult`, `CurrencyMegaResult`, `WebApiConfig`, `MockMAL`, `CubicBezierAnimationCurve`, `ExternalAttributionSources`, `GraphQlQuery`, `CartService`, `V1ClusterRoleBinding`, `ConnectListener`, `TMap`, `Water`, `JProject`, `SettingsStore`, `ChartDataPoint`, `Ripemd160PolyfillDigest`, `RouteProp`, `OptimizationContext`, `SFATextureArray`, `TextWithLinks`, `DatasourceConfig`, `Club`, `VideoObject`, `PropAliases`, `UpdateMigrationDetails`, `CandidatesService`, `IGroupInfo`, `S1GRDAWSEULayer`, `TypeCondition`, `ENUM.SkillRange`, `UrlSegment`, `DefaultTheme`, `ShuffleIterator`, `ImageHandlerEvent`, `Skeleton_t`, `MockProxy`, `NullConsole`, `MSGraphClient`, `WorkflowActivateMode`, `WorkRequestOperationType`, `VisualizationsAppExtension`, `SignedResponse`, `azure.Context`, `DoubleLinkedListNode`, `NodeJS.WritableStream`, `JSExcel`, `EnhancedGitHubIssueOrPullRequest`, `EntryNested`, `DiscoverPlugin`, `Anthroponym`, `CustomState`, `FeatureState`, `AppAPI`, `CylinderGeometry`, `MatProgressSpinnerDefaultOptions`, `MinionStatus`, `Teams`, 
`MarkdownContentService`, `AuthTokenInfo`, `ComponentComment`, `ComponentNode`, `LookUpResult`, `KernelMessage.IIOPubMessage`, `MangleOptions`, `IntlType`, `BooleanNode`, `ExpressionValue`, `PiScopeDef`, `DirtyStyle`, `GeometryPartProps`, `TransferState`, `KBarState`, `WebDriver`, `DateTimeModel`, `ManagedEvent`, `EcsMetricChange`, `IAppEnvVar`, `TouchEventHandlerType`, `StorageObjectAcks`, `MonsterArenaStats`, `WrappableType`, `PlaylistEntry`, `BinarySensorType`, `DateBatch`, `TxOut`, `ReuseTabService`, `CollectionDefinition`, `ProductContentPipe`, `ZebuLanguage`, `DialogContextValue`, `LocatedError`, `IChatJoinProperties`, `CBCentralManager`, `ActionService`, `ParserSourceFileContext`, `CspConfig`, `DataTransferItem`, `Conference`, `SvelteSnapshotFragment`, `BugState`, `VPosition`, `SessionStorage`, `IComparer`, `ApplicationSubmission`, `MacroTask`, `IT`, `MutableRef`, `MoveLandedType`, `MicrosoftSqlServersResources`, `SYMBOL`, `PublicationView`, `config.Data`, `AbiOwnershipBody`, `AuthzService`, `ITaskConfig`, `AudioVideoFacade`, `EditorChangeEvent`, `RemoveTagsCommandInput`, `nodes.Identifier`, `OnboardingPage`, `QueueInfo`, `AffineFold`, `CupertinoDynamicColor`, `MapState`, `LocalProps`, `WithGenerics`, `WS.MessageEvent`, `OutputTargetDistGlobalStyles`, `TypePredicate`, `TestDirectEscrow`, `Admin`, `TensorContainer`, `SignShare`, `GithubAuthTokenRepository`, `ComputedUserReserve`, `IIterationSummary`, `ReadonlyQuat`, `Mars.NumberLike`, `Enumerable`, `SettingsOptions`, `Discriminated`, `IVocabularyItemWithId`, `ITxRecord`, `OverlayRef`, `CreateSubscriptionRequest`, `Crumb`, `ts.ForOfStatement`, `TComAndDir`, `DocumentFormattingParams`, `ViewRegionInfoV2`, `requests.ListIncidentResourceTypesRequest`, `CSSDocument`, `StatsTree`, `IConnectionCredentialsQuickPickItem`, `CustomControlItem`, `BasePrismaOptions`, `UnitRuntimeContext`, `SeparatorAxisTest2D`, `TestTemplate`, `DictionarySchema`, `IOdspTokens`, `QuerySort`, `MassetMachine`, `CurrentDevice`, `IPersona`, `Pick`, `ParsedTypeDetailed`, `CourseDuration`, `Container3D`, `FieldTransformConfig`, `SolutionDetails`, `EsDataTypeUnion`, `IUiStateSlice`, `MapBounds`, `League`, `ExtractorEventEmitter`, `DeleteSubnetGroupCommandInput`, `SavedObjectFinderUiProps`, `HelpList`, `IDType`, `InstanceWithExtensions`, `AudioBuffer`, `OverviewSourceRow`, `GraphAnimateConfig`, `PreviewComponentProps`, `MemoryX86`, `ITagProps`, `ListTournamentRecordsAroundOwnerRequest`, `PlayerClass`, `RuleChild`, `IGetCountsStatistics`, `AxisProps`, `CursorModel`, `UIElement`, `WhiteBalance`, `DirectionLight`, `RLANKeyframe`, `HookFn`, `InterfaceEvent`, `C`, `Timesheet`, `UnionOrIntersectionType`, `Critter`, `NotificationHandler`, `pageNo`, `ILinkedNodeWithValue`, `Free`, `ethers.BytesLike`, `IJsonStep`, `StubbedInstanceWithSinonAccessor`, `AnyArenaNode`, `HTMLVmMenuItemElement`, `DevToolsExtensionContext`, `requests.GetAllDrgAttachmentsRequest`, `SecurityService`, `LastFmApi`, `NetworkName`, `PropertyEditorParams`, `VisOptionsProps`, `CasesClientInternal`, `XAxisTheme`, `Keystore`, `NodeLoadMetricInformation`, `RepoNameType`, `vscode.DebugAdapterExecutable`, `ExceptionBreakpoint`, `IServerParams`, `ISubscriber`, `Layouter`, `UIWaterStorage`, `ShellComponent`, `StoreType`, `UVSelect`, `NormalizedConfig`, `FormEventDetail`, `SimpleType`, `MutationObserverWatcher`, `ViewportOptions`, `TimeoutError`, `CommandOptions`, `Coordinate`, `InternalTakomoProjectConfig`, `DataLoaderOptions`, `ParsedCssFile`, `Decorators`, `DaffOrderFactory`, `ISuiteResult`, `Hostname`, 
`NotificationComponent`, `PanelLayout`, `SheetsArray`, `PageBlobClient`, `TocItem`, `MultiValueProps`, `IEntityOwnership`, `TimerProps`, `UnitsImpl`, `FactorGradient`, `ThemeTool`, `ListCardContent`, `SuperClient`, `LoadCallback`, `ClientCredentialsResponse`, `PerfState`, `VFSEntry`, `InputEventKey`, `HsShareUrlService`, `SpriteFont`, `AlertNavigationRegistry`, `FunctionField`, `UIViewController`, `kind`, `Transaction.Options`, `TokenIndexedCoinTransferMap`, `YieldExpression`, `UIFont`, `ExternalModuleInfo`, `EnrichedDeprecationInfo`, `MatrixReader`, `NamedObjectDef`, `MockState`, `DescribePendingMaintenanceActionsMessage`, `ESTree.CallExpression`, `DecoderError`, `SiteConfigResource`, `ComponentCommentIterator`, `ODataApiOptions`, `APIGatewayProxyHandler`, `MonitoringData`, `ResourceRecord`, `VectorTransform`, `ResponsiveAction`, `BuildMatch`, `requests.ListRunLogsRequest`, `GetEndpointCommandInput`, `AppLeaveHandler`, `MobileCheckPipe`, `InterfaceWithConstructSignatureOverload`, `cg.Color`, `Consultant`, `LoginResultModel`, `GitHubItemSubjectType`, `TransactionCtorFields`, `XhrFactory`, `IHttpPromise`, `DuiDialog`, `ArianeeWalletBuilder`, `MockElement`, `MarkMessageAsSeenCommand`, `CrochetTypeConstraint`, `DiagnosticRuleSet`, `AbbreviationInfo`, `ESTestIndexTool`, `Danmaku`, `PolicyResult`, `ProcessRequestResult`, `BasicLayoutProps`, `SafeString`, `d.CompilerModeStyles`, `UpdateFn`, `EventSystemFlags`, `TextContent`, `CompilerEventBuildLog`, `UpSetQuery`, `ITemplatedBundle`, `VirtualNetworkGatewayConnection`, `PopoverOptions`, `RecordSource`, `ServiceMetadata`, `StepResultGenerator`, `PolyBool.Shape`, `TKey1`, `VRMCurveMapper`, `FileRange`, `Uniform`, `ThemedStyledProps`, `Matrix2d`, `IAlertProps`, `ConstantNode`, `Python`, `ResponseBuilder`, `BlockingResponse`, `KernelSpec`, `Engine`, `ResolvedCSSBlocksEmberOptions`, `ScoreHeader`, `pd.FindSelector`, `AureliaProgram`, `VisualizationData`, `JSX.TargetedEvent`, `SwappedToken`, `HttpChannelWrapper`, `ConnectionData`, `requests.ListExportsRequest`, `ISectionProps`, `ResponderConfiguration`, `SourceBufferKey`, `BroadcastService`, `DaffCategoryFilterEqual`, `OptimizerConfig`, `PropValidators`, `SystemStyleObject`, `ClientData`, `InternalInstanceState`, `SystemIconStyles`, `StringLiteralLike`, `CloudFormation`, `StacksOperationOutput`, `ReactClient`, `PhraseFilterValue`, `angular.IScope`, `NodeDisplayData`, `ValuedConfigurationMetadataProperty`, `JSONRoot`, `DMMF.Model`, `requests.ListIdpGroupMappingsRequest`, `TokenPricesService`, `XcomponentClass`, `RoomStoreEntryDoc`, `AmbientZone`, `OrderStatusReport`, `Inputs`, `FragmentElement`, `Datasource`, `CommandStatus`, `EightChar`, `express.Router`, `UpdateInputCommandInput`, `CodeScopeProps`, `s.Node`, `TransactionsBatch`, `EvolvingArrayType`, `IUserSession`, `Device`, `EmaSubscription`, `HTMLCanvas`, `ImageEffectDirector`, `RepoClient`, `CLValue`, `HdLitecoinPaymentsConfig`, `SubmissionObject`, `IQuestion`, `SimpleItemPricing`, `ISnapshot`, `ApplicationOpts`, `workerParamsDto`, `angular.IRootScopeService`, `SchemaDifference`, `IAtomMdhd`, `ShapeInfo`, `DefaultIdentity`, `GX.BlendMode`, `CaseInsensitiveMap`, `IExecOptions`, `BuildNode`, `TimePickerModel`, `IgnoresWrappingXmlNameCommandInput`, `BehaviorNode`, `ActionCreatorWithoutPayload`, `FieldsInModel`, `DomainDeprecationDetails`, `ButtonStyles`, `TranslatePipe`, `DataMessage`, `ColorRGBA`, `PropertyContext`, `UpdateExceptionListItemSchema`, `PhantomWallet`, `OptionTypeBase`, `CdkDragDrop`, `StandardClock`, `RadarPoint`, `TJS.Definition`, 
`GmailResponseFormat`, `ParameterizedValue`, `DeleteLoggingConfigurationCommandInput`, `NgGridItemPosition`, `DisplayListRegisters`, `SelectContext`, `PreviewSettings`, `StyleHelpers.QuoteInput`, `TextPlacements`, `TreeStateObject`, `IssueWithStatus`, `MimeContent`, `ITriggerResultObject`, `vscode.DebugConfiguration`, `ConfigLoader`, `Tournament`, `ListConfigurationSetsCommandInput`, `ContentOptions`, `IViewProps`, `ChangelogJson`, `FormatGraph`, `OnPreResponseInfo`, `FormControlState`, `CarouselButton`, `ShaderType`, `ToolAssistanceInstruction`, `VideoInputDevice`, `RequestService`, `ReqMock`, `SignerPayloadJSON`, `PartitionFilter`, `CustomFormControl`, `BaseManifestGenerator`, `BlockchainGatewayExplorerProvider`, `ContractDeployer`, `ConverterContext.Types`, `FunctionC`, `VanessaEditor`, `MemoryEngine`, `InitializeStateAction`, `PrimitiveSelection`, `TextureCubeFace`, `ActivatedRouteSnapshot`, `ElementProps`, `TSPosition`, `TargetResponderRecipe`, `ISearchLocation`, `IOObjectSet`, `ClassEntry`, `PlansCategories`, `bank`, `EncryptedMessageWithNonce`, `StepExtended`, `ListDeviceEventsCommandInput`, `PagesService`, `DomainEndpointOptions`, `RegistryVarsEntry`, `BuildPackage`, `ThermocyclerModuleState`, `Variant`, `Stats`, `CreateAppFunction`, `PatchFunction`, `B12`, `GroupInfo`, `DecodingTransformer`, `LocalizedError`, `NormalizedVertex`, `CompiledExecutable`, `LanguageModelCache`, `ISignalMessage`, `SliderEditorParams`, `T10`, `IntrospectionField`, `ts.TextRange`, `TSTypeParameter`, `VariableGroupData`, `ElasticsearchClientConfig`, `MdxTexture`, `SystemService`, `NzNotificationService`, `UpdateCommand`, `MIRResolvedTypeKey`, `GroupingCriteriaFn`, `Columns`, `HTMLOListElement`, `Highcharts.AnnotationControlPoint`, `CreateSiteCommandInput`, `settings.Settings`, `ColorStop`, `IInterceptorOptions`, `DOMRectList`, `IController`, `CLICommand`, `SpeakersState`, `ECDb`, `RenderResult`, `INodeCredentialDescription`, `S3.GetObjectRequest`, `PadData`, `OptimizeModuleOptions`, `SourceGroup`, `GauzyCloudService`, `EmbeddableStateTransfer`, `EventSource`, `THREE.Camera`, `ExtractorContext`, `Assignment`, `ChainableElement`, `ExpressionValueSearchContext`, `ThyUploaderConfig`, `TSpan`, `PartialTypeGuard`, `Conversion`, `DOMNode`, `ResEntry`, `Mutable`, `ExecaError`, `ResponsiveService`, `DeleteCampaignCommandInput`, `FileIncludeReason`, `TargetedMouseEvent`, `AlfredConfigWithUnresolvedTasks`, `ClientPayload`, `TileLoader`, `IFakeFillerOptions`, `TemplateWithOptionsFactory`, `IWaterfallSpanOrTransaction`, `QueuedEvent`, `ConstRecord`, `EventObject`, `NgModuleType`, `ExpressionContainer`, `SkillMapState`, `ReactQueryMethodMap`, `VMoneyOptions`, `FusedTeamMemberType`, `VisToExpressionAst`, `DebugProtocol.InitializeRequestArguments`, `DateRangeValues`, `RowTransformCallback`, `SubmissionQueueItem`, `React.ReactInstance`, `IBasicSession`, `ElementRefs`, `PluginInitializer`, `TCountData`, `AstVisitor`, `AccessKey`, `CF.Get`, `RoomPosition`, `Imported`, `ECPoint`, `LocalOptionsMap`, `IStage`, `Review`, `IBoot`, `RematchRootState`, `A5`, `ProductUpdateReason`, `DevicesStore`, `EngineRanking`, `UserDto`, `OperationDefinitionNode`, `d.Logger`, `TrialType`, `HeaderMapType`, `Errno`, `CspConfigType`, `NinjaPriceInfo`, `tfc.io.ModelArtifacts`, `SVGTemplateResult`, `RowViewModelFactory`, `requests.ListBlockVolumeReplicasRequest`, `TLinkedSeries`, `DatabaseObject`, `RectangleProps`, `IBinaryDataConfig`, `StoryApi`, `HalfEdgePositionDetail`, `BitcoinBalanceMonitorConfig`, `Bunjil`, `UseInfiniteQueryResult`, 
`ISourceNode`, `CoreOptions`, `WebGLTexture`, `FileSystemEntryKind`, `CardProps`, `NamedImportsOrExports`, `EndpointName`, `SerializedNodeWithId`, `Monad2`, `ColorOverrides`, `MockSerialPort`, `AnomalyRecordDoc`, `IFluidResolvedUrl`, `Show`, `LanguageOption`, `RBNode`, `ArcTransactionDataResult`, `CustomQueryState`, `SentryCli`, `ResourceProps`, `CommandExecution`, `ArticleState`, `GenericTable`, `LinkInfo`, `ToTypeNode.Context`, `IValueChanged`, `TypedKeyInfo`, `DocumentSymbolCollector`, `DocumentClient.QueryInput`, `T9groups`, `PluginInitializerContext`, `UpdatePhotoDto`, `IConsumer`, `ITagsState`, `CustomError`, `MatchRecord`, `u8`, `IterationStatement`, `apid.AddRuleOption`, `IVueComponent`, `SignInOutput`, `Toast`, `BigIntConstructor`, `FlameGraphNode`, `ISolution`, `RTCSctpTransport`, `KeyboardEventHandler`, `TextFieldWithSelectionRange`, `BsModalService`, `UrbitVisorState`, `PartitionConfig`, `ISharedObjectRegistry`, `ReactCrop.Crop`, `NzCalendarHeaderComponent`, `QMParam`, `ISharedFunctionCollection`, `TestNode`, `SkeletonField`, `IAutoEntityService`, `ImageFormatTypes.JPG`, `CmbInstance`, `UserClients`, `ExecOptions`, `ICfnFunctionContext`, `TooltipController`, `FindArgs`, `TRoutes`, `ProxyAgent`, `DefaultDataServiceFactory`, `DescribeDatasetCommandInput`, `ParseTree`, `TilemapData`, `RecipientCounts`, `DaffCategoryFilter`, `ComponentResult`, `ComputeVariant`, `EuiBasicTableColumn`, `DataLayer`, `IColorValueMap`, `BeButtonEvent`, `BridgeDeploy`, `PerformanceEntry`, `ClientBuilder`, `TransportWideCC`, `WebRTCConnection`, `BattleCommitment`, `OptionObject`, `IProcess`, `ObservableApplicationContextFactory`, `TransitionDefinition`, `Matrix44`, `WebsocketProvider`, `CommandCreator`, `CaptionElementProps`, `EsLintRule`, `QueryHints`, `StageInterviewRepository`, `OnDiskState`, `FormSubmissionState`, `NoiseModule`, `AFSReference`, `MutableGeoUnitCollection`, `GraphQLEnumValue`, `VAceEditorInstance`, `ExtensionPriority`, `RoleDto`, `HTMLTableCellElement`, `TxMassMigration`, `VAF1`, `UserPrivilegeService`, `LocalStorageSinks`, `AnimationChannel`, `TraceEvent`, `ParsedSite`, `Convert`, `IProductOptionTranslatable`, `J3DModelInstanceSimple`, `URLTokenizer`, `SerializedAnalysis`, `ISemver`, `MeetingSessionStatus`, `TreeEntry`, `ActionFactoryDefinition`, `DescribeInstancesCommandInput`, `AuthProps`, `IError`, `ParsedDID`, `IndexKind`, `DeployHelpers`, `SampleDataType`, `LoginUriView`, `GraphType`, `TableAccessByRowIdStep`, `ETHOption`, `DegreeType`, `Cmd`, `TimeOpNode`, `EidasResponse`, `ISPRequest`, `ListRunsRequest`, `SqrlConstantSlot`, `OpenLinkProfiles`, `UberChart`, `MatchmakerRemove`, `ACLCanType`, `Type_Enum`, `ESTree.Node`, `avcSample`, `XScaleType`, `HsAddDataOwsService`, `requests.ListCustomProtectionRulesRequest`, `SeriesIdentifier`, `ParserServices`, `SteeringPolicyAnswer`, `RemovableAtom`, `IScribe`, `JSONCacheNode`, `ResAssetType`, `ResolvedDependencies`, `ProfileRecord`, `TestWorker`, `OperationDefinition`, `Aspects`, `IExtentStore`, `BinaryOperatorToken`, `ExtendedLayer`, `RSTPreviewConfiguration`, `NodeTypeMetricCapacity`, `Translator`, `AsyncRequestHandler`, `CompositionItem`, `PrismaObjectDefinitionBlock`, `GetCertificateResponse`, `TextOffset`, `OpenYoloWithTimeoutApi`, `PolyIntEdge`, `RedisCache`, `TestFileSystem`, `XmlEmptyListsCommandInput`, `StateProps`, `MessageValue`, `RulesObject`, `SnapshotListParams`, `KeyStrokeOptions`, `UpdateAppInstanceUserCommandInput`, `GasOptionConfig`, `Blog`, `PDFObject`, `PixelType`, `LineUp`, `LedgerDigestUploadsName`, `Proposal`, 
`SSRHelpers`, `XSLTokenLevelState`, `IntegrationTenantService`, `TreemapSeriesType`, `StudioComponentInitializationScript`, `WebpackTestBundle`, `NativeTexture`, `VersionInterface`, `ConditionalTransferCreatedEventData`, `FacemeshOperatipnParams`, `DeadLetterConfig`, `UserAsset`, `IContainerRuntime`, `CreatedTheme`, `SchemaType`, `Svg`, `Translations`, `RoxieService`, `StencilOp`, `InterleavedBuffer`, `DispatcherPayloadMeta`, `InsecureMode`, `FeeEstimateResponse`, `Zip`, `IEntityGenerator`, `ExtendFieldContext`, `InstanceProps`, `ItemField`, `AuditorFactory`, `PageHeader`, `RtkRequest`, `Def`, `GetResourcePolicyCommandInput`, `MemDown`, `pxt.Asset`, `PDFAcroCheckBox`, `ToggleCurrentlyOpened`, `NonNullable`, `CalendarApi`, `AuthenticationStrategy`, `messages.TableRow`, `OmvFeatureFilterDescription`, `ManifestContext`, `IDashboardConfig`, `EyeGazeEvent`, `TurnClient`, `ImmutableListing`, `ValueNode`, `MeshInstance`, `RevealConfig`, `SemanticDiagnosticsBuilderProgram`, `ParameterInjectInfoType`, `RPC`, `ParsedQs`, `React.LegacyRef`, `SchemaMetadata`, `EntityNameOrEntityNameExpression`, `GenericAction`, `StyledElementLike`, `ResourceDataGridWrapper`, `TrackedCooldown`, `SnapshotQuotaExceededFault`, `JPABaseParticle`, `ReadConditionalHeadersValidator`, `IOperandPair`, `LinkRenderContext`, `IEstimation`, `FilterStateStore`, `VisiterOption`, `AppHookService`, `TextDocumentItem`, `CameraGameState`, `NativeView`, `MdcRadio`, `CHILD`, `Events.prekill`, `QueueReceiveMessageResponse`, `requests.ListAccessRulesRequest`, `HtmlContextTypeConvert`, `Eci`, `ToolbarIconButtonProps`, `MultisigTransaction`, `LensMultiTable`, `WhereExpression`, `AppenderConfigType`, `ExcludedConditions`, `ReadonlyMat4`, `Project.Root`, `Element_t`, `WorkBook`, `InstrumentName`, `FaceInfo`, `DateOrDateRangeType`, `ListsState`, `DateService`, `MenuStateModel`, `Awaited`, `Security2CCMessageEncapsulation`, `JSDocNameReference`, `WebpackConfig`, `BeanObserver`, `ThyTreeNode`, `GenericTestContext`, `ConfirmationDialogService`, `OverlayConnectionPosition`, `Sessions`, `BLSPubkey`, `IIteratee`, `LegacyWalletRecord`, `CognitoMetricChange`, `IpAddressWithSubnetMask`, `bigInt.BigInteger`, `IBlobSuperNode`, `PSTNodeInputStream`, `ITestStep`, `NotifyParams`, `ECPair`, `FileSystemWatcher`, `NexusGraphQLSchema`, `HelpError`, `requests.ListPluggableDatabasesRequest`, `DirectoryIndexOptions`, `OperationArgs`, `TreeDirItem`, `ImageVideo`, `PyrightFileSystem`, `ITimeOffPolicy`, `AuthenticateCustomRequest`, `NodeJS.ReadWriteStream`, `unreal.Message`, `MessageDescriptor`, `VercelConfig`, `Hermes`, `EditMediaDto`, `AnnotationVisitor`, `MediaQueryList`, `METHOD`, `PolyPoint`, `LogAnalyticsSourceDataFilter`, `CarouselConfig`, `PackageRelativeUrl`, `TraderWorker`, `VoiceFocusSpec`, `InputMap`, `IpRecord`, `VMContext`, `IIMenuState`, `MIRStatmentGuard`, `DeleteBranchCommandInput`, `Tagging`, `IWriteAbleSetCombination`, `LoginFieldContainer`, `PersonaId`, `ErrorCorrectionLevel`, `AirPacker`, `EntityStatus`, `ng.ILogService`, `MemoryDebe`, `GetBucketLifecycleConfigurationCommandInput`, `FileOpItem`, `TemplatePart`, `EfParticle`, `S3Client`, `DrawType`, `TableConstructor`, `AssetState`, `ThemeValueResolver`, `DeliveryTarget`, `ChatThreadClient`, `SerializationOptions`, `IDockerComposeOptions`, `ng.auto.IInjectorService`, `FfprobeData`, `RelationType`, `ItemTypeNames`, `EpochTracker`, `AddApplicationReferenceDataSourceCommandInput`, `UIButton`, `ITile`, `AggregateRowModel`, `TE`, `d.RollupChunkResult`, `DataUp`, `RangeDataType`, `TypeInferences`, 
`LeaveGroupRequest`, `FeatureItem`, `ItemStorageType`, `td.WebRequest`, `ErrorMessageTracker`, `APIProvider`, `WebGLCoreQuadOperation`, `IStackStyles`, `FlowTypeTruthValue`, `SlpRefType`, `ProxyConfig`, `TrimmerTheme`, `OptiCSSOptions`, `IAward`, `ChannelResolvable`, `IDatabaseDataSource`, `PostProps`, `ReactChild`, `HostCreatedInstance`, `Protocol.Network.ResponseReceivedEvent`, `PutAccountSendingAttributesCommandInput`, `ViewElement`, `EngineerSchema`, `DeleteAnomalyDetectorCommandInput`, `SelectValue`, `ChatPlugContext`, `TelemetryOptions`, `HydratedFlag`, `HandlerDelegate`, `Viewpoint`, `OfflineAudioContext`, `IAboutService`, `DidCloseTextDocumentParams`, `GLSL`, `HapiResponseAdapter`, `PersonService`, `SignatureInformation`, `IKeyboardBindingProps`, `StringSymbolWriter`, `RLYTPaneBase`, `GridDataState`, `CardRenderDynamicVictoryPoints`, `StoredDocument`, `RoleTuple`, `CfnExperimentTemplate`, `ActivitySettings`, `RulesetVariable`, `RightResolvable`, `IAuthHeader`, `NormalizedMessage`, `InheritanceChain`, `DialogflowApp`, `CardConfig`, `MetricId`, `RequestSuccessAction`, `NodeTag`, `ConceptResponse`, `Pass1Bytes`, `GlobalsService`, `QueryChannelRangeMessage`, `ChangelogEntry`, `SfdxFalconResultRenderOptions`, `TablePaginationConfig`, `MutableCategorizedStructProperty`, `ProgressReporter`, `Re_Exemplar`, `Queued`, `CRUDEngine`, `SceneGraphNode`, `SfdxFalconInterview`, `ValidationRule`, `TodoComment`, `ReindexActions`, `Route53`, `SecurityGroupRule`, `CoreModule`, `PROVIDER`, `CalculateBoundsOptions`, `AutoTuneMaintenanceSchedule`, `DamageType`, `ExprVisState`, `LayoutFacade`, `Share`, `CameraService`, `ShaderSemanticsEnum`, `MysqlError`, `Span_Event`, `CodeActionKind`, `SubjectsBounds`, `SVGNode`, `PddlSyntaxNode`, `InMemoryStorage`, `LoginReq`, `IAnimationState`, `EnumDictionary`, `HistoryQuery`, `backend_util.Activation`, `AsyncStepResultGenerator`, `UserView`, `EmptyAsyncIterable`, `VSCServerManagerBase`, `OuterExpressionKinds`, `PublicMethodsOf`, `ExtractorData`, `RtcpRrPacket`, `Orders`, `SimpleObjectRenderer`, `SecurityClassOwner`, `GrantIdentifier`, `NodeWithScope`, `NjsActionData`, `ModelMetadata`, `TestDialogConfig`, `FunctionalLayout`, `NonNullableSize`, `MomentValidator`, `IAssetMetadata`, `RelationField`, `AutoArchiveSettingsDelegate`, `EntryPoint`, `Parts`, `ForwardingStatus`, `ColorStyleProps`, `NameOrCtorDef`, `FileScan`, `express.Express`, `ContainerInstance`, `AccountParser`, `IntraDayDataSourceType`, `LocalEnv`, `DidChangeLabelEvent`, `BlobItem`, `WFWorkflowAction`, `MgtTemplateProps`, `displayCtrl.IInitConfig`, `ParsedTestObject`, `Enforcer`, `IPolygonData`, `GoldenLayout.ContentItem`, `TargetColumnGeometry`, `Polymer.Element`, `IPluginBinding`, `CommunicationIdentifierKind`, `LengthParams`, `ArmParameters`, `PlasmicLock`, `DequeueSharedQueueResult`, `DecodedRouteMode`, `ISuggestion`, `ICommandItem`, `ListCore`, `GoogleFont`, `TodoItem`, `Bingo`, `PointerUpdateTrigger`, `IComplexTypeEx`, `GitPullRequest`, `GithubBranch`, `ApiKey`, `DescribeReservedElasticsearchInstanceOfferingsCommandInput`, `MessengerTypes.SendOption`, `DescribeOfferingCommandInput`, `IGBCoreService`, `ZipIterator`, `GauzyAIService`, `IUploadResult`, `DescribeExecutionCommandInput`, `ExpressionAttributes`, `IndexRangeScanStep`, `PublicPlayerModel`, `TOut`, `PLSQLSymbolKind`, `SearchPattern`, `RoutingIntent`, `Blip`, `HelloMessage`, `ViewInfo`, `Frontier`, `ICompetitionDefault`, `IntersectionType`, `FormlyTemplateOptions`, `Linear`, `ScopeTreeRow`, `OpeningHour`, `InterpolationCurve3dOptions`, 
`SendTxnQueryResponse`, `ExperimentDocument`, `EventDispatcher`, `ReadModelInterop`, `IInstrument`, `K.LiteralKind`, `JSONLocationFunction`, `LintResult`, `LiteralValue`, `OperationStatus`, `IMiddlewareGenerator`, `IRouterliciousDriverPolicies`, `msRest.OperationSpec`, `displayCtrl.ICtrl`, `blockClass`, `CoreConnector`, `ChartJSService`, `DataHandler`, `IQueryBuilderPart`, `FrameRateData`, `BaseImageryMap`, `IMdcCheckboxElement`, `ReactiveEffectRunner`, `SimpleTypeFunctionParameter`, `IStarterDependency`, `ExpressRequestAdapter`, `MockAlexa`, `SettingsState`, `SearchBar`, `TrackedMap`, `IStackItemStyles`, `MarketData`, `ComponentLayoutStyleEnum`, `WorkerFunction`, `Boundaries`, `P2PRequest`, `t.Transformed`, `JSONInMemoryCache`, `ComposeSubscriber`, `NgAddOptions`, `NeedleResponse`, `Collectible`, `RhoContext`, `IModifierKeys`, `UsePaginatedQueryData`, `ResizeInfo`, `DalBoard`, `ConvLayerArgs`, `ExternalEmitHelpers`, `SandboxType`, `ValidationMessage`, `CollectBBox`, `IBsLoadingOverlayOptions`, `RestObject`, `Web3Service`, `BitcoinishTxBuildContext`, `StaffDetails`, `IdeaEntity`, `XYLayerConfig`, `P.Parser`, `AlertController`, `H`, `MagicOutgoingWindowMessage`, `ListAutoScalingConfigurationsCommandInput`, `ModalWindowProps`, `SnapConfig`, `JwtConfigService`, `WantedTopics`, `Core.Color`, `NibbleDisk`, `RouteExecutionFromOutput`, `Promisify`, `SerializableError`, `IListFormResult`, `BitmapText`, `nodes.Stylesheet`, `IOOption`, `GraphExportedPort`, `UpdateSettingModelPayload`, `BaseHeader`, `RespostaModel`, `ConfigRepository`, `DeleteStackCommandInput`, `PartialC`, `DeployedBasset`, `ISubnet`, `Viewer.SceneGroup`, `Editor`, `CesiumEvent`, `Wizard`, `RouteDependencies`, `DragTarget`, `GetArgs`, `ForceSourceDeployErrorResponse`, `LogicalType`, `GuildBasedChannel`, `ChangeInfo`, `SplitTest`, `X86Context`, `interfaces.MetadataReader`, `SnotifyToast`, `NavigatorParams`, `ConcreteSourceProvider`, `ContainerBase`, `TestAnalyzer`, `StatementedNode`, `RnM2Primitive`, `AppNode`, `ClientException`, `FeeRate`, `TwComponent`, `TaskRunnerCallback`, `AnimationTransitionMetadata`, `UpgradeDomain`, `TaskOptions`, `BabelChain`, `ICategoryCollectionState`, `MediasoupPeer`, `StaticConnectionType`, `SystemUnderTest`, `NgxTranslations`, `MDCTextFieldLineRippleAdapter`, `StateWithNewsroom`, `requests.ListAutoScalingConfigurationsRequest`, `InternalDatasource`, `ConfigLoaderResult`, `SecurityKey`, `VersionRange`, `VisTypeTimeseriesVisDataRequest`, `AnimatableColor`, `CreateTokenAccount`, `ReadStorageObjectsRequest`, `ComponentTester`, `UserIdentifier`, `SqlToolsServiceClient`, `ContextValues`, `BooleanResponse`, `VerifyJwtOptions`, `SpectatorServiceFactory`, `ErrorFn`, `WordcloudSeries.WordcloudFieldObject`, `WidgetControl`, `cheerio.Element`, `Int32List`, `UserPresence`, `DescriptorValue`, `ISpan`, `NoteSnippetEditorRef`, `TransactionWithStatus`, `ImageUrlTransformationBuilder`, `NumBopType`, `IntrinsicFunction`, `IEntryDefinition`, `ExtractCSTWithSTN`, `ModifyPoint`, `TransactionFormState`, `RequestType`, `BooleanExpression`, `Contexts`, `Collision`, `MagickReadSettings`, `SPNode`, `LimitedTypedData`, `Bluetooth`, `SubmitProfile`, `angular.IDeferred`, `GrowableXYZArrayCache`, `Types.Authentication`, `ArrayBuilderSegment`, `ICurrentArmy`, `PositionChildProps`, `StringValue`, `RTCPeerConnection`, `ListNotificationsCommandInput`, `FnAny`, `TxIn`, `UserData`, `InvitationDTO`, `IDEntry`, `StackProc`, `SourceConfig`, `PartialApplicationConfig`, `MockContainerRuntimeForReconnection`, `EntryControlCCConfigurationReport`, 
`azureTable.Table`, `Stub`, `AdbClient`, `GetResourcePoliciesCommandInput`, `AStarNode`, `ArgumentContext`, `AngularEditor`, `TooltipStateReturn`, `RequestEntry`, `CreateMasternode`, `ToastController`, `LanguageServiceHost`, `ItemMetadata`, `Song`, `UserModel`, `ApmPluginContextValue`, `GraphCalculator`, `TSReturn`, `UpdatePartial`, `IRepo`, `FunctionDefinition`, `ElementData`, `ParsedDevModuleUrl`, `ObjectRemover`, `BottomNavigation`, `MixinTable`, `IConvertContext`, `DereferencedSchema`, `MyEditor`, `ToolbarOrientation`, `EventFactory`, `LayoutParams`, `ListProfilingGroupsCommandInput`, `GetDeploymentsCommandInput`, `EnumDescriptorProto`, `IntrospectFn`, `ResolvedPackage`, `AssignedContentType`, `BriefcaseConnection`, `Applicative4`, `FeedbackRecord`, `UseCase`, `BubbleLegendItem.RangesOptions`, `IRecordReference`, `IDateFilter`, `Miner`, `MultiLineStringDataVariant`, `PageDescriptor`, `IFeatureSet`, `AlphaDropoutArgs`, `ListFunctionsCommandInput`, `TaskTypes`, `MultiSelectProps`, `FractionalOffset`, `Elem`, `NodeDefinition`, `CachedToken`, `MessagingDeviceResult`, `SettingOption`, `IssueCommentState`, `StyleMapping`, `NormalizedProvider`, `ThemeContextValue`, `SchemaFunctionProperty`, `CustomResourceRequest`, `PeopleSearchScroller`, `AtomizeNecessary`, `SelectInfo`, `NumRange`, `ThemeSpec`, `CommitOptions`, `InMemoryDriver`, `CreateStateContainerOptions`, `GuideType`, `FileManager`, `DashboardProps`, `TaskFunction`, `CreateArgs`, `LocalVarInfo`, `AddressChainType`, `RequestHandlerContext`, `RenderColumn`, `RemirrorManager`, `UserSubscriptions`, `AcceptResult`, `HTMLOptionElement`, `Indexes`, `StoreMetaInfo`, `IRenderContext`, `PhysXPhysicsMaterial`, `MathViewProps`, `Uri`, `gameObject.Fish`, `TypeDefs`, `EngineMiddleware`, `UrlMatchResult`, `ReaderMiddleware`, `Reward`, `IJSONSegment`, `JoinCandidateBuilder`, `ZoomSettings`, `RawOptions`, `BigIntInstance`, `PiBinaryExpression`, `CreateProjectCommandInput`, `BSplineSurface3dH`, `ExtensionModule`, `MintInfo`, `IFormFieldData`, `MigrationContext`, `DeployingWallet`, `StaticService`, `HKT`, `LinearSweep`, `GetRepositoryPolicyCommandInput`, `Exec`, `ISmsOptions`, `requests.ListApmDomainsRequest`, `MigrateOptions`, `GaugeStatus`, `GithubConfiguration`, `WorkRequestError`, `ComponentsCompiler`, `PostService`, `InteractionService`, `LanguageCCReport`, `ValueOf`, `KeyframeInfo`, `MDCDrawerAdapter`, `Init`, `UhkDeviceProduct`, `DescribeLoadBalancerAttributesCommandInput`, `RegistrationPage`, `$N.IBaseNode`, `GeoLevelInfo`, `ITagSet`, `MeetingCompositePage`, `ChannelList`, `Relations`, `AuthorizationNotFoundFault`, `FIRDatabaseReference`, `AccessControl`, `NgModuleRef`, `ICloudFoundryCreateServerGroupCommand`, `SearchProfilesCommandInput`, `Font`, `TestAccount`, `commonmark.Node`, `TextEditorPropertiesMain`, `IKeyPair`, `ActionGroup`, `MDCMenuFoundation`, `PointCloudOctreeNode`, `AggregateField`, `RTCIceParameters`, `IActorContext`, `IWebhookRequest`, `CssProperty`, `AnyRouter`, `URI`, `CachingLogViewer`, `TService`, `OptionalEntry`, `OptionEntry`, `Stem`, `FilterManager`, `Repeater`, `MatButtonToggleChange`, `ServerSettings`, `RuntimeWorker`, `CourseName`, `MdcSwitch`, `SessionState`, `SupportedExt`, `models.IArtifactProvider`, `IQuery`, `EngineArgs.DiagnoseMigrationHistoryInput`, `ODataService`, `DaffCategoryFilterRangeNumericFactory`, `FileEditActions`, `SourceCodeInfo_Location`, `EncryptedDataKey`, `GitHubApi`, `MosaicPath`, `EnvValue`, `JsonDocsUsage`, `FacadeConverter`, `HierarchyRectangularNode`, `ReadableStreamReader`, 
`SnotifyToastConfig`, `DescribeFargateProfileCommandInput`, `FormBuilderConfiguration`, `Auction`, `NodeOutput`, `PixelRendr`, `ClientStatus`, `PluginCallbacksOnSetArgument`, `IAnimatedCallback`, `commandParser.ParsedCommand`, `XRFrame`, `IIOPubMessage`, `TestApp`, `TypeOptions`, `SimpleAnalyzer`, `PutAccountDedicatedIpWarmupAttributesCommandInput`, `StepResultAfterExpectedKey`, `EventsService`, `GeometryValue`, `ISceneLoaderAsyncResult`, `GoalService`, `DirectoryWatcher`, `MarkdownString`, `fixResults`, `SelectableItem`, `e`, `IMapper`, `com.stripe.android.model.PaymentMethod`, `Pokedex`, `OptionMessage`, `IGetItem`, `IHandlerAdapter`, `ICallback`, `TransitionCheckState`, `StateInline`, `TimechartHeaderProps`, `PointS`, `Kind2`, `ShareMap`, `FieldFormatsRegistry`, `AsyncArrayCallback`, `ChainFunction`, `MultiStats`, `CategoryCollection`, `requests.ListChannelsRequest`, `Canvg`, `formatLinkHeader.Links`, `RpcServerFactory`, `DAOcreatorState`, `SiteClient`, `ConfigurationPropertyValue`, `Fixed18`, `ServiceContainerConfig`, `SessionManager`, `ServiceScope`, `AV1RtpPayload`, `WrappedWebGLProgram`, `State.Transaction`, `MulticallClient`, `UInt16`, `SmsProvider`, `PixelMapTile`, `Mesh2D`, `NormalizedOption`, `SimpleASTNode`, `LogicAppInfo`, `FileWriter`, `KeyboardNavigationHandler`, `PatchOptions`, `NumberLabel`, `CONFIG`, `PathItem`, `DoubleMapCallback`, `ColorFunc`, `FileTransportInstance`, `EventListener`, `ServiceEndpointPolicy`, `IResolvers`, `NavbarProps`, `ICreateCommitParams`, `FormEventHandler`, `Toggle`, `IProjectMetadata`, `IDBEndpoint`, `MagicSDKWarning`, `TimeSeries`, `DayResults`, `ImagesContract`, `Cypress.Response`, `StyleSheetList`, `ReaderConfig`, `Expectation`, `NavAction`, `TSESTree.ClassDeclaration`, `esbuild.Plugin`, `DoublyLinkedListNode`, `ListingModel`, `IncomingDefault`, `DataBeforeRequestOptions`, `CGPoint`, `PutResourcePolicyCommandOutput`, `PresenceSync`, `LoginDTO`, `OidcRegisteredService`, `RoleState`, `IExpressionEvaluationContext`, `AcceptTokenResponse`, `AppHistory`, `IUri`, `WorldObject`, `Chord`, `NonCancelableCustomEvent`, `ListenerType`, `IAmazonNetworkLoadBalancerUpsertCommand`, `CityBuilderStore`, `ImportedNamespace`, `EmbeddedViewRef`, `pf.StackContext`, `_.Iso`, `Class`, `EndpointBuilder`, `UpdateJobResponse`, `RefactoringWithActionProvider`, `SearchProps`, `jest.MatcherUtils`, `BranchPruner`, `Bone`, `IBlockchainObject`, `VectorStylePropertiesDescriptor`, `tf.TensorBuffer`, `BinaryType`, `SessionData`, `DBDocument`, `DBType`, `Tween`, `DecorationOptions`, `ModuleLinks`, `CompilationResult`, `StorageContainer`, `WebGLResourceRepository`, `StoreModel`, `ComponentConstructor`, `ColorAxis`, `IType`, `IHud`, `CanvasFontFamilies`, `HTMLStyle`, `IArea`, `ExportNamedDeclaration`, `TitleProps`, `IterableExt`, `WellState`, `CreateListenerCommandInput`, `ContractContext`, `TestFormComponent`, `RedirectRequest`, `LucidModel`, `SSHExecCommandResponse`, `IFactory`, `AlertWrapperProps`, `TColumnRowPair`, `ResourceHandlerRequest`, `IUserInfo`, `React.SetStateAction`, `IMinemeldStatusService`, `ResourceTypeSummary`, `Parse.User`, `Itinerary`, `CreateBotVersionCommandInput`, `TGroupBy`, `StyleResourcesFileFormat`, `UpdateQueue`, `MissingError`, `OpenAPIV3.Document`, `ILanguageSyntax`, `AdapterUser`, `XhrRequest`, `DangerInlineResults`, `ValueScopeName`, `PromiseSettledResult`, `ChangeDetectorRef`, `AudioPlayer`, `CheckboxFilter`, `UhkBuffer`, `IExecutionContextContainer`, `CalcScaleAnmType`, `IMutableVector2`, `IInteraction`, `StateChangeEvent`, `DynamoDB.QueryInput`, 
`StatusMessage`, `CheckFunc`, `ToolbarItemsManager`, `StrapiModel`, `PackedBubbleLayout`, `EnumMetadata`, `JobsService`, `ActionMeta`, `StandardChip`, `sst.StackProps`, `ELULayerArgs`, `MotionValue`, `MousePressOptions`, `IImageBuilder`, `BMD`, `ISuggestValue`, `ParseTreeMatch`, `Supplier`, `VimValue`, `MatchingRoute`, `Pt2`, `ObjectRenderer`, `IRpoToken`, `YAMLNode`, `MapInfo`, `TokenDetailsWithCoingeckoId`, `types.AzExtLocation`, `AnimationEvent_2`, `RectDataLabel`, `VerificationMethod`, `BoundElementPropertyAst`, `CameraType`, `Pathfinder`, `StartCliConfig`, `RAFirebaseOptions`, `PackageResult`, `WritableStreamDefaultController`, `TransferValidatorStakeV1`, `SlpTokenGraph`, `HsdsEntity`, `TextShadowItem`, `BTI_Texture`, `LogRequest`, `PropSidebarItem`, `TileCoordinates`, `UpSetAddons`, `ChatThreadPropertiesUpdatedEvent`, `java.lang.Object`, `ScriptingLanguage`, `ColumnDefinitionNode`, `AppMetadata`, `DecipherCCM`, `JoinRow`, `SchemaConfig`, `TFLiteWebModelRunnerOptions`, `CommonInfo`, `ListChannelsRequest`, `MyComp`, `LexerInterpreter`, `VehicleCountRow`, `WastePerDay`, `AbstractElement`, `BIP44HDPath`, `GetAllAccountsValidationResult`, `EnhancedModuleGraph`, `MessageListener`, `PendingTransaction`, `Deploy`, `Agency`, `Ticker`, `LinkType`, `PackageChangelog`, `Calendar_Contracts.IEventQuery`, `AnchoredChange`, `ServerConfiguration`, `PostgresClient`, `CHAINS`, `FactReference`, `IShaderMaterialOptions`, `ViewCommon`, `OpenSearchInterval`, `logging.Level`, `OneNotePage`, `DnsResponse`, `ValueOptions`, `DeleteLeaderboardRecordRequest`, `DescribeAutoScalingGroupsCommandInput`, `LinksFunction`, `OpOrData`, `GlobalEventModel`, `SimpleTemplateRunner`, `IModLoaderAPI`, `CalendarManagerService`, `TileCacheId`, `StartFlowCommandInput`, `CompoundSelector`, `WebviewTag`, `MetadataKey`, `CategoricalColorScale`, `AccountingRecord`, `BitbucketPipelines`, `server.ManualServerConfig`, `Commitment`, `ConnectionErrorCode`, `ERROR_CODES`, `ScreenTestViewport`, `AlignmentDirectional`, `ExecutionContextContainer`, `Iteratee`, `DateParts`, `WorkflowOutputParameterModel`, `SlatePlugin`, `BitmapFont`, `TileIndex`, `IMetricListener`, `NotificationConfiguration`, `InjectContext`, `RuleScope`, `TwingNodeExpression`, `ResTable`, `IFilter`, `TimeLimitItem`, `LinkedWorkTree`, `ScopedLogger`, `INotification`, `ProjectTilemap`, `EditorController`, `DSOChangeAnalyzer`, `Hunk`, `InstanceBlockDeviceMapping`, `DeleteGroupRequest`, `backend_util.Conv2DInfo`, `InteractivityChecker`, `Models.BlobMetadata`, `SettingsType`, `ARAddImageOptions`, `IClientInteraction`, `Displayable`, `BookmarkItem`, `IntelRealtimeResponse`, `PickingRaycaster`, `FieldStatsCommonRequestParams`, `BaseEncryptedPacket`, `requests.ListDedicatedVmHostInstancesRequest`, `LinearGradientPoint`, `TransactionPayload`, `SIOPRequestCall`, `INEO`, `IServerModel`, `ServiceJSON`, `TfsCore.TeamContext`, `DeploymentParameters`, `MutationListener`, `BaseMultisigData`, `BodyProps`, `TaskWrapper`, `Survey.Survey`, `ObservableValue`, `BrowserPlatformUtilsService`, `TooManyRequestsException`, `ZodObject`, `messages.Pickle`, `ParsedSource`, `NodePolyfillsOptions`, `ProviderProps`, `AttributeValueType`, `WebGLComponent`, `TheRdsProxyStack`, `PercentLength`, `PgNotifyContext`, `ElementArrayFinder`, `TokenLevelState`, `IListRecipient`, `MDCContainerScheme`, `GfxBufferP_GL`, `ResolvedAxisOptions`, `EnrichmentPipeline`, `SavedObjectsExportTransformContext`, `MockBroadcastService`, `Required`, `VariantGeometry`, `ClassSchema`, `IWorkflowDb`, `ExportInfo`, `TextLiteralContext`, 
`NestedMap`, `Book`, `FetchHandlers`, `NetworkAddress`, `IsBindingBehavior`, `InterpolationType`, `V1Prometheus`, `TranslationLoaderService`, `NodeProperties`, `DOMWidgetView`, `GamepadEvent`, `FeatureNode`, `SafeBlock`, `PermissionsService`, `RNNCellForTest`, `AppMetaInfo`, `HitCircle`, `Table`, `TestComponentBuilder`, `SectionProps`, `MalformedPolicyDocumentException`, `SFCDiffWatcher`, `HtmlProps`, `L.List`, `IEmitterBehavior`, `_Props`, `CapabilitiesResponseWrapper`, `Decoded`, `GetComponentCommandInput`, `ast.Node`, `AllPackages`, `DomExplorerNode`, `ImportRelativity`, `FormDependency`, `IDs`, `IVector2Like`, `DropListRef`, `HTMLDivElement`, `SerializableMap`, `Container`, `ModuleDatafeed`, `Param`, `GanacheRawExtraTx`, `NefFile`, `IBaseRequest`, `NVM500Details`, `BreakOrContinueStatement`, `TransactionVersion`, `WorkerOptions`, `EntityMaterialParameters`, `DragBehavior`, `TRgb`, `ChangeStateMap`, `SFTPWrapper`, `DeleteFlowCommandInput`, `GlobalConfiguration`, `SPClientTemplates.RenderContext_FieldInForm`, `DatabaseContract`, `SourceDir`, `ControllerUIProp`, `ICandidateFeedback`, `AnalyzeResult`, `ExtraContext`, `ColorPresentation`, `PythonPreview`, `Paginator`, `StubBrowserStorage`, `AuthCredentials`, `estree.Node`, `Node_Struct`, `ModelArtifacts`, `RType`, `int`, `ImportSteamFriendsRequest`, `ArgSchemaOrRolesOrSummaryOrOpt`, `Yoga.YogaNode`, `IsAny`, `CommandName`, `TModule`, `CommonStyleProps`, `ZeroXPlaceTradeParams`, `CoinSelectInput`, `TestType`, `PSIChar`, `QPoint`, `StreamOptions`, `requests.ListPreauthenticatedRequestsRequest`, `Stock`, `SvelteDocument`, `App.storage.ICachedSettings`, `ApiProxy`, `PolicyStatement`, `SSM`, `SubsetStory`, `ObjectListResult`, `StaticBlog`, `Fixed`, `ObjectLiteral`, `ActionTicketParams`, `DeployedReplicaCollection`, `StylesMap`, `GetUrlFn`, `ContentProvider`, `NexusPlugin`, `CrossMentor`, `PrivateLinkConnectionApprovalRequestResource`, `RequestsDataItem`, `UnparsedSource`, `NetworkBuilder`, `GroupRegistryState`, `RouterCallback`, `OrderByClauseArgument`, `VueWrapper`, `SwapInfo`, `DIRECTION`, `ZipFile`, `RewriteResponseCase`, `Rect`, `requests.ListStreamsRequest`, `DaySpan`, `lsp.Hover`, `_IRelation`, `DownloadedBinary`, `GenericResource`, `TaskDefinition`, `MockLogger`, `PadplusContactPayload`, `DialogComponent`, `AstLocation`, `TimerService`, `OptionGroups`, `UninstallMessage`, `DirectoryResult`, `RequestFunction`, `CollateralRequirement`, `ImmutablePeriod`, `CryptoProvider`, `FreeBalanceState`, `ProjectInterface`, `EthereumCommon`, `SyncHook`, `Pagerow`, `DAL.DEVICE_ID_SYSTEM_LEVEL_DETECTOR`, `TypeScriptService`, `LocalStorage`, `TestView`, `ItemStyle`, `CommandClassOptions`, `Static`, `SteemConnectProvider`, `SecurityContext`, `TypographyVariant`, `VirtualEditor`, `WorkflowHooks`, `VertexAttributeDefinition`, `UploaderEnvs`, `LexDocument`, `NoticeItem`, `ReactWrapper`, `MIRArgument`, `todo`, `WriteFileCallback`, `WeekDayIndex`, `ReleasesClient`, `CheckItem`, `MemberInfo`, `DeleteInstanceCommandInput`, `HTMLIFrameElement`, `SvelteSnapshotManager`, `PointMesh`, `IPipelineOptions`, `Upload`, `OperationOptions`, `IErrorsBySection`, `ClusterExplorerResourceNode`, `SearchInWorkspaceResultLineNode`, `HeatmapData`, `MiniSimulation3D`, `DialogOptions`, `CancellationErrorCode`, `IncompleteFormatStringSegment`, `PrimitiveModeEnum`, `PacketType`, `UpdateChannelError`, `ProjectUploader`, `UnitsMap`, `ParagraphProps`, `Disembargo`, `BlockchainPackageExplorerProvider`, `WorkspaceRepo`, `IScreenInstance`, `SavedEncounter`, `TypePoint`, `EventManager`, 
`LegacyObjectToConfigAdapter`, `WorkflowType`, `TouchingElementInfo`, `NgbModal`, `ThemeCoreColors`, `RecordDef`, `MemoizedSelectorWithProps`, `IUserRequestOptions`, `TileTextElements`, `Subject`, `NextPageContext`, `CommonTerminalEnum`, `IsZeroBalanceFn`, `ReadOnlyReference`, `virtualFs.Host`, `BooruCredentials`, `DeleteIntentCommandInput`, `SegmentedBar`, `N4`, `BuildArtifact`, `PolygonGeometry`, `Images`, `ElementFound`, `ColumnIndexMap`, `PackageJsonFile`, `IAjaxSuccess`, `TimeOffService`, `KeyValueChangeRecord`, `ITerminalOptions`, `BeneficiaryApplication`, `RouteQuote`, `ethers.providers.FallbackProvider`, `GlobalSearchResult`, `FileRef`, `Nothing`, `Watchman`, `StreamDescriptions`, `IndexingStatusResolver`, `IAaveGovernanceV2`, `TitleCollection`, `ListPortfoliosForProductCommandInput`, `AppRouteRecordRaw`, `AclEntry`, `SharedContentImp`, `FooValueObject`, `Intl.DateTimeFormat`, `IChallengeProps`, `NetMDInterface`, `WizardContext`, `RefreshTokenDto`, `AUTWindow`, `ApiDoc`, `ListChangeSetsCommandInput`, `AttrEvaluationContext`, `PublishedFurniture`, `KsDiagnostic`, `BoxColliderShape`, `FilePath`, `Transactions`, `RecurringCharge`, `Docker`, `MutableVector4d`, `IMemoryTable`, `Algebra.GroupNode`, `ILoadBalancer`, `B3`, `ConfigurationLoader`, `SelectedIndexChangedEventData`, `ValueProvider`, `CertificateAuthorityRule`, `OrderJSON`, `RouteValidationResultFactory`, `Req`, `eventInterface`, `ArrayTypeNode`, `nsIFile`, `HtmlOptions`, `MapDispatch`, `ClassThatUseDifferentCreateMock`, `StructureValue`, `Bip32Path`, `ImportClause`, `TimeLog`, `CreateProfileDto`, `Tunnel`, `CodeMirror.Position`, `React.PointerEvent`, `Tangent`, `ModelHandle`, `FieldMap`, `RtlScrollAxisType`, `FactoryFunction`, `StateMachine.State`, `Unlisten`, `IActionMethodAttribute`, `SearchThemeAttributes`, `Base64`, `GraphQLSubscriptionConfig`, `ast.RunNode`, `Champion`, `Country`, `TorrentState`, `PouchDBFileSystem`, `InMemoryFileSystemHost`, `NgrxJsonApiStoreData`, `FlatpickrFn`, `TopicSubscription`, `GachaDetail`, `LineSegment3d`, `OriginationOp`, `HttpsProxyAgent`, `SelectionsBackup`, `A7`, `TwitchServiceConfig`, `jasmine.CustomMatcher`, `CopyResponse`, `HTTPResponse`, `ApiTypes.UploadLinkRequest`, `ContainerSiteConfig`, `QueryProviderAuditorRequest`, `GetExtensions`, `ProjectPost`, `NodeRange`, `PromiseConstructor`, `GetInviteCommand`, `ManifestLoader`, `GetSettingSuccessCallbackResult`, `GameRegistry`, `messages.SourceMediaType`, `OrgPass`, `MatchScreenshotOptions`, `ParsedCommand`, `ApplyAssetContext`, `UniqueSelectionDispatcherListener`, `ImageAlignment`, `CsmSlotEntity`, `FormRenderProps`, `EntitySubjectStore`, `PrivateKeyPEM`, `AbortController`, `FIRDocumentReference`, `ResourceComputationType`, `IKbnUrlStateStorage`, `ContractsState`, `SubTypeBuilder`, `ImageInfo`, `Return`, `MdcElementObserverAdapter`, `NotificationType`, `ShorthandPropertyAssignment`, `handlerFunc`, `AppAction`, `GlobalEventDealer`, `Idl`, `AnyCoinCode`, `DescribeEndpointCommandInput`, `TodoItemEntity`, `CTX`, `BufferCV`, `ASSymbolType`, `JQueryDeferred`, `ProgressListener`, `React.Ref`, `d.EventSpy`, `InsightLogicProps`, `DAL.DEVICE_ID_SYSTEM_DAC`, `Sheets`, `SwankRequest`, `SpeechCommandRecognizerResult`, `AnnotationDimensions`, `TypeFormatFlags`, `CreateServiceRequest`, `JitsiPeer`, `Yaz0DecompressorWASM`, `Sync`, `SignupRequest`, `PagerAdapter`, `ITokenParser`, `DescribeDBParameterGroupsCommandInput`, `ArticleModel`, `Info`, `React.KeyboardEvent`, `SignalRConfiguration`, `DitherKernel`, `NgElementConstructor`, `SimpleExpressionNode`, 
`IStartupInfo`, `IThrottler`, `ContractInfo`, `MicrosoftComputeExtensionsVirtualMachinesExtensionsProperties`, `SimulcastLayers`, `CalibrationLabware`, `RecordFormat`, `TradeHistoryAccount`, `firebase.firestore.Firestore`, `PutEmailIdentityFeedbackAttributesCommandInput`, `EveError`, `ArticleType`, `SessionToken`, `ModelConstructorInterface`, `TNodeType`, `Commune`, `TParams`, `Deno.Conn`, `ServerDataItem`, `Csp`, `ISceneDataArray`, `IRankingHeaderContext`, `vscode.WebviewPanel`, `SqrlParseErrorOptions`, `WebCryptoFunctionService`, `DefaultTreeElement`, `SourceNodesArgs`, `Extras`, `CreateNote`, `MockMessageRequester`, `TimeRaster`, `FormStore`, `RendererEvent`, `MsgDepositDeployment`, `PointerButton`, `ASStatement`, `PrimitiveArg`, `CancelWorkRequestResponse`, `SqlOutputContentProvider`, `ImportedRecord`, `ShortValidationErrors`, `ResponseHeaders`, `PaginationInfo`, `BufferContainer`, `HubPoller`, `CreateAppInstanceUserCommandInput`, `p5ex.p5exClass`, `CoreTypes.PercentLengthType`, `SamlRegisteredService`, `PopupState`, `WorkspaceHeader`, `PickingInfo`, `TypedFragment`, `Triggers`, `EntityActionOptions`, `DecodeOutput`, `FederationClient`, `DeviceSelection`, `fnVoid`, `WifiNetwork`, `FnModules`, `StridedSliceSparseSpec`, `PartnersState`, `CanvasTheme`, `CollisionEndEvent`, `ActorAnimKeeperInfo`, `ICassExploreModuleState`, `IncomingWebhook`, `GithubIssueItem`, `ITaskDataConnections`, `LastFmArtistInfo`, `HydrateScriptElement`, `NzUploadChangeParam`, `PaginationProps`, `FeedProviderType`, `StringContext`, `ElementCoreContext`, `CollectionTypes`, `FramesType`, `ModelLifecycleState`, `SuiteInfo`, `PDFState`, `ChromeExtensionManifest`, `AbortedCallback`, `TrackDetails`, `PropSchema`, `GitDSL`, `DocViewRenderProps`, `SignatureProviderResponseEnvelope`, `Multiply`, `ExtraCommandLineOptions`, `ValidationChain`, `CompositeAnimation`, `AST.Root`, `GADBannerView`, `ModifyLoadBalancerAttributesCommandInput`, `IServiceConstructor`, `JumpFloodOutput`, `MapAnchor`, `PartialRequired`, `UpdateChannelReadMarkerCommandInput`, `NgSelectComponent`, `ThyGuiderStep`, `CustomImage`, `SelectorInfo`, `MetaLogger`, `TagSet`, `ChartEvent`, `VariantCreateInput`, `byte`, `AudioRule`, `SerializedConcreteTaskInstance`, `WordStorage`, `PointerType`, `SignalID`, `EncryptionLevel`, `HttpException`, `GenericFormProps`, `Subscribe`, `SlideLayout`, `PageProps`, `TsxComponent`, `NotificationCreateProps`, `SeedReference`, `NewSyntax`, `QueryStateChange`, `TypedThunk`, `MockStoreCreator`, `JitMethodInfo`, `SwitchKeymapAction`, `CreateDBSubnetGroupCommandInput`, `HistoricalEntityData`, `MeetingCompositeStrings`, `UserActionBuilder`, `StylableResolver`, `IOrderCreationArgs`, `TabbedTable`, `That`, `ProtocolConnection`, `Matrix33`, `EmitOutput`, `Plugin.SNSAdaptor.Definition`, `BaseContract`, `ExperienceBucket`, `ComplexExpression`, `SetStatus`, `VdmProperty`, `ThrottledDelayer`, `TabularRows`, `ConfiguredPlugins`, `DndService`, `IOptionsObj`, `IUserItemOptions`, `EyeProps`, `AlterTableExecutor`, `tfl.LayersModel`, `SingleWritableState`, `SavedObjectsCreatePointInTimeFinderOptions`, `ArangoSearchView`, `IIdentity`, `RegistryItem`, `NgSourceFile`, `ScopedSlotReturnValue`, `And`, `Collapse`, `PickingCollisionVO`, `IGraphDef`, `SentryEvent`, `ethereum.Event`, `AgentPolicy`, `ContractManifestClient`, `ImageSpecs`, `ListWorkRequestLogsResponse`, `DatModelItem`, `ShaderId`, `HealthCheckResult`, `EdgePlaceholder`, `NLUOptions`, `social.InstancePath`, `AutoInstrumentationOptions`, `StudentFeedback`, `IExecutionContextProvider`, 
`BirdCount`, `PopupModelConfig`, `mixedInstance`, `XmlMapsXmlNameCommandInput`, `SavedObjectsUpdateResponse`, `CommandNode`, `NotificationInfo`, `ListAttachmentsCommandInput`, `JQuery.Event`, `NodeCryptoCreateCipher`, `FontInfo`, `QUICError`, `CopySource`, `ElmType`, `d.CollectionManifest`, `Dock`, `MbMap`, `RARCFile`, `SecurityPolicy`, `ListAliasesCommandInput`, `GetQueryResultsCommandInput`, `NodeInjectorFactory`, `ITouchEvent`, `BindingOptions`, `NotFoundErrorInfo`, `purgeCommandCriteria`, `LanguageOptions`, `StateManagerImpl`, `OrderStatusState`, `Term`, `PointStyleAccessor`, `ILiteral`, `EqState`, `MediaDiagnosticChangedEventArgs`, `VantagePointInfo`, `VolumeIndicatorCallback`, `InputMessage`, `ISelectionId`, `ImportNameInfo`, `GeoJSON`, `RootProps`, `UpSetSelection`, `JsonPointer`, `ClipId`, `GraphQLFormattedError`, `IOrganizationDocument`, `EncodedPart`, `GenericParameter`, `VariableDefinitions`, `CloudServiceResponse`, `StubHelper`, `ModelData`, `flatbuffers.Builder`, `CallArgs`, `HighlightInfo`, `MergeQuerySet`, `GetterTree`, `FiniteEnumerableOrArrayLike`, `PatternCache`, `ListFiltersCommandInput`, `Survey.Operand`, `ToolRunner`, `ApiResultCallback`, `UserApp`, `ODataPropertyResource`, `LinariaClassName`, `Emission`, `ContractAbi`, `SpriteWithDynamicBody`, `ZodEffects`, `XTermColorTheme`, `FluentLogger`, `IFileMeta`, `Types.TooltipCfg`, `TagService`, `MockAirlineService`, `CodePointCharStream`, `EditorConfiguration`, `DSlash`, `android.graphics.Bitmap`, `I18nFeature`, `Multisig`, `ShallowWrapper`, `CustomerService`, `AnnotatedError`, `IProjectCommand`, `KibanaPrivileges`, `GetShardIteratorCommandInput`, `AxisDimension`, `Float64Array`, `IRequestConfig`, `BodyPixConfig`, `Indicator`, `TupletNumber`, `CSSBlocksConfiguration`, `UpdateProjectCommand`, `UpdateRuleCommandInput`, `Writeable`, `ContactLightweight`, `Banner`, `IStop`, `PayableOverrides`, `SettingsConfig`, `ChangesetFileProps`, `NonemptyReadonlyArray`, `Ink`, `BinStructItem`, `IActorDef`, `IAction`, `CallHierarchyDeclaration`, `Checkpoints`, `PortfolioOverviewView`, `Observations`, `MockCallAgent`, `OutgoingResponse`, `ReadState`, `ISqlite.SqlType`, `AjaxAppenderConfiguration`, `Imports`, `RpcPeer`, `HeaderData`, `TupleNode`, `IsolatedAction`, `DevServerEditor`, `CinemaFrameType`, `AuctionView`, `Snippets`, `ArgumentType`, `Electron.Menu`, `IAuthContext`, `DAL.KEY_T`, `ICellMarker`, `EmailHandler`, `MockProject`, `ViewDefinitionProps`, `TiledObjectGroup`, `GetAttendeeCommandInput`, `PrismaClientFetcher`, `WatchDirectoryFlags`, `MatchCallback`, `Semiring`, `PromiseResolver`, `MatrixModel`, `GetDetailRowDataParams`, `ChannelSummary`, `DiagnosticMessage`, `AuxChannel`, `DescribeTasksCommandInput`, `Test`, `RuntimeBot`, `ChatBoxStateModel`, `CaseOrDefaultClause`, `OutputTargetWww`, `DecodedSignaturePart`, `PartyPresenceEvent`, `RequestMock`, `IChannel`, `TagResourceCommandInput`, `YEvent`, `CreateRuleCommandInput`, `InvalidConfig`, `AppearanceProviderFor`, `SpaceUser`, `XTableRow`, `ResolverInput`, `StacksKeys`, `LockedGoldInstance`, `MapperForType`, `Building`, `Simplify`, `StyleProps`, `InspectorEvents`, `SimpleAllocation`, `Flatten`, `MatchRule`, `ASTConverter`, `UploadTaskSnapshot`, `TestingGroup`, `SourceRange`, `SingleTablePrettyfier`, `TypeSignature`, `SaladTheme`, `TaskRun`, `EmbeddableEditorState`, `TransformFunction`, `RequestMethods`, `TextContextTypeConvert`, `FormatTypeFlags`, `IHillWarriorResult`, `FieldConfigData`, `NormalRange`, `UpdateChannelCommandInput`, `CdsNavigationStart`, `Lexer`, `SimulatorState`, 
`IConfigService`, `GameChannel`, `CommerceLayerConfig`, `PermutationSegment`, `IMeasurementEvent`, `SupCore.Data.Entries`, `PddlWorkspace`, `RenderDebugInfo`, `GroupedOrderPremiumRow`, `ChildProcessByStdio`, `ExpShapeSymbol`, `AccountEmail`, `matrix.MatrixArray`, `TempDirectory`, `HookHandlerDoneFunction`, `ControllerHandlerReturnType`, `ScalePower`, `IAppSettingsClient`, `requests.ListNetworkSecurityGroupSecurityRulesRequest`, `ModalComponentType`, `IUserFilterDefinition`, `InputHandler`, `KeyResultUpdateService`, `TableContext`, `core.IRawOperationMessage`, `PublicMilestone`, `Stretch`, `ColumnOrder`, `TestPhysicalObject`, `DashboardReport`, `Add`, `AppInputs`, `ClientConfiguration`, `$NextFunctionVer`, `ItemEntity`, `HTMLIonLabelElement`, `ServerIdentity`, `React.FocusEventHandler`, `TransferItem`, `ComboBox`, `StringDecoder`, `PNGWithMetadata`, `ResolvedReflectiveProvider`, `CTR`, `SnackbarKey`, `YarnPackageManager`, `GetExportCommandInput`, `egret.DisplayObject`, `CreateAliasRequest`, `BasicAuthResult`, `AnyKey`, `IPatchRecorder`, `CardScript`, `StartServices`, `ZonesManagerProps`, `Functor`, `SessionOnDisk`, `FrontendApplication`, `FlightDataModel`, `stream.Readable`, `Graphics.BlendOperation`, `MatchersUtil`, `Display`, `HardRedirectService`, `GraphQLRequestContextWillSendResponse`, `SExpressionRepl`, `ICreateData`, `PiEditProjectionItem`, `cdk.App`, `SpacesClientService`, `ManualClock`, `NormalizedPackageJson`, `EditableContent`, `InteractiveConfig`, `AoptB`, `RootThunkResult`, `SpotifyWebApiJs`, `firebase.firestore.DocumentReference`, `requests.GetJobRequest`, `GifFrame`, `Magic`, `ErrorAction`, `OhbugConfig`, `PNLeaf`, `GroupsPreviewType`, `TableProps`, `ProofService`, `SceneRenderer`, `GenericType`, `OutputDataConfig`, `Terrain`, `VisualDescriptor`, `UiSchema`, `TooltipValueFormatter`, `Changer`, `SymbolicTensor`, `SMap`, `PartyJoinRequest`, `RustPanic`, `XMLHttpRequestResponseType`, `IFetchedData`, `EthAddress`, `DebugBreakpointDecoration`, `Decibels`, `PutPermissionPolicyCommandInput`, `RequestPolicy`, `EngineArgs.SchemaPush`, `ScrollState`, `DMMF.Field`, `SearchSequence`, `IntersectionTypeNode`, `glob.Options`, `GetSharedData`, `UnitOfWork`, `ISnapshotTreeWithBlobContents`, `CallSettings`, `LeakDetectionSignal`, `WebSocketConnectCallbacks`, `TAG_SIZE`, `FirmwareUpgradeIpcResponse`, `RequestPopupModelAction`, `FaceNameSwizzler`, `IntPairMap`, `turfHelpers.FeatureCollection`, `TsAutocompleteComponent`, `IRenderTask`, `StatusBarAlignment`, `LoadConfigInit`, `IToolbarProps`, `WebGLQuery`, `OnTabSelectedlistener`, `Consensus`, `BalanceChecker`, `AssetsOptions`, `MoneyBase`, `Timeline`, `HandlerParamOptions`, `SolutionToSolutionDetails`, `QuestaoModel`, `CiBuildInfo`, `CohortState`, `Throttler`, `LiteralTypeBuildNode`, `PathHeadersMap`, `LoggerParameters`, `R.List`, `ExtrinsicDetails`, `SeriesZonesOptions`, `GQtyConfig`, `BaseClient`, `FileBuild`, `MockERC20TokenContract`, `UnwrappedArray`, `DMMFClass`, `NgxsRepositoryMeta`, `Structures`, `ScriptInfo`, `ArrayObserver`, `SearchkitClient`, `TGen`, `GeometricElement3dProps`, `ProjectedEdge`, `ButtonState`, `MockReaction`, `SelectionMode`, `Snowflake`, `DataSourceItemGroup`, `Node.Node`, `PuzzleGeometry`, `RLANAnimation`, `ComponentSet`, `WaitTask`, `IntSet`, `NginxDirective`, `UpdateSpellUsableEvent`, `React.DependencyList`, `FormulaDescriptor`, `IComputedFieldOwner`, `ListPublicKeysCommandInput`, `PortalCommunicator`, `OutputFormat`, `DropDownOption`, `InputArgs`, `MemberDefinition`, `EventEmitter2`, `ActionObject`, 
`MessageAttributeMap`, `UiGridColumnDirective`, `PageObjectConstructor`, `ISurveyStatus`, `ManifestInfo`, `FolderService`, `QuestionMapType`, `EditorMode`, `UtxoInfo`, `WechatTypes.SendMessageOptions`, `VariableStatement`, `anchor.Wallet`, `parse5.DefaultTreeDocument`, `Fork`, `CollapseGroupProps`, `StackInspector`, `DescribeRegionsCommandInput`, `StrikePrices`, `MaybePromise`, `InputWithModel`, `StorageAdapter`, `IMdcSliderElement`, `StartApplicationCommandInput`, `TestResultContainer`, `ILanguageObject`, `UpdateRequestBuilder`, `ImportNamespace.Interface`, `AggArgs`, `FS`, `BlockModel.Resolved`, `SmartContractPayload`, `theia.Task`, `CompletionEntryDetails`, `TargetLayoutNode`, `PatternMappingEntryNode`, `CanvasSpaceValues`, `ValidatePurchaseAppleRequest`, `MediaManager`, `ParameterDecorator`, `SymbolAndExponent`, `CommandRelay`, `ForgeModAnnotationData`, `SavedObjectEmbeddableInput`, `ITabInternal`, `UtilService`, `EntityActionDataServiceError`, `MapFn`, `Kysely`, `ISharingResponse`, `IVirtualDeviceValidatorResult`, `cookie.CookieSerializeOptions`, `LabelType`, `Z64SkeletonHeader`, `glTF.glTF`, `ODataModelField`, `IActionCodeSettings`, `TProductCategory`, `IAccessInfo`, `TimestampsToReturn`, `MatchPath`, `JPartition`, `FunctionToActionsMap`, `Triplet`, `LocationStrategy`, `INewProps`, `TableRefContext`, `RangeData`, `MuteRoomTrackResponse`, `CssToken`, `ListRowProps`, `UniqueID`, `tStartupOrShutdown`, `IDeviceInterface`, `ProtocolNotificationType0`, `EChartGraphNode`, `IFilterListItem`, `MigrationsContract`, `ICols`, `Articulations`, `OperatingSystem`, `ManagerConfig`, `BlockHeaderWithReceivedAt`, `NgForageConfig`, `Cluster`, `ClusterCollection`, `SimpleRect`, `DescribeContactCommandInput`, `ProjectsState`, `IRenderInfo`, `AddressSpace`, `ShellExecResult`, `RicardianContractProcessor`, `CommandRole`, `Viewer.ViewerRenderInput`, `MappingsEditorTestBed`, `LuaInfo`, `TrueFiPool2`, `SourceCodeLocation`, `ProviderStore`, `Shipment`, `ts.WhileStatement`, `ForwardDefinition`, `MenuModelRegistry`, `CryptoKey`, `HandlerType`, `NumberContext`, `SlippageTolerance`, `IMediatorConfigurator`, `IDBTransaction`, `SenderFunction`, `StringInput`, `IFontFaceOptions`, `GridBase`, `OutputTargetDistTypes`, `PathSegment`, `PaginationService`, `Persistor`, `GenericCompressorProperty`, `IAckedSummary`, `BazelBuildEvent`, `FieldGroup`, `IState`, `DeletePipelineCommandInput`, `PlayerAggHistoryEntry`, `MCU`, `ITransport`, `ConsolidatedCertificateRequest`, `OrganizationState`, `TranscriptEvent`, `ActionHistory`, `MockResolvers`, `CandidatePair`, `GroundPlane`, `AssetInfo`, `DateTimeFormatPart`, `UIDialog`, `PublicAppDeepLinkInfo`, `DidChangeConfigurationParams`, `ContextMenuExampleProps`, `THandler`, `ResolvedDeclarationList`, `SyntheticPointerEvent`, `Grid3D`, `ScreenSize`, `GetObjectCommandInput`, `CosmosdbSqlDatabase`, `OperationCallbackArg`, `Atom.TextEditor`, `MessageEntity`, `TEAttr`, `ObserverActionType`, `ThyDragDropEvent`, `PathMatch`, `StopChannelCommandInput`, `CardManager`, `SavedObjectsResolveResponse`, `ParameterDetails`, `LegendItemExtraValues`, `RuntimeFn`, `HarperDBRecord`, `TerritoryAliasMap`, `LogsConfig`, `Resetter`, `IGroupTemplate`, `PlacementOptions`, `SectionMarkerConfig`, `ViewPropertyConfig`, `ConflictState`, `TEBinOp`, `CreateBotCommandInput`, `UiSettingsParams`, `WebhookRequest`, `MDCBottomNavigationBar`, `BroadcastOptions`, `IGroup`, `OpenSearchRawResponse`, `TGroupHandle`, `InputNode`, `PossiblyAsyncIterable`, `IDeploymentStrategy`, `PanResponderInstance`, `HandlerInboundMessage`, 
`Widget.ResizeMessage`, `WorkflowNode`, `CachedBuildRequestOptions`, `GetApplicationCommandInput`, `DFChatArchiveEntry`, `LeafletElement`, `extendedPingOptions`, `GenericNotificationHandler`, `CompilerBuildResults`, `MagicString`, `KeyframeNodeList`, `SubscriptionEntry`, `VectorLayer`, `GetUserResponse`, `SymbolType`, `near.NearSwapTransaction`, `ControllerMetadata`, `GoogleUser`, `SObjectTransformer`, `DtlsPlaintext`, `UILabel`, `ComponentStrings`, `BroadcastTx`, `RemoteCallParticipants`, `GitBranch`, `RequestSession`, `IFunctionIdentifier`, `ComplexError`, `ChatAdapter`, `ServerAccessKey`, `SessionIdentifier`, `Food`, `messages.FeatureChild`, `Shape2DSW`, `Metadata_Add_Options`, `IEntityKeyType`, `IFabricWalletGenerator`, `SSHConfig`, `ListField.Value`, `AskQuestionsParams`, `esbuild.BuildOptions`, `MaterialInstance`, `ODataActionResource`, `Activity`, `PaymentIntent`, `GraphPartitioning`, `FullIndexInfo`, `Accountability`, `JobRun`, `OutputTargetDistCustomElementsBundle`, `GenericIdModel`, `Combined`, `OrbitControls`, `Body2DSW`, `StyleRule`, `VNodeStyle`, `resourceI`, `ObjectTracker`, `SMTFunctionUninterpreted`, `TemplateLiteral`, `ASVariable`, `NodeSelector`, `PackageTypeReport`, `NotificationConfig`, `AuthContextData`, `InputTypes`, `AST.SubExpression`, `PasswordHistoryResponse`, `RetryData`, `MIREphemeralListType`, `AdEventListener`, `VisTypeListEntry`, `ConnectionHealthData`, `SavedObjectType`, `Call`, `SendMessageRequest`, `StoredCiphertext`, `PDFAcroListBox`, `HTMLImageElement`, `TableType`, `BaseStateContainer`, `TerminalNode`, `ResourceNotFoundFault`, `requests.ListSteeringPolicyAttachmentsRequest`, `CSSStyleRule`, `ReadWriteStream`, `EnqueuedTask`, `AngularPackage`, `Sampler2DTerm`, `Registrar`, `QualifiedIdentifierContext`, `FilterFunction`, `ColumnHeaderOptions`, `GherkinDocumentWalker`, `TypeAttributes`, `GUIDriverMaker`, `IAppStore`, `GroupingService`, `SpringChain`, `XYChart`, `Node_Annotation`, `AppStateStatus`, `PageQueryOptions`, `Web3`, `IAdjacencyCost`, `ExecutionState`, `observable.EventData`, `TransportSession`, `ListFriendsRequest`, `Curve`, `Asset`, `UseStore`, `CalculationId`, `FilterValues`, `RelationsOpts`, `AcronymStyleOptions`, `chrome.runtime.Port`, `Attach`, `MDCAlertControllerImpl`, `CommonContext`, `Reconciliation`, `ProcessingContext`, `IOptionalIType`, `Average`, `ProviderWithScope`, `AWSAccount`, `ForwardingSchema`, `PropertyValue`, `Fig.ExecuteShellCommandFunction`, `AppxEngineStep`, `AsyncTask`, `VertexList`, `Tuplet`, `StyleResource`, `ObjectAssertion`, `mod.TargetGroup`, `Entity`, `InsightsResult`, `MockRouteDefinition`, `SymExp`, `WsChartService`, `Lesson`, `OnCameraFrameCallbackResult`, `PlannedOrganizationalUnit`, `AnnotationState`, `AccountModel`, `_IDb`, `BuildInPluginState`, `SVGAttributes`, `UnsubscribeSnapshot`, `ReadyPromise`, `Fuse`, `CommandlineOption`, `UserManager`, `TransformerFactory`, `AstNode`, `Fontkit`, `HistoryEnv`, `GzipPluginOptions`, `Euler`, `PeopleIterator`, `YieldEveryOptions`, `BuildOnEventRemove`, `UserDevices`, `IStore`, `WorkspaceStructure`, `PlotRowIndex`, `IControlData`, `AppJob`, `ReuseContextCloseEvent`, `TeamProps`, `glTF1`, `ChainJson`, `TutorialService`, `CreateDistributionCommandInput`, `TabView`, `SpriteVID`, `AsyncSchema`, `MediaProps`, `RenameModuleProvider`, `AutoUVBox`, `vscode.CustomDocument`, `ITuple2`, `PickRequired`, `MicrosoftStorageStorageAccountsResources`, `ObjectMultiplex`, `NgextConfigResolved`, `ICfnBinding`, `RedPepperService`, `SimulationState`, `GeneratorError`, `AnyGuildChannel`, `KPuzzle`, 
`AdjacentZones`, `LanguageServiceContainer`, `ClassDetails`, `CodeNameDTO`, `IFormProps`, `DatabaseType`, `bbox`, `Highcharts.StockToolsNavigationBindings`, `PartialErrorContinuation`, `ModbusForm`, `ApiDecoration`, `AgentOptions`, `BatchExecuteStatementCommandInput`, `InitialArgv`, `ReactFramework`, `MockUdpTally`, `StripeModuleConfig`, `IExpenseCategory`, `SiteMetadata`, `WorkingDirectoryInfo`, `DescribeDatasetResponse`, `PriceScale`, `XMLDocument`, `WriteLock`, `CdsNavigationItem`, `DocumentTree`, `RpcKernelBaseConnection`, `Subnet`, `ImportEqualsDeclaration`, `StorableUser`, `TypeDescription`, `EntityConfiguration`, `ProductOperations`, `NodeGraphicsItem`, `IconProps`, `CalculatedBlock`, `CircleDatumAlternative`, `FilterValueFormatter`, `QuirrelClient`, `Buff`, `DeleteParams`, `React.ForwardRefExoticComponent`, `ReadModelMetadata`, `ITemplatizedCard`, `UniversalCookies.Options`, `ResolveSubscriptionFn`, `JSDocTagInfo`, `ITransitionData`, `ContractRegister`, `AllKindNode`, `UsePaginationModelConfig`, `NodeStack`, `TabsModel`, `CouncilData`, `AxisOrder`, `IFindQuery`, `builder.Session`, `CustomStore`, `WorkspaceExtImpl`, `IActionSet`, `KeySuffixOptions`, `SongState`, `KamiConfig`, `ViewModel_`, `STExportOptions`, `BitstreamFormatDataService`, `Box3Like`, `IOrganizationProject`, `BlockchainClient`, `TransformedData`, `CLM.TextVariation`, `CountState`, `ChartJSNodeCanvas`, `GeometricElement`, `PPTDataType`, `IPayloadAction`, `PagedResp`, `JsonEnumsCommandInput`, `PrismTheme`, `FeederDetails`, `EntityDictionary`, `TemplateNode`, `LLVMNamePointer`, `DocumentSelector`, `Voting`, `Cone`, `EngineEventType`, `XmlSerializerOptions`, `StreamType`, `UpdateUserRequest`, `ChannelItem`, `ProjectStore`, `WebpageMetadata`, `IndexDiff`, `anyNotSymbol`, `ListParticipantsRequest`, `MkFuncHookState`, `CoronaData`, `MessageDataFilled`, `PathLike`, `CreditedImage`, `vscode.CompletionContext`, `CustomDecorator`, `IChangesState`, `EmbeddableStateWithType`, `TokenTypes`, `TextVerticalAlign`, `PushRPC`, `FSWatcher`, `OpenChannelMessage`, `VirtualScope`, `AsyncIterableObservable`, `RootReducerState`, `AuxUser`, `NavigationTreeViewModel`, `GetFieldFormat`, `HubLinksWebPart`, `SceneMouseEvent`, `CheckMode`, `DMMF.ArgType`, `MatrixMessageProcessor`, `FunctionN`, `apid.GetReserveListsOption`, `Precision`, `FileWatcherCallback`, `ClassOrFunctionOrVariableDeclaration`, `KeyShare`, `CGRect`, `SpeechConnectionMessage`, `PrimitiveStringTypeKind`, `InternalPropertyObserver`, `JDesign`, `MarkerElement`, `MessageBundle`, `Road`, `CompletionItem`, `VueApolloRawPluginConfig`, `Snapshots`, `requests.ListResourceTypesRequest`, `angular.auto.IInjectorService`, `SymbolMetadata`, `CommandInput`, `TootDetailState`, `requests.ListSecurityAssessmentsRequest`, `UntagResourceCommandOutput`, `ExperimentStateType`, `DataKind`, `QueryParamsType`, `CreateMigrationDetails`, `AggConfigs`, `ScmResourceGroup`, `SExpr`, `ListServicePipelineProvisionedResourcesCommandInput`, `IResolveDeclarationReferenceResult`, `AllureConfig`, `Path1`, `StructuredStorageBaseHelperOptions`, `Filesystem.FileExistsAsync`, `ValidEndpointType`, `Resolved`, `UnitAnalyser`, `Compression`, `LoginRequest`, `MonikerData`, `QuakemlService`, `InputValueDefinitionNode`, `JSType`, `IndTexMtx`, `SymbolDefinition`, `CompletionMsg`, `SCanvas`, `AsBodiless`, `PrimitiveProps`, `ValidatorFunctionType`, `U8Archive`, `RouteEntry`, `InternalData`, `LifecycleChannel`, `SparseArray`, `LatexAst`, `ComponentSetup`, `MatchExpression`, `ListTagsResponse`, `RequestUploadService`, 
`DescribeDatasetGroupCommandInput`, `LogoProps`, `OfficeFunction`, `DatasourceRefType`, `AlternativesSchema`, `TeleportContext`, `IteratorContainer`, `OSD_FIELD_TYPES`, `IPageNode`, `ast.NodeTag`, `JsonAstNode`, `STPCardValidationState`, `AccountGameCenter`, `IMidwayApplication`, `DirectoryReader`, `NotificationIOS`, `N2`, `ReadModelEnvelope`, `SettingsProperty`, `LifeCycle`, `IReducerContext`, `TeamModel`, `IFilterArgs`, `GetChildNodes`, `EffectFallbacks`, `SanityClient`, `MusicbrainzArtist`, `FileExtensionMap`, `ParticleSystem`, `vd.VNode`, `THREE.Line3`, `LanguageMatcher`, `HTMLIonBackdropElement`, `BarTuple`, `NzI18nService`, `ts.ConciseBody`, `VertexAttributeInput`, `GaugeDialogType`, `ProblemFileEntity`, `cpptools.Client`, `SearchSessionsConfig`, `ConfigurationService`, `IMatches`, `RotationManager`, `PackageUrlResolver`, `Pkg`, `MOCK_TYPE`, `FrameNode`, `GraphQLFieldConfigMap`, `MediaRec`, `TestSolo`, `TopNavItem`, `TabHandler`, `RequestAPI`, `IRowAPI`, `OptionalKind`, `ICustomizations`, `CppArgument`, `TSError`, `InferredSize`, `UnpackOptions`, `FilterGroup`, `Preprocessor`, `WeaponMaterial`, `Embed`, `IteratorWithOperators`, `GetMemberCommandInput`, `IPaginationOptions`, `FolderItem`, `IFormItemTemplate`, `WorkspaceData`, `ArrayPattern`, `WorkerPool`, `ElasticPool`, `MarkInterface`, `K4`, `AtomicMarketNamespace`, `FetchedIndexPattern`, `ClassType`, `CourseComponent`, `Square`, `QueryArray`, `ResourceKey`, `CircuitGroupCircuit`, `Acc`, `BBOX`, `EthereumSignatory`, `CommandClass`, `IInsert`, `LGraph`, `_IObjectMap`, `JsonOutput`, `GeoPath`, `RoomUserEntry`, `Themes.Theme`, `TinaFieldInner`, `SavedObjectsPublicPlugin`, `OneofDescriptorProto`, `SharedFunctionCollection`, `BackstageManager`, `CSSToken`, `ConfigActionTypes`, `CallAndResponse`, `puppeteer.ElementHandle`, `IAmExportedWithEqual`, `OnPreResponseToolkit`, `INamesMap`, `ModelSpec`, `FabricSmartContractDefinition`, `IArray`, `UndoPuginStore`, `Jsonified`, `SceneContext`, `Eq`, `IApiServer`, `RC`, `AccountsContract`, `BidirectionalMergeMode`, `DayGridViewWrapper`, `ContractJSON`, `SentimentValue`, `RectResponderModel`, `RequestInterceptor`, `AnimationBase`, `ReaderOptions`, `AnimationDirection`, `MDCMultilineTextField`, `TEUnaryOp`, `SavedObjectsExportTransform`, `AnnotationLayer`, `NuxtAxiosInstance`, `ShapeView`, `DeleteVpcLinkCommandInput`, `DensityBuilder`, `DynamicEntityService`, `WritableDraft`, `MarkupKind`, `LegendSpec`, `ModuleListener`, `Fix`, `OpenEdgeConfig`, `MessageReader`, `CommandRunner`, `MaxAge`, `VtxLoaderDesc`, `BaseService`, `PipeFlags`, `VBox`, `CheckboxProps`, `EventNameFnMap`, `CSharpClass`, `CrawlerDomain`, `ResourceLoader`, `IHookStateInitAction`, `CBPeripheralWithDelegate`, `PageDoc`, `UnitSystemKey`, `WebviewWidget`, `TranspileResult`, `ExpressionsServiceSetup`, `ParsedBlock`, `AliasHierarchyVisitor`, `NodeDefaultCryptographicMaterialsManager`, `ArDrive`, `SanityTestNode`, `Chai.Should`, `FIRStorageReference`, `ControlButtonProps`, `SerializedTemplateInfo`, `CommandClasses`, `RuleIteratorWithScope`, `Done`, `LazyLight`, `ValueStream`, `TransportEvent`, `PermissionService`, `ConfigurableForm`, `Unpacker`, `Fork.Fork`, `LightInstance_t`, `IServerError`, `messages.PickleTag`, `OrganizationPolicyType`, `Artist`, `DeleteAppInstanceCommandInput`, `HistoryStore`, `DaffCategoryFilterRequest`, `d.ModeStyles`, `LoadingService`, `GroupOrientation`, `V1Pod`, `FnU3`, `AppInstanceEventType`, `WorkspacePath`, `ReadBuffer`, `NzDebugConfig`, `RedisTestEntity`, `BreadcrumbOptions`, `DefinitionRowForInsert`, 
`TreeDiagramNode`, `_TimerCondition`, `DataGraph`, `UnicodeBlock`, `Series.PlotBoxObject`, `UVTX`, `ITextModel`, `RawBlock`, `ex.Input.KeyEvent`, `CollectMultiNamespaceReferencesParams`, `jsiiReflect.Type`, `MessageHeaders`, `ThunkActionT`, `d.TransformOptions`, `UAMethod`, `DispatchOptions`, `TagWithRelations`, `rpcConfig`, `CallbackList`, `BaseCommand`, `Node_Enum`, `ParamSpecEntry`, `VariableValueTypes`, `DeleteBucketTaggingCommandInput`, `MultiSet`, `SignedContractCallOptions`, `ResolverBuilder`, `MessageEmbed`, `Events.exittrigger`, `GraphicsItem`, `VanessaTabItem`, `DAL.DEVICE_OK`, `FeatureFilter`, `NextRequest`, `Ed25519KeyPair`, `DescribeIndexCommandInput`, `Bond`, `Attr`, `FactRecord`, `FormatterParam`, `alt.Entity`, `immutable.Map`, `ElmExpr`, `DateRange`, `Publication`, `IAssetItem`, `GetListParams`, `TemplateStore`, `InvertedIndex`, `poolpair.PoolSwapMetadata`, `AngularFirestoreCollection`, `Mouse`, `EmailService`, `CoinTransferMap`, `IRenderMimeRegistry`, `Until`, `PrismaClient`, `FlagFixPoints`, `MockAttr`, `TDeleteManyInput`, `CallHook`, `UICarouselItemComponent`, `AttributeValueChoiceOption`, `QueryCommand`, `SimpleTextSymbol`, `ServerSyncBufferState`, `IAuthFormContext`, `DocumentTypes`, `AnyChildren`, `NamedExoticComponent`, `HSD_TEInput`, `ElementResult`, `CreateDatabaseCommandInput`, `GoToLabelProps`, `DiscordToken`, `BinaryInfo`, `NodeProps`, `RefreshTokenEntity`, `JobLifecycleState`, `PostfixUnaryOperator`, `WebAppStack`, `IEqualityComparer`, `ISummaryRenderer`, `MatAutocompleteSelectedEvent`, `SearchOptions`, `QueueEntry`, `Literal`, `ContentDescriptorRequestOptions`, `Evaluate`, `AnimationGroup`, `PhrasingContent`, `FacetsGroup`, `PopupAlignment`, `GltfPreviewPanelInfo`, `KeyValuePair`, `Electron.BrowserWindowConstructorOptions`, `InstantComponentTransformation`, `TransportOptions`, `ColorBlindnessMode`, `LevelUp`, `J3DModelData`, `TodoType`, `NewLineFile`, `INumberDictionary`, `StoreManager`, `MemoExoticComponent`, `ICommandBus`, `LiveEventMessage`, `PathAndContent`, `MessageBuilder`, `BlockNode`, `OpenApiDocumentRefs`, `PDFParser`, `ISubImage`, `UseFormReset`, `Database.Replica`, `BarcodeMetadata`, `AuthState`, `HydrateComponent`, `ILibraryResultState`, `DimensionGroup`, `ApplicationWindow`, `StatedBeanMetaStorage`, `OnReferenceInvalidated`, `RuntimeError`, `AuthenticateGameCenterRequest`, `GetDirsOrFilesOptions`, `PayloadInput`, `DevicesButtonStrings`, `LocalPackageInfo`, `Events.pointerwheel`, `RolesEnum`, `UnitCheckboxComponent`, `DragInfo`, `OpenApiRequestBuilder`, `TocStepItem`, `IModalState`, `StackRootNavContext`, `HsLanguageService`, `StyleResourcesLoaderNormalizedOptions`, `ReflectionCapabilities`, `ConnectionProperty`, `SessionID`, `TAbstractFile`, `JSONType`, `AnyQuery`, `CacheVm`, `Mocha.Context`, `SecureNoteData`, `INestMicroservice`, `EditMode`, `JhiAlertService`, `SlotDefaultValue`, `UntagResourceResponse`, `InputHTMLAttributes`, `CanvasKit`, `BRepGeometryFunction`, `ir.Type`, `RequestSchema`, `IpcMainEvent`, `Annotation`, `android.content.DialogInterface`, `JsonArray`, `ITokenMatcher`, `NexeCompiler`, `CategorizedMethodMemberDoc`, `SoftwareKeyProvider`, `QualityLevel`, `HttpRequestOptions`, `Models.ModifiedAccessConditions`, `EmissionMaterial`, `CountModel`, `ReactFlowState`, `ManualConflictResolution`, `SiteSourceControl`, `FilterComponentSettings`, `Collectable`, `Html5QrcodeScannerState`, `ReminderFormatType`, `ValueFormatter`, `TapGesture`, `JavaScriptEmbedder`, `CodeBuildAction`, `ParticlesFlyerView`, `u`, `PddlConfiguration`, 
`ModuleOptionsWithValidateFalse`, `LibraryNotificationActionContext`, `IRestClientResponse`, `_ChildType`, `OrganizationPolicy`, `OutputTargetDistCollection`, `AsBodilessImage`, `EthereumERC721ContextInterface`, `MiddlewareResultFactory`, `Firmware`, `JoinTournamentRequest`, `MiddlewareFn`, `DynamicCommandLineParser`, `EventDestination`, `CompilerIR`, `PlainObjectOf`, `CourseService`, `UIComponent`, `IDataSlice`, `ExtensionService`, `EffectiveTypeResult`, `express.Request`, `NAVTableField`, `APIVersion`, `Scanner`, `SignedByDBInterface`, `keyType`, `NetworkData`, `DayCellStyle`, `DocumentSymbol`, `DataAssetSummary`, `ColorResolvable`, `IMergeTreeDeltaCallbackArgs`, `CodeKeywordDefinition`, `DescribeConfigurationRevisionCommandInput`, `DynamicCommandLineAction`, `MatcherState`, `AndroidInput`, `LiteralSource`, `CompBitsValue`, `MemberExpression`, `WeakEvent`, `RequestOption`, `DialogButton`, `ObjectUpdatesEntry`, `PasswordService`, `SimpleCharacter`, `CalendarEvent`, `LayerState`, `AccountingTemplate`, `MyState`, `PatchResult`, `DAL.DEVICE_ID_THERMOMETER`, `ts.Signature`, `CallMethodResult`, `LiteralTypeNode`, `StatePropertyAccessor`, `CompressionTextureTypeEnum`, `ThySelectionListChange`, `ListTableRowsCommandInput`, `ts.ConstructorDeclaration`, `HeroesState`, `TopAppBar`, `ListSendersRequest`, `TypeDBTransaction`, `SFUManager`, `EndpointConfiguration`, `SemanticRole`, `SettingsProps`, `VarScope`, `Const`, `ApprovalRuleTemplate`, `ListResolversRequest`, `Database.User`, `TSObj`, `CachedTileLayer`, `IRandomAccessIterator`, `CsvGenerator`, `PreloadedState`, `ICountry`, `EndpointArgument`, `HashMapIteratorLocationTracker`, `CreateCollectionOptions`, `IndicatorNode`, `TwingTest`, `PromisedComputed`, `CSSParsedDeclaration`, `RelativeBandsPadding`, `DomainEventMapping`, `IFBXLoaderRuntime`, `ConfigStore`, `SGItem`, `IApprovalPolicy`, `GetDeliverabilityDashboardOptionsCommandInput`, `CliOutputOptions`, `MotionComponent`, `GetAppCommandInput`, `SubtitlesCardBases`, `UpdateSource`, `BinSet`, `BuildArtifacts`, `LoggerOptions`, `CanvasLayerModel`, `hardforkOptions`, `GetActionParams`, `WExpression`, `SubscriptionField`, `DanmakuDrawer`, `EnvProducer`, `ListManagementAgentPluginsRequest`, `RTCTrackEvent`, `OrganizationVendorService`, `ListRulesCommandInput`, `RetryDataReplicationCommandInput`, `UnknownError`, `SavedReport`, `UpdateOneInputType`, `DockerAuthObj`, `TSIf`, `DropDownProps`, `WithdrawByoipCidrCommandInput`, `ApplicationCustomizerContext`, `IsCommon`, `GenerateSWOptions`, `PCLike`, `FileStatus`, `IFileInfo`, `MonitoredItem`, `LSTMCellLayerArgs`, `FormCookbookSample`, `BlockStateRegistry`, `DukDvalueMsg`, `UINavigationItem`, `ArrayBinding`, `InformedOpenLink`, `LabelTable`, `SharePluginSetup`, `RoverWorkload`, `PeopleEmitter`, `ObjectConstructor`, `ReuseTabNotify`, `Repositoryish`, `CallbackContext`, `OverflowModel`, `PollingInterval`, `FeedbackData`, `CreateAccountStatus`, `DocsLibrary`, `DockerImageName`, `E`, `DescribeDomainAutoTunesCommandInput`, `ModuleScope`, `OptionsMap`, `requests.ListAlertRulesRequest`, `WalletModule`, `CompositeCollectionJavaIterator`, `IProtoNode`, `requests.ListApplianceExportJobsRequest`, `GatewayConfig`, `UseBoundStore`, `PointAndCrossingsList`, `WindowFrameName`, `JSONSchemaRef`, `AVRPortConfig`, `PlayMacroAction`, `AssertNode`, `StatusBarWidgetControlArgs`, `ShaderNode`, `IMGUI`, `EditableProps`, `TypeContent`, `vsc.Uri`, `IndexResults`, `SubjectKeyframes`, `TreeNodeIndex`, `StandardProjectCard`, `SubscriptionNotFoundFault`, `Analytics`, `Json.StringValue`, 
`StepsProps`, `KeyPairTronPaymentsConfig`, `ComboEventPayload`, `IParseAttribute`, `EntityReference`, `IFileRange`, `ReadTarball`, `ExtendedAreaInfo`, `ResourceTimelineGridWrapper`, `GeneralSettings`, `CLIElement`, `TransactionBuilder`, `EditablePolygon`, `ACLService`, `redis.ClientOpts`, `BlockParameter`, `ChatLogged`, `LanguageEffects`, `QueryBuilder`, `AudioBufferSourceNode`, `BitfinexWebsocket`, `APIs`, `CommandLineArgs`, `Mapper`, `OBS`, `SimpleUnaryImpl`, `ProxyDao`, `DataViewHierarchyNode`, `request.Options`, `PullBlock`, `VersionComponent`, `ViewableGrid`, `CausalRepoClient`, `d3Request.Request`, `fromSettingsActions.UpdateSettingModel`, `ServiceException`, `IConnections`, `RowNode`, `ServerErrorResponse`, `NodeTree`, `NamespaceObject`, `ts.ExpressionWithTypeArguments`, `PlaylistModel`, `HttpResponseRedirect`, `TokenFetcher`, `PhraseFilter`, `ElasticsearchResponse`, `DAL.KEY_5`, `ConnectionBackend`, `ILocalConfig`, `ModuleBody`, `ServerCancellationToken`, `ResponderModeTypes`, `StackStatus`, `NodeProtocolInfo`, `UsageSummary`, `project.Project`, `ContainerForTest`, `ConditionInfo`, `io.SaveConfig`, `MDCSliderAdapter`, `LCH`, `LineItem`, `ParquetCompression`, `DistinctQuestion`, `program.Command`, `HarmonyAddress`, `IdeaId`, `CustomFeatureConfig`, `ClipboardEvent`, `VgApiService`, `ScannedMethod`, `Fixed8`, `NgControl`, `CreateCard`, `SharedDelta`, `IKeyboard`, `DidDocument`, `IdentifierToken`, `StringPart`, `PanGesture`, `ISizeCalculationResult`, `NumberSchema`, `ResourceList`, `Address`, `ScopedCookieSessionStorage`, `CreateProcedureWithInput`, `CategoryType`, `ReadableStreamController`, `OperatorOption`, `DeleteConfigurationSetEventDestinationCommandInput`, `BaseSkillBuilder`, `mm.IFormat`, `SizeLimitChecker`, `bsiChecker.Checker`, `OffersState`, `Ecs`, `k8s.Provider`, `Cards`, `ChangeUserLanguageDto`, `IPaneRenderer`, `ScannedBehavior`, `IStructuredSearch`, `PackageDetail`, `ListPhoneNumbersCommandInput`, `ViewerRenderInput`, `TimelineDateProfile`, `ArgGetter`, `WebGLRenderbuffer`, `SignedToken`, `MeetingSessionVideoAvailability`, `Clue`, `SharedConfig`, `MergeEl`, `ListTagsForResourceMessage`, `requests.ListVaultsRequest`, `VisContainerProps`, `ErrorDetail`, `PackageManager`, `TreeGridAxis`, `CallHierarchyIncomingCall`, `redis.RedisClient`, `ConsoleService`, `NavigationViewModel`, `ImportMode`, `XNodeData`, `AssetServiceClient`, `ClientPlugin`, `ControlInterface`, `MerchantMenuOrderGoodsEntity`, `AuthorizationCode`, `ServiceHelper`, `BreadcrumbContextOptions`, `Proc`, `FilterBuilder`, `ts.IntersectionTypeNode`, `Types.CodeGenerator.CustomGenerator`, `DaffStatefulCartItem`, `MiddlewareContext`, `AssetResolver`, `CommonSelectedNode`, `StreamID`, `Kernel`, `LmdbDbi`, `unitOfTime.Base`, `MainWindow`, `FunctionShape`, `HmrStyleUpdate`, `ShellString`, `GetIntegrationResponseCommandInput`, `SendMessageOptions`, `ButtonToolConfig`, `QueryPlan`, `ArrayType`, `PendingSuiteFunction`, `TiledObject`, `ApplicationEventData`, `_rest`, `TSBuffer`, `MakeErrorMessage`, `TabStateReturn`, `ArrayExpression`, `LmdbEnv`, `PropertyInfo`, `ProgressData`, `DocController`, `FunctionRunner`, `IChoice`, `SegmentRange`, `YallOptions`, `QConn`, `Sha512`, `GameSagaContextPlayers`, `AdapterConstructor`, `KeySet`, `Farmbot`, `$N.NeighborEntry`, `TestObservableLike`, `PackageDetails`, `OidcSession`, `IHistoryItem`, `BridgeMessage`, `SignatureHelpItem`, `IPC.IShellProcess`, `IOperand`, `TooManyTagsException`, `Joiner`, `ChromeConnection`, `f64`, `WarriorLoader`, `MediaObserver`, `SvgToFontOptions`, 
`BasicProfile`, `... 23 more ...`, `CheatModeMachineContext`, `ElectricRailMovingPoint`, `MutableVector4`, `SignalListener`, `ChangeFn`, `BookmarkChange`, `Plural`, `Referenceables`, `IFaction`, `RetryConfigurationDetails`, `SubsystemType`, `Attachment`, `tfc.Tensor`, `PartialGestureState`, `CKB`, `RpcClient`, `ObsidianLiveSyncSettings`, `Http3RequestMetadata`, `DiscordEmbed`, `Detail`, `ProposalActions`, `OrmConnectionHook`, `TestScriptError`, `StoryLabel`, `ProjectId`, `G2`, `Multiaddr`, `RTCPFB`, `PutScalingPolicyCommandInput`, `Limits`, `StructPrimitiveType`, `CtrAnd`, `Subgraph`, `zmq.Dealer`, `UiStateStorage`, `OutputTargetDocsReadme`, `MockHTMLElement`, `ts.Node`, `RestManager`, `Autocomplete`, `ResultTreeNode`, `IRuleSpec`, `ClearingHouseUser`, `SelectedUser`, `Gif`, `ChartErrorEvent`, `LoadDataParams`, `DeleteProfileCommandInput`, `Probot`, `SomeInstance`, `SavedObjectMetaData`, `OrgType`, `NodeJS.Signals`, `ExternalFile`, `Operand`, `IVersionedValueWithEpoch`, `IVertoCallOptions`, `IStoreOffer`, `TextFont`, `SymbolDataVisibility`, `ObjectNode`, `FsTreeNode`, `FnArg`, `CompressedPixelFormat`, `SelectStep`, `SectionDataObject`, `ConnectState`, `InterfaceType`, `IDisposer`, `BaseMarker`, `Toxic`, `MemoryFileSystem`, `HsSaveMapManagerService`, `ConnectedOverlayPositionChange`, `RequesterMap`, `CdsIcon`, `ValidResponses`, `Submit`, `TextureDataFloat`, `LeaderboardRecord`, `URLSearchParams`, `books.Table`, `ExceptNode`, `DaffCartItemFactory`, `ExpectedResponse`, `TaggedNumericData`, `SerializedObject`, `RenderFlag`, `TimeIntervalTriggeringPolicyConfig`, `LocalParticipant`, `JobType`, `UseSidePanelProps`, `ICollectParms`, `RuntimeEngine`, `TransitionSpiral3d`, `SpaceStyleProps`, `DejaPopupAction`, `types.ScrollData`, `CommandManager`, `CreatePresetCommandInput`, `Children`, `Mod`, `Interior`, `Handlebars.HelperDelegate`, `ModbusEndianness`, `Install`, `TexImageSource`, `DynamicAlternative`, `FlowParameter`, `LoginAccountsRequestMessage`, `ValidationComposite`, `DeleteOneOptions`, `LineChart`, `WebcamIterator`, `RemoteEngine`, `InvalidPaginationTokenException`, `SpeechGenerator`, `MdlPopoverComponent`, `Script3D`, `TableListParams`, `esbuild.BuildResult`, `MockTextNode`, `core.ScalarOutSpread`, `IntrospectionSchemaVersion`, `FolderId`, `TabContentItem`, `ResourceChange`, `ImageService`, `IReader`, `IIndexPattern`, `HttpLinkHandler`, `RequestType2`, `MacroAction`, `IGameEditorContext`, `DbRefBuilder`, `Wnd`, `ArtifactItemStore`, `TreeNodeProps`, `XyzaColor`, `BulletViewModel`, `OptionsSync`, `FoamGraph`, `ActionTypes`, `ValidationRules`, `Enhancer`, `SearchConfig`, `GfxRenderHelper`, `DomPortalOutlet`, `TransactionResult`, `MailStatusDto`, `SecService`, `FieldTypeMetadata`, `Blockchain`, `TextMap`, `DatabaseTransactionConnection`, `INavNodeFolderTransform`, `SchemaField`, `TranslationService`, `IMQRPCRequest`, `requests.ListVmClusterUpdateHistoryEntriesRequest`, `QuestionType`, `SetterOrUpdater`, `Ingress`, `ModalState`, `EventListenerOrEventListenerObject`, `Binder`, `ExtraArgs`, `CommandEventType`, `UX`, `EdgeType`, `FcEdge`, `PageBoundingBox`, `ValueSuggestionsGetFnArgs`, `AWSOrganizationalUnit`, `PageObject`, `GenerateResponse`, `SerializableValue`, `HSLVector`, `SpawnASyncReturns`, `System_Array`, `PDFContentStream`, `IWallet`, `UploadxService`, `FormField`, `DeleteRuleGroupCommandInput`, `FrameNote`, `ScannedDocument`, `ISnippet`, `DueReturn`, `DeregisterInstanceCommandInput`, `IWindow`, `StackDataValues`, `Restangular`, `ClientTipsData`, `ExternalData`, `SMTFunction`, `WFWorkflow`, 
`AnalyzerEntry`, `ChainIdLike`, `NumberDraggerSeg`, `DraggableInfo`, `Matrix3x2`, `IAggType`, `JSDOM`, `ListModelDeploymentsRequest`, `StoreApi`, `FirmaSDK`, `PublishResponse`, `pxtc.SymbolInfo`, `CommentRequest`, `PartialValues`, `FunctionOrConstructorTypeNode`, `ReturnValue`, `API.IMiscInfos`, `MilestoneDataPoint`, `OAuthConfigType`, `WorkerManager`, `TextWriter`, `ListTagsForResourceCommandInput`, `IDinoRequestEndProps`, `ConfigParams`, `DataViewObject`, `HandlerMap`, `WebSocketLike`, `Table2`, `CreateProcedureOptions`, `ProxyObject`, `EpochIteratorConstructor`, `IReCaptchaInstance`, `NormalizedComponentOptions`, `AstParsingResult`, `BitMatrix`, `NgGridRawPosition`, `ForEachPosition`, `IResolvedUrl`, `CdtFrontElement`, `TableConstraint`, `ProtoCtx`, `CompilerBuildStart`, `XArgs`, `ExpressionType`, `ts.IndexSignatureDeclaration`, `UpdateActivatedEvent`, `GfxProgram`, `V1CommandLineToolModel`, `V1Role`, `WebSession`, `FILTERS`, `SpywareClass`, `MutationFunction`, `Area2DSW`, `ICurve`, `DkrTextureCache`, `ComponentMeta`, `TypedObject`, `ScriptThread`, `IdType`, `Bals`, `knex.Raw`, `StateTree`, `BatchSync`, `TooltipOperatorMenu`, `IntegrationClass`, `StyleObject`, `ComplexNode`, `GX.LogicOp`, `ISubmitData`, `UserExtendedInfo`, `Showtime`, `CalculationYear`, `ProtocolMessage`, `ElementCore`, `Chalk`, `RecoilTaskInterface`, `DecoratorData`, `CreateChannelBanCommandInput`, `StoreLike`, `MessageIDLike`, `IExcerptTokenRange`, `OrientedBounds`, `CapabilitiesSwitcher`, `SearchFilter`, `BaseOperation`, `AngularExternalStyles`, `BackwardRef`, `OpenSearchDashboardsReactContextValue`, `TestKafka`, `ShippingAddress`, `FreezeObject`, `GitHubLocation`, `IDataModel`, `FrontCardAppearanceShort`, `MetaTransaction`, `MonacoEditorService`, `JSONInput`, `CreateTableOptions`, `IScene`, `JSONSchemaType`, `FileListProps`, `WTCGLRenderingContext`, `UserAnnotationSet`, `AppViewRoute`, `Cons`, `TasksService`, `Receipt`, `d.FsReadOptions`, `Quadrant`, `typedb.DBType`, `TestSource`, `Labor`, `NVMJSONNodeWithInfo`, `IExecutionResult`, `Picture`, `EthersBigNumber`, `AbstractMesh`, `PrRepository`, `CalculateHistogramIntervalParams`, `d.JestConfig`, `Labware`, `TextChange`, `GasMode`, `SetContextLink`, `coreClient.RawResponseCallback`, `ILabelConfig`, `RolandV8HDConfiguration`, `Datapoint`, `StateMethodsImpl`, `VectorList`, `RequestResponseLog`, `DeployedWallet`, `SequenceInterval`, `UnitState`, `ConstantArgs`, `AccessModifier`, `NextFunction`, `LoanMasterNodeRegTestContainer`, `InAppBrowser`, `CreateDBInstanceCommandInput`, `IMarkerData`, `GetResultType`, `FullLink`, `SfdxError`, `ITypeFactory`, `NoteNumberOrName`, `LabelUI`, `FromToWithPayport`, `IntNode`, `dom5.Node`, `ListConnectionsResponse`, `MediatorMapping`, `Person`, `CreateWebACLCommandInput`, `Vertex`, `TestingSystem`, `ITaskData`, `ZoomTransform`, `MapboxMap`, `AnyAction`, `ControllerAction`, `APIGateway`, `CanvasSide`, `TAtrulePrelude`, `ICreateVsamOptions`, `Ctor`, `IterableDiffers`, `ScatterProgram`, `OpenAPIV2.Document`, `IRecurringExpenseEditInput`, `DaffProduct`, `InspectResult`, `DaffConfigurableProduct`, `PaymentV2`, `IApiSourceResult`, `EmotesProvider`, `INodeCredentialsDetails`, `SchemaProperty`, `RollupWarning`, `TDiscord.TextChannel`, `EditorView`, `CopyResults`, `GreetingService`, `RowMap`, `ContextItem`, `ITableMarker`, `OpenSearchQuerySortValue`, `IBoundingBox`, `RemoveEventListener`, `IBoxProps`, `ICreateUserDTO`, `ALBEvent`, `JsNode`, `React.SFC`, `StateObservable`, `FieldFormatConfig`, `ReduxActions.Action`, `PrEntity`, 
`DeleteDomainNameCommandInput`, `FilterConfig`, `mongoListener`, `LedgerReadReplyResponse`, `IORouter`, `fakeDevice.Device`, `ModuleLoaderActions`, `DirectiveLocation`, `TypeOf`, `ResourceHash`, `MockData`, `TaskParameter`, `UserID`, `MemberAccessFlags`, `Message`, `GfxPass`, `TileMeta`, `DebugNode`, `FinalizeHandlerArguments`, `ITargetReference`, `Bytecode`, `LibResolver`, `HybridOffsets`, `CookieAttributes`, `AxeCoreResults`, `Realm`, `UrlGeneratorsSetup`, `GetPolicyRequest`, `SeparableConv2D`, `SemanticTokensParams`, `RequestTask`, `ICellRendererParams`, `CucumberQuery`, `puppeteer.Browser`, `ActorComponent`, `MultipleFieldsValidator`, `ImageTexture`, `WolfState`, `RetryHelper`, `ListSafetyRulesCommandInput`, `EnergyMap`, `SArray`, `RelationIndex`, `TSAudioTrack`, `ProfileOrganizationResponse`, `EarlyReturnType`, `Stroke`, `MeetingAdapterStateChangedHandler`, `HttpClientConfig`, `Campaign`, `Wechaty`, `OtherArticulation`, `TargetGroupAttachment`, `TableOfContentItem`, `Node.Identifier`, `CreateJoinQueryOptions`, `ApplicationDefinition`, `ReadContext`, `CreateMemberCommandInput`, `StorageIdentifier`, `OAuth2Service`, `RouteOptions`, `GridCellBox`, `MsgFromWorker`, `CardFinder`, `OasVersion`, `ExcaliburGraphicsContext2DCanvas`, `DevError`, `GherkinDocument`, `TodoTask`, `ServiceErrorType`, `TaskDraftService`, `CreateReplicationConfigurationTemplateCommandInput`, `VariableContext`, `IPhysicsMaterial`, `HoistState`, `TracksState`, `IMdcSegmentedButtonSegmentElement`, `ICosmosTransaction`, `ResolvedConceptAtomTypeEntry`, `SavedSearchSavedObject`, `BaseInteractionManager`, `AzExtParentTreeItem`, `BinaryNode`, `LoadResult`, `Runtime`, `HttpProvider`, `Vol`, `StoreReadSettings`, `EmbeddableStartDependencies`, `JsonPath.ExpressionNode`, `PhysicsStatistics`, `ChatNodeVM`, `MessageState`, `FlattenedProperty`, `MavenTarget`, `IFormPageState`, `HeaderRepository`, `SubSymbol`, `GLclampf3`, `ActionsStage`, `ListProjectsCommand`, `GShare`, `RootParser`, `GetDocumentCommandInput`, `GeometricElement2dProps`, `HitTesters`, `SettingItem`, `Serials`, `GraphqlData`, `IVimStyle`, `WebviewPanelOnDidChangeViewStateEvent`, `ResolveOptions`, `io.LoadOptions`, `LayerArgs`, `EventDoc`, `GeometricElementProps`, `RequestHandler0`, `OrchestrationVariable`, `BitBucketServerAPI`, `DataSourceSettings`, `vscode.DiagnosticCollection`, `FlamegraphNode`, `JPACVersion`, `PredicateOperationsContext`, `SelectPlayer`, `DefaultInputState`, `SequenceConfiguration`, `SitemapXmpResults`, `DepositKeyInterface`, `Docker.Container`, `SortDirectionNumeric`, `GL2Facade`, `Pause`, `ee.Emitter`, `OperatorAsyncFunction`, `MultisigItem`, `UrlGeneratorsDefinition`, `HoverResults`, `MarkdownTheme`, `JSON_PayloadInMask`, `DataMaskCategory`, `StateStorageEngine`, `AxeResultConverterOptions`, `PyJsonDict`, `ParamNameContext`, `LinearGradient`, `SkygearError`, `ProcessedBundle`, `ISnapshotContents`, `PlayerChoice`, `SpawnOptionsWithoutStdio`, `PostToken`, `Highcharts.Popup`, `EngineAttribute`, `OpenAPIObject`, `ArrayProps`, `NettuAppRequest`, `FieldAgg`, `ITestCase`, `NativePointer`, `TwoSlashReturn`, `SwatchBookProps`, `UserGroupList`, `InputOption`, `d.PrerenderStartOptions`, `IHeader`, `AppAuthentication`, `FormErrorMessageType`, `IsEqual`, `PLI`, `StructureRampart`, `ContainerContent`, `ICoinProtocol`, `TalkOpenChannel`, `SwitchCallback`, `CF.Subscribe`, `KontentHttpHeaders`, `GestureEvent`, `IAtDirectiveData`, `MetaBlock`, `FSNoteStorage`, `CryptoFactory`, `ContextInterface`, `IPrivateKey`, `ProxyRulesSubscription`, `NodeJS.ReadableStream`, 
`OrganizationAccountConfig`, `PropOfRaw`, `ImageUpdate`, `FrameStats`, `IFormatProvider`, `LicenseSubs`, `RecipientAmountCsvParser`, `SelectorCore`, `TlaDocumentInfos`, `ReadonlyObjectDeep`, `Reverb`, `ExchangePair`, `PersistentCache`, `ParjsResult`, `UserManagerInstance`, `MetricCollection`, `UserConfigDefaults`, `UndoState`, `Widgets`, `ParsedInterface`, `ContractInterfaces.Market`, `KintoClientBase`, `Species`, `Interaction`, `ResponseIssue`, `RangePartType`, `FileQuickPickItem`, `ElementArray`, `Auth`, `ContentActionRef`, `PropertyToValues`, `GraphQLRequestEnvelope`, `SavedObjectMigrationFn`, `requests.ListAnnouncementsPreferencesRequest`, `DemoVariable`, `MetaTagModel`, `Redis.Redis`, `IAnyType`, `AlainDateRangePickerShortcutItem`, `SpanKind`, `HydrateFactoryOptions`, `BarcodeScannerConfig`, `GanttDatePoint`, `WhitePage`, `SingleSigSpendingConditionOpts`, `sinon.SinonSpy`, `EIP712Domain`, `RangeFilter`, `InterfaceSymbol`, `ExchangePriceRecord`, `IPathfindersData`, `TextArea`, `IndieDelegate`, `RebootInstanceCommandInput`, `RpcMessagePort`, `ColorResult`, `TokenPair`, `express.Handler`, `types.signTx`, `NAVObjectAction`, `DetectEntitiesCommandInput`, `GPGPUContext`, `VtxLoader`, `DescribeCertificateAuthorityAuditReportCommandInput`, `ExportSummaryFn`, `ISequencedClient`, `CdsControl`, `ActionSpec`, `TextEditChange`, `IVectorSource`, `UI5Config`, `IFoundElementHeader`, `IFileSystemCreateLinkOptions`, `SavedObjectsStartDeps`, `SfdxWorkspaceChecker`, `ListContactsCommandInput`, `DebugProtocol.DisconnectResponse`, `InitiateResult`, `ClientFactory`, `FileLocationQuery`, `SegEntry`, `TUserBaseEntity`, `MongooseModel`, `PinLike`, `DataSourceTileList`, `AngularExternalResource`, `Locker`, `VChild`, `ParsedRange`, `TransactionReceipt`, `ContractTxQueryResult`, `AsteriskToken`, `NominalTypeSignature`, `CommitIdentity`, `IDBTransactionMode`, `ContextMasquerade`, `Parent`, `monaco.editor.IReadOnlyModel`, `SaveResult`, `IDraft`, `Bsp`, `GetMetaDataFunction`, `ConnectionOptions`, `OverlayKeyboardDispatcher`, `requests.ListTopicsRequest`, `ServiceTypeSummary`, `NotificationRepository`, `TestLegacyLoanToken2`, `AndroidProjectParams`, `DashboardCellView`, `WebViewMessageEvent`, `StreamingStatus`, `ObjectOrArray`, `ViewData`, `IMod`, `GetUpdatedConfigParams`, `StatisticAverageBlockTime`, `BatteryCCReport`, `ILinkedListItem`, `WordCache`, `BridgingPeerConnection`, `React.Context`, `PouchFactory`, `ErrorEmbeddable`, `InitiateOptions`, `SessionLogoutRequest`, `TimingSegmentName`, `NumberKey`, `IObjectDefinition`, `Views.View`, `HasFancyArray`, `EVMParamValues`, `InfraConfigYaml`, `RouterReducerState`, `SendMailOptions`, `GroupButton`, `PositionData`, `IServerGroup`, `BotState`, `JwtAdapter`, `SafeHTMLElement`, `SidebarMenuItem`, `AnyRawBuilder`, `DeploymentResult`, `ISetCombinations`, `ICommonCodeEditor`, `StackItem`, `soundEffectInterface`, `DaffError`, `ProtocolRunner`, `cwrapSignature`, `RAL.MessageBufferEncoding`, `TestKernelBackend`, `DbEvent`, `Int128`, `WebDependency`, `BeancountFileContent`, `InsertOneWriteOpResult`, `OperateBucketParams`, `InsightInfo`, `Masking`, `ThemeExtended`, `Strings`, `NetworkSet`, `PBRMaterial`, `DemoService`, `CheckboxChangeEvent`, `CmsEntryPermission`, `MetronomeNote`, `defaultFontWeights`, `QuoteOptions`, `Trader`, `ConvoState`, `requests.ListManagementAgentsRequest`, `IDataFilterValue`, `StickerExtT`, `Fx`, `InferenceInfo`, `EditableRectangle`, `ArrayValues`, `MinecraftVersionBaseInfo`, `WNodeFactory`, `SubdivisionScheme`, `QueryBinder`, `ZoneNode`, 
`SessionStateControllerTransitionResult`, `EvaluationScopeNode`, `ConditionalType`, `ComponentMap`, `requests.ListBdsApiKeysRequest`, `WebXRSystem`, `StorageTier`, `ZoneState`, `ParamIdContext`, `ISetupFunction`, `IntCV`, `API`, `TModel`, `G`, `IMenuState`, `DateTime`, `PlaceholderComponent`, `IStoreData`, `CallingBaseSelectorProps`, `OAuth2Client`, `IUnitProfile`, `TestThrottler`, `SyncableElement`, `SubscribeMessage`, `HardwareModules`, `JustValidate`, `Sticker`, `Jsonable`, `PutItemInput`, `PathUpdater`, `DataviewSettings`, `ImportBlock`, `QuizServices`, `EAVNField`, `SelectionEvents`, `SlackMessageArgs`, `Tensor1D`, `Substitution`, `vscode.EventEmitter`, `StateDecorator`, `DataPin`, `HostContext`, `BlobServiceClient`, `AsyncBarrier`, `NumOrString`, `d.OutputTargetWww`, `FastFormFieldComponent`, `ImportFromNode`, `TDiscord.Client`, `LocalVideoStreamState`, `UpdateCourseOverrideBody`, `PathFragment`, `AST.OperationNode`, `Iprops`, `ComputedShapeParams`, `AppDefinitionProps`, `ListClustersRequest`, `KeychainCredential`, `GuildMember`, `TemplateBlock`, `TernaryNode`, `LogFormula`, `Pool2DProgram`, `BaseProps`, `CommandLineBinding`, `SFOverrides`, `Xform`, `MsgDeleteProviderAttributes`, `ListJobsCommandOutput`, `Gettable`, `EcommerceItem`, `ThyDialog`, `ConnectedProps`, `SwapTransition`, `NavNodeManagerService`, `SerializableObject`, `CheckPointObject`, `ReadableStreamDefaultController`, `RoomFacade`, `CartItemsResponse`, `IStorage`, `ContentRef`, `requests.ListAnalyticsInstancesRequest`, `UniOption`, `DisassociateFromAdministratorAccountCommandInput`, `IFilePropertiesObject`, `EventPublisher`, `Nes`, `SelectOptionConfig`, `ICreateOrgNotificationOptions`, `SizeData`, `IHistoryFileProperties`, `THREE.SkinnedMesh`, `RestoreRequest`, `DeleteConnectionRequest`, `ServerCapabilities`, `Security2Extension`, `RegionLocator`, `PlyAdapter`, `MapPolygonSeries`, `Lru`, `Cropping2DLayerArgs`, `PrivKey`, `ScreenElement`, `BlockNumberPromise`, `TranslationBundle`, `MappedCode`, `PacketInfo`, `Awaitable`, `TextChannel`, `ITerminalProvider`, `PortalManager`, `N3`, `DynamoDBDocumentClientResolvedConfig`, `AggParamsAction`, `WorldReader`, `PackageJsonInfo`, `GfxRenderTarget`, `QueryableFieldSummary`, `ConfigurationManager`, `UpdateLongTermRetentionBackupParameters`, `vec2.VectorArray`, `TypeEnvironment`, `Analysis`, `IModelHubClientError`, `AggDescriptor`, `lua_State`, `PartialReadonlyContractAbi`, `UserPreferences`, `TabPane`, `PushSubscription`, `ApiExperiment`, `InstancedBufferGeometry`, `PropertiesMap`, `ArchiveHeader`, `MalSeq`, `INorm`, `InputEventMouseButton`, `WaveFile`, `NftMeta`, `IL10nsStrings`, `ExponentSpec`, `SendableMsgBody`, `OutputTargetDocsCustom`, `AsyncManager`, `CipherResponse`, `FetchFunction`, `PortBinding`, `CreateSelectorFunction`, `ArrayShape`, `ListObjectsV2CommandInput`, `AutoRenderOptionsPrivate`, `RenderItem`, `ExiftoolProcess`, `ServiceDownloadProvider`, `MenuModelConfig`, `LinkedHashMap`, `CameraRig`, `ReferenceMonthRange`, `ReactExpressionRendererProps`, `LeafCstNode`, `SceneGfx`, `InternalPlotConfigObject`, `Lint.IOptions`, `SingleVertexInputLayout`, `AsyncSubject`, `AliasesMeta`, `EventListenerRegister`, `ToastrManager`, `TPositionPair`, `SMTLet`, `CameraHelper`, `AttributeOptions`, `RecordRepresentation`, `ElementSession`, `RecordItem`, `Viewport_t`, `EmbedProps`, `ParseSuccess`, `ShortcutID`, `TableInsertEntityHeaders`, `CreateAppInstanceAdminCommandInput`, `ODataBatchRequestBuilder`, `IViewportInfo`, `IScopeData`, `IServiceConfiguration`, `SymbolLinks`, `FolderResponse`, 
`PlaybackStatus`, `FocusZone`, `Slack.Message`, `SubmissionServiceStub`, `serialization.Serializable`, `ToastId`, `messages.Envelope`, `TexMtxMapMode`, `JsonRpcRecord`, `MediaDescription`, `UserCredentialsRepository`, `ValidationHandler`, `TableSchema`, `MinMaxConstraint`, `HoverProvider`, `ParamInfoType`, `SslSupport`, `LoudMLDatasource`, `DataModel.RowRegion`, `IGeometryProcessor`, `CommentUI`, `FileModel`, `SavedObjectsBulkUpdateObject`, `VoyagerSubscriptionContextProvider`, `SizeResult`, `ContractMethodDescriptor`, `MeetingHistoryState`, `MultiChannelAssociationCCAPI`, `UserContextType`, `FunctionArgument`, `ConnectResponseAction`, `DokiTheme`, `AnimationComponent`, `SubjectDataSetFilter`, `RoleMapping`, `UserRegisterResource`, `BufferLines`, `CanaryConfig`, `BatchWriteCommandInput`, `FindProjectQuery`, `DatabaseFeatureOptions`, `HubProduct`, `TabNavigationBase`, `Howl`, `CombatVictorySummary`, `CoreTheme`, `DeleteTagsCommand`, `CompilerAssetDir`, `IGif`, `_Transaction`, `ComponentService`, `Directions`, `Tooltip`, `PropertyConfig`, `Measurable`, `OffscreenCanvas`, `AnyArray`, `SlashCommandConfig`, `ProfileResponse`, `Aspect`, `Re`, `ShootingNode`, `RetryStatus`, `Config.IConfig`, `DocsBinding`, `WorkspaceConfiguration`, `U8.U8Archive`, `pxt.Package`, `ts.Token`, `ExtHTLC`, `AddTagsCommandOutput`, `CountingChannel`, `UpdateProfileRequest`, `ChainTokenList`, `MongoDB.Filter`, `StubStats`, `Stylesheet`, `IMutableVector4`, `UsersRepository`, `RequestEntryState`, `AuthenticationInstruction`, `UI5Namespace`, `PutPolicyCommandInput`, `CommentTag`, `builder.UniversalBot`, `SupervisionContext`, `ast.ParserRule`, `FlagValue`, `LogSource`, `MarkerRange`, `MarginCalculatorInstance`, `OverlayReference`, `IWinstonData`, `MpegFrameHeader`, `ArcadeBody2D`, `OrderStruct`, `SearchResourcesCommandInput`, `HookReturn`, `PlaneTransformation`, `ImmutableCell`, `CurrencyAmount`, `WebAppConfigStack`, `YamlParser`, `THREE.Color`, `ECSqlStatement`, `FieldConfiguration`, `GetDedicatedIpsCommandInput`, `RegistrationData`, `StartTagToken`, `PointerCoords`, `AxisLabelFormatterContextObject`, `PresentationRpcRequestOptions`, `EntryType`, `CodeSpec`, `ClassReflection`, `LTypeResolver`, `WorkspaceServiceInstance`, `ExtendedArtifact`, `IContext`, `SelectorsMatch`, `VectorLayerDescriptor`, `QueryFormData`, `PublicKey`, `MenuI`, `Ethereum`, `Electron.OpenDialogOptions`, `TemplatePortal`, `PaginatedSearchOptions`, `DeveloperClient`, `BoxObject`, `CompletionOptions`, `SourceMaps`, `P2SVpnGateway`, `FetchListOptions`, `PropertyDeclaration`, `RegisteredServiceAttributeFilter`, `SceneDesc`, `CoingeckoApiInterface`, `ValueCtx`, `AbsolutePosition`, `SparseMatrix`, `Vis`, `cdk.CustomResource`, `RepositoryFacade`, `AngularFirestore`, `OpticFn`, `OperatorFinishEncodeInfo`, `SheetSpec`, `PrefetchIterator`, `EnumDescriptorProto_EnumReservedRange`, `IFsItem`, `ITemplateBaseItem`, `GetStaticPaths`, `LocationSet`, `NotificationState`, `OutputChannel`, `AssociateServiceRoleToAccountCommandInput`, `CartPage`, `CodedError`, `HydrateResults`, `MergeDeclarationMarker`, `AuthenticateAppleRequest`, `CodeEditor.IToken`, `d.CollectionCompilerMeta`, `ISuite`, `IContentVisitor`, `FoldingRange`, `ConnectedSpaceGraphics`, `MockDocumentTypeNode`, `TransceiverController`, `IDetachable`, `CloneRepositoryTab`, `CustomLink`, `TSESLint.RuleModule`, `SavedQueryMeta`, `TempoEvent`, `LngLatBounds`, `PerformOperationResult`, `EdmT`, `FlatTreeControl`, `Basket`, `TwingNodeType`, `Bank`, `RTCRtpCodecParameters`, `HleFile`, `IQueryBus`, `PaperProfile`, 
`StoredFile`, `d.ComponentCompilerData`, `AggsState`, `IncludedBlock`, `WsTitleService`, `ResolvedOptions`, `ManagementAgentGroupBy`, `PublicSymbolMap`, `AssignmentDeclarationKind`, `ChainMergeContext`, `JsonAPI`, `AssignmentCopyStep`, `BuildMiddleware`, `TaskParameters`, `SourceFuncArgs`, `dom.Node`, `AllowedNetworks`, `ChannelStateWithSupported`, `CloudFront`, `I18NextPipe`, `ExtractClassDefinition`, `PromoteReadReplicaDBClusterCommandInput`, `InitializationOptions`, `IDireflowConfig`, `StoreST`, `CompilerModeStyles`, `TransactionEventArgs`, `GX.PostTexGenMatrix`, `QueriesResults`, `EndpointDefinition`, `PowerPartial`, `Rate`, `IMatrixEvent`, `DocumentStore`, `ListSecurityProfilesCommandInput`, `IIFeedsState`, `SubjectDetails`, `UUID`, `GetKeyboardResponseOptions`, `HaliaPlugin`, `FargateTaskDefinition`, `BaseArrayClass`, `SendProp`, `ProgressList`, `Scripts`, `IMidwayFramework`, `TableBuilder`, `SMTMaskConstruct`, `TRK1`, `Panel`, `Analyser`, `FileChunkIterator`, `TextFormatter`, `Construct`, `ReduxStoreState`, `PresetMiniOptions`, `IAheadBehind`, `IMark`, `requests.ListWorkRequestLogsRequest`, `ExecutionError`, `ChainState`, `Trait`, `DeserializeOptions`, `d.ScreenshotConnector`, `InsertResult`, `WebSocket.Server`, `IconConfig`, `IterableOrArrayLike`, `BlobDownloadResponseParsed`, `DeleteStudioCommandInput`, `CustomField`, `GuildConfig`, `Subscription`, `RenderingOptions`, `ToplevelT`, `WordOptions`, `ProposalService`, `AssetType`, `SignalingClient`, `MiStageState`, `Subscriptions`, `ArenaNodeText`, `EthereumAddress`, `IntersectionObserverEntry`, `AdaptContext`, `CSSSnippet`, `LevelsActionTypes`, `SessionSourceControl`, `Justify`, `VpcContextQuery`, `SiteLicenses`, `Linter.Config`, `SearchFormLayoutProps`, `DeleteDeploymentCommandInput`, `RelationshipType`, `SelectedPaths`, `ModProperty`, `PureVisState`, `MenuTargetProps`, `IOrg`, `QueryDocumentSnapshot`, `DescriptorProto`, `ContractProgram`, `SavedObjectsUpdateOptions`, `Animation`, `GluegunAskResponse`, `InterfaceDeclaration`, `ModeAwareCache`, `GoogleBooksService`, `IServerSideDatasource`, `OSCMessage`, `MDCCornerTreatment`, `PubKeyType`, `FunctionDefinitionContext`, `Bullet`, `ForceDeployResultParser`, `ThySlideContainerComponent`, `ComparableValue`, `ScenarioCheckInput`, `httpm.HttpClient`, `SaveEntitiesSuccess`, `EmitFileNames`, `ModdedBattleScriptsData`, `RE6Module`, `Now`, `SignedByQuantifier`, `SpineHost`, `IPlugin`, `ExecuteOptions`, `AttrAst`, `SendEmailCommandInput`, `PurchaseOfferingCommandInput`, `ConsumerContext`, `IOfflineData`, `PrimitiveFixture`, `DescribeApplicationsCommandInput`, `IQueryListProps`, `events.EventEmitter`, `ScenarioResult`, `TransformationResult`, `PluginsConfig`, `MSDeploy`, `DescribeAccountLimitsCommandInput`, `IKactusState`, `SponsorsResponseNode`, `DescribeSnapshotsCommandInput`, `ICompilerOptions`, `NotificationID`, `SavedObjectComparator`, `OpConfig`, `AdditionalDetailsProps`, `TestContractAPI`, `DaffCartItem`, `ElementAspectProps`, `NamedTypeNode`, `IBaseComponent`, `EventRegisterer`, `ILegacyScopedClusterClient`, `Codec`, `Observable`, `OnPreAuthToolkit`, `AssignOptions`, `IconifyElement`, `ts.SetAccessorDeclaration`, `TextDocumentSyncOptions`, `ClientMetricReport`, `echarts.EChartsOption`, `NodeVersion`, `SDK`, `BinaryExpression`, `INodeType`, `TRejector`, `Bm.ComposeWindow`, `ISnapshotOptions`, `BrowserWindowConstructorOptions`, `Repo`, `IApiKubernetesResource`, `TernarySearchTreeNode`, `TutorialDirectoryNoticeComponent`, `BuiltLogic`, `IPageProps`, `Classes`, 
`IOrganizationDepartmentCreateInput`, `XMLElementOrXMLNode`, `AwsTaskWorkerPool`, `StridedSliceDenseSpec`, `Eventual`, `FindByIdOptions`, `VaultActive`, `GfxResource`, `CombinedState`, `IFreestylerStyles`, `ExternalProject`, `CurrencyType`, `DeleteModelCommandInput`, `ThingType`, `IMappingFunction`, `TransactionGasPriceComputator`, `MqttMessage`, `PageScrollService`, `GraphQLDirective`, `RequireOrIgnoreSettings`, `MessageAttributes`, `JournalMetadata`, `PipelineVersion`, `LeafletContextInterface`, `SpriteFrame`, `Trap`, `pointInfoType`, `GitHubIssueOrPullRequest`, `SVGTransform`, `ContextAccessor`, `PluginOption`, `CreateEmailTemplateCommandInput`, `ProjectDataManager`, `ItemUpdateResult`, `ErrorWidget`, `FilterValueExpressionOrList`, `SegmentItem`, `KintoObject`, `AuthEffects`, `CompilerEventFileAdd`, `ProjectDto`, `google.maps.MouseEvent`, `LineChartProps`, `forge.pkcs12.Pkcs12Pfx`, `jsdom.JSDOM`, `DescribeImagesRequest`, `PatternCaptureNode`, `PaneInvalidation`, `ListMigrationsRequest`, `LifecyclePolicy`, `SiteConfig`, `TMessage`, `DtlsRandom`, `Indentation`, `FileFilter`, `GitBranchReference`, `SurveyObjectItem`, `FontCatalogConfig`, `MapDispatchToProps`, `Traversable1`, `OptionalDefaults`, `IBazelCommandOptions`, `Calibration`, `WatchCompilerHostOfConfigFile`, `ElementContainer`, `vscode.DocumentSymbol`, `D2`, `RawSavedDashboardPanel610`, `F3DEX_Program`, `ControlCenterCommand`, `DescribeWorkspaceDirectoriesCommandInput`, `PrinterType`, `ILocale`, `TSType`, `MigrateDeploy`, `Insert`, `StacksTestnet`, `HttpRequester`, `ExtensionState`, `OutputChunk`, `MIRConceptType`, `NumberW`, `TableRequestProcessorsFunction`, `PokemonSet`, `Timings`, `ReductionFn`, `JsonPath`, `MediaStreamConstraints`, `Crawler`, `ApiGatewayLambdaEvent`, `DinoContainer`, `AudioClip`, `a.Module`, `SetConstructor`, `CallbackEntryHelper`, `IGiftsGetByContactState`, `Geom.Rect`, `ItemDataService`, `AT`, `EditText`, `AppType`, `SEMVER`, `optionsType`, `NormalizedConfigurationCCAPISetOptions`, `mitt.Handler`, `QueryPayload`, `DBSchema`, `WalletName`, `ElementAnimateConfig`, `vscode.OpenDialogOptions`, `LineAndCharacter`, `RowTransformFunction`, `Symbols`, `SvelteConfig`, `LovelaceCard`, `ActionFunctionAny`, `TextRewriterState`, `JSEDINotation`, `PvsioEvaluatorCommand`, `ConnectionUI`, `OnboardingItem`, `requests.ListAppCatalogListingResourceVersionsRequest`, `ThemeFromProvider`, `d.CssImportData`, `DataValues`, `ScaleConfig`, `BCSV.Bcsv`, `ServerViewPageObject`, `d.RuntimeRef`, `Matrix22`, `InputTextNode`, `TaggedTemplateLiteralInvocation`, `RedisService`, `RequestorHelper`, `DataSourceService`, `FieldAccessInfo`, `JsxSelfClosingElement`, `TProto`, `WrapEnum`, `PointCloudMaterial`, `ResourceProvider`, `SimpleLogger`, `AuthAction`, `BuildingFacade`, `Accessibility.PointComposition`, `LineColPos`, `Directory`, `Shift.Expression`, `WorkArea`, `ContextualTestContext`, `TCompactProtocol`, `mb.EntityType`, `FilterMetadata`, `ListItem`, `HapiResponseObject`, `Axis`, `LoginResult`, `ArrayType1D`, `ProgressConfig`, `MStreamingPlaylist`, `NerModelVersion`, `FunctionTypeNode`, `CopyDBClusterSnapshotCommandInput`, `ErrorSubscriptionEvent`, `VertexInfo`, `SetShape`, `ShapeProps`, `LiveList`, `ParsedData`, `SignatureDeclaration`, `KxxRecord`, `DealStage`, `JsonRpcRequestPayload`, `ValueMetadataAny`, `SOClient`, `UpdateState`, `SignedTransaction`, `ListenDecorator`, `QueryKeySelector`, `YaksokRoot`, `LaunchTemplateOverrides`, `ConversionResult`, `IpcRendererService`, `UseRequestConfig`, `EitherAsyncHelpers`, `AwsCredentials`, 
`ContextSet`, `ConvectorControllerClient`, `AsyncQuery`, `FlagInfo`, `TaskList`, `UserDetails`, `UserRegistrationData`, `UserFunction`, `MockStateContext`, `NwtExtension`, `XPath`, `ConnectionArguments`, `O2MRelation`, `PortalOutlet`, `ShadowsocksManagerServiceBuilder`, `Platform`, `ConvertService`, `RustLog`, `OnboardingService`, `IMetadata`, `GX.WrapMode`, `ExtConfig`, `Events.activate`, `IChatItemsState`, `StorageError`, `ExportDeclaration`, `GaugeRenderProps`, `IBase`, `NodePbkdf2Fn`, `HdEthereumPayments`, `SpannedString`, `IKeypair`, `Notes_Contracts.Note`, `YT.SuggestedVideoQuality`, `TestWalker`, `MerchantUserEntity`, `UserEmail`, `ScanOptions`, `WidgetTracker`, `Rand`, `P7`, `PopupDispatcher`, `ClanStateService`, `TProduct`, `LoginPayload`, `RoutableComponent`, `RuledSweep`, `d.ComponentConstructorWatchers`, `CldFactory`, `LineSegment`, `StatsCollector`, `DropTargetMonitor`, `NonThreadGuildBasedChannel`, `MediaProvider`, `AnimationInternal`, `ClickOptions`, `RecognitionException`, `SupRuntime.Player`, `vscode.MessageItem`, `SpyData`, `SubscriptionOptions`, `IPeacockElementAdjustments`, `FnN3`, `THREE.Vector2`, `ChildReferenceDetail`, `BitstreamDataService`, `DataExtremesObject`, `EndOfLine`, `Archiver`, `EntityAttributes`, `DeleteIdentityProviderCommandInput`, `StringType`, `d.EmulateConfig`, `cdk.Construct`, `QueryExecutor`, `DeviceType`, `IterationUI`, `BigQueryRequest`, `DirectoryNode`, `FrescoError`, `WheelEventState`, `ListNamespacesCommandInput`, `ImageResolvedAssetSource`, `NormalizedFormat`, `IndicatorQueryResp`, `ColumnPoint`, `LexContext`, `ResolvedAliasInfo`, `VoiceFocusDeviceOptions`, `FIRDataSnapshot`, `A8`, `User`, `ColorSwitchCCGet`, `Skola24Child`, `Discussion`, `IVirtualDeviceResult`, `ParameterType`, `AddFriendsRequest`, `DrawCommand`, `KeycodeCompositionFactory`, `ReadModelStore`, `ISDK`, `FilePreviewModel`, `WatchCallback`, `ReturnStatement`, `LoaderConfOptions`, `InfiniteScrollDirective`, `UniLoginSdk`, `AnchoredOperationModel`, `def.Matrix44`, `SpinnerProps`, `FishSprite`, `LnRpc`, `OrderRepository`, `requests.ListVirtualCircuitBandwidthShapesRequest`, `ProviderIndex`, `IFlavorInfo`, `BackendError`, `NvLocation`, `ParsedAcceptHeader`, `ResilienceOptions`, `LayoutManager`, `p5`, `ElasticsearchConfigType`, `EntityState`, `GlobalInstructionData`, `MUser`, `cc.BoxCollider`, `GQtyError`, `IEventLogService`, `TinaFieldEnriched`, `DatabaseInfo`, `TestResult`, `Terminator`, `QueryParameters`, `HandlerParamMetadata`, `GlimmerAnalyzer`, `__SerdeContext`, `C_Point`, `PartyLeader`, `NodeWorkerMain`, `IPaginationProps`, `VTTCue`, `DesktopCapturerSource`, `CityPickerColumn`, `TestFixture`, `ICommit`, `BookModel`, `React.TransitionEvent`, `DailyRotateFile`, `GetPostsResponse`, `JobID`, `TypeSourceId`, `BooleanInput`, `MiddlewareResult`, `TypeCheck`, `SpawnSyncOptions`, `TiledProperty`, `UpdateRecorder`, `HDKey`, `HsdsCollection`, `LifecyclePeer`, `Types.SocPromise`, `SymbolSize`, `ConcatInputs`, `AlertConfig`, `CustomEditorUpdateListener`, `MyItem`, `NineZoneStagePanelsManagerProps`, `ListNodesCommandInput`, `SFUISchema`, `PlansState`, `EnhancementRegistryDefinition`, `QueryableFieldDescription`, `ZoomLevels`, `AuthenticationSessionsChangeEvent`, `EmitHelper`, `ContractState`, `Meter`, `AddApplicationCloudWatchLoggingOptionCommandInput`, `JavaScriptDocument`, `FirebaseHostingSite`, `ChemicalState`, `chrome.windows.Window`, `ConnectionInformation`, `SpecialKeyMatchResult`, `IMarkdownDocument`, `UpdateChannelRequest`, `WebGLContext`, `IFlowItem`, `L2`, `MessageDataOptions`, 
`DispatchQueue`, `DBProvider`, `PBXFile`, `ContainerSample`, `RequestConfiguration`, `DiffCopyMessage`, `ICompiledFunctionCall`, `DaffRouteWithDataPath`, `HlsEncryption`, `BitcoinPaymentsUtilsConfig`, `IStepAdjustmentView`, `RecentData`, `JSDocTypeExpression`, `PatternSequenceNode`, `IAssetTag`, `Http3Request`, `Register8`, `DMMF.SchemaField`, `Evaluation`, `AggregateValueProp`, `IEncoder`, `TelemetryEvent`, `GraphQLSchema`, `Ninja`, `SliderCheckPoint`, `TemplateParam`, `Domain`, `ElasticsearchServiceStart`, `CurrentProfile`, `APIWrapper`, `ScriptElementKind`, `FilterState`, `ARCommonNode`, `DistinctOptions`, `MediaStreamTrack`, `requests.ListHttpProbeResultsRequest`, `ExercisePlan`, `StepModel`, `LibrariesBuilder`, `UserPaypal`, `BrowseDescriptionLike`, `LoginModel`, `ObjectTypeComposer`, `GraphQLModelsRelationsEnums`, `Authorizer`, `InterceptorOptions`, `EditorFile`, `MonacoFile`, `ThyPopoverConfig`, `DirectiveType`, `NodeRequire`, `TickFormatter`, `ListDatasetsRequest`, `ThyTableGroup`, `ContainerAdapterClient`, `TaskInfoExtended`, `FirenvimElement`, `OpenFile`, `CommerceTypes.ProductQuery`, `SeriesOptions`, `MappingLine`, `AuthReduxState`, `IQuestionToolboxItem`, `TypeNames`, `ProcessExecution`, `Transducer`, `LogCallbackType`, `DataTypeResolver`, `InterfaceWithEnumFromModule`, `DirectionType`, `BrowserLaunchArgumentOptions`, `DryPackage`, `GroupLocalStorage`, `ImageDefinition`, `ListManagementAgentInstallKeysRequest`, `TokenRange`, `UnsubscribeMethod`, `ObjectRelationship`, `VoiceConnection`, `SubmissionObjectEntry`, `PeerService`, `MockDataset`, `S3Location`, `ThyAnchorLinkComponent`, `ExclusiveDrawerPluginConstructor`, `TSESTree.Node`, `HsToastService`, `BoardDoc`, `StreamEmbed`, `NetworkgraphSeries`, `DialData`, `RelationClassDecorator`, `HassEntity`, `SetModel`, `HttpInterceptController`, `TestModuleMetadata`, `SceneNodeBuilder`, `MockPointOptions`, `SentMessageInfo`, `MutableTreeModelNode`, `kbnTestServer.TestElasticsearchUtils`, `ICandidateInterviewersCreateInput`, `RepoService`, `SvelteComponent`, `StoryContext`, `EvmContext`, `MessageArg`, `AuthGuard`, `MockLoadable`, `TextureOverride`, `AddRoleToDBClusterCommandInput`, `MetadataClient`, `ISearchStart`, `ProtocolName`, `RGroup`, `EngineArgs.MarkMigrationAppliedInput`, `model.InstanceOf`, `AppStatusChangeFaker`, `PingPayload`, `jest.CustomMatcherResult`, `IsBound`, `SonarrSettings`, `EntityCollectionResolver`, `IScreenshot`, `FunctionSetting`, `WebhookProps`, `Listr`, `FormatContext`, `ApplicationConfigService`, `WalletConfig`, `ApiType`, `Functions`, `LabDirectory`, `GQLQuery`, `EmbeddingLayerArgs`, `StartDeploymentCommandInput`, `GraphicsGrouping`, `IUserModelData`, `GQtyClient`, `Marks`, `TemplateEngine`, `SourceControlResourceState`, `backend_util.TypedArray`, `Not`, `ListingType`, `FieldDescriptorProto`, `AudioProcessingEvent`, `ICXOrder`, `SearchResponse`, `ContactId`, `IDataItem`, `OptionComponent`, `LabelDefinition`, `DescribeParametersCommandInput`, `Tensor2D`, `FilterQueryBuilder`, `TaskEvent`, `Audio`, `EmployeeStatisticsService`, `AppDataType`, `CrochetModule`, `BoardBuilder`, `ResolveImportResult`, `ImGui.IO`, `ApplicationTheme`, `BSPSphereActor`, `AddUpdatesEvent`, `RegistryContract`, `HoverFeedbackAction`, `LocaleMap`, `DejaColorFab`, `PaneWidget`, `Interpret`, `CSSRuleList`, `VocabularyOptions`, `LanguageInfo`, `mongoose.Model`, `CombinedReportParameters`, `ICharacter`, `DocMetadata`, `House`, `ReturnNode`, `RouterContext`, `ILocalizationFile`, `TaskChecklistItem`, `NameObjExecuteInfo`, `ISong`, 
`SnippetOptions`, `RouteRecordRaw`, `SegmentAPISettings`, `ITimelineData`, `AnimationBuilder`, `ViewContainerPart`, `keyComb`, `GetUsageStatisticsCommandInput`, `HelpRequestArticle`, `IPatch`, `ObservedDocument`, `ISettings`, `IndividualChange`, `TESubscr`, `UIBeanHelper`, `ListFileStatResult`, `DictionaryFile`, `TestScriptErrorMapper`, `AnyPersistedResource`, `EditRepositoryPayload`, `MerchantGoodsService`, `SelectionItem`, `ChipService`, `RayPlaneCollisionResult`, `DragulaService`, `Sidebar`, `KeyMapping`, `Mockchain`, `IEventListener`, `DaffPaypalReducerState`, `CLM.UserInput`, `IEmployeeUpdateInput`, `CellType`, `DeliveryOptions`, `SavedObjectAttributes`, `ValidationBuilder`, `MeetingParticipant`, `Field_Ordinal`, `IndyLedgerService`, `ConnectedPeer`, `RecursivePartial`, `JobRunSummary`, `DateFormat`, `ManagementOption`, `NodeName`, `PanelPoints`, `IMyDateModel`, `CommonService`, `NestedStructuresCommandInput`, `d.Diagnostic`, `ISearchFeature`, `TargetDefinition`, `TokenFilter`, `TranslatePropertyInput`, `IMidwayContainer`, `QueryOrdering`, `HairProps`, `DaffCategoryFilterRangeNumeric`, `TensorTracker`, `DaffCategoryPageLoadSuccess`, `WIPLWebpackTestCompiler`, `CodeGenField`, `NohmModelExtendable`, `GaussianDropoutArgs`, `Binary3x3x3Components`, `MaterialEditorOptions`, `MatBottomSheetContainer`, `HierarchyPointNode`, `AudienceOverviewWidgetOptions`, `hubCommon.IRevertableTaskResult`, `SearchInWorkspaceRootFolderNode`, `ClsService`, `LitecoinBalanceMonitorConfig`, `IGetMembersStatistics`, `UseSavedQueriesProps`, `Frakt`, `ILocationResolver`, `JWTVerifyResult`, `StatusBarService`, `IAccountProperties`, `ISettingsIndexer`, `SvgViewerConfig`, `DynamicColorProperty`, `Oas20Parameter`, `FlowPostContextManagerLabel`, `indexedStore.FetchResult`, `ts.UserPreferences`, `PersistentCharacter`, `VerifyUuidDto`, `IMusicRecordGrid`, `UberToggleState`, `JSParserOptions`, `Vector3d`, `TwingTokenStream`, `HallMenus`, `ScriptTarget`, `Responses.IListContentItemsResponse`, `TreeData`, `angular.IIntervalService`, `Mocha.Test`, `_this`, `TupleCV`, `WifDecodeResult`, `DownloadStreamControls`, `PinType`, `FSM`, `Mesh_t`, `DescribeSourceServersCommandInput`, `DurationLike`, `CreateEventSubscriptionResult`, `CoinbasePayload`, `enet.NetData`, `SkeletonShapeProps`, `AttendanceService`, `LogsConfiguration`, `TriumphNode`, `ScreenName`, `MaxNormArgs`, `StateDictionary`, `ISuperdesk`, `VisualizationLinkParams`, `AccountAttribute`, `BoxUnit`, `TriangleCandidate`, `DateInputObject`, `CompleteLayerUploadCommandInput`, `RElement`, `GasParameters`, `TiledTSXResource`, `UserTokenPolicy`, `listOptions`, `IScreen`, `PayloadHandler`, `Worker`, `CSharpField`, `WithItemNode`, `LabelNode`, `messages.PickleTable`, `AssetParts`, `DescribeGroupCommandInput`, `TransformerStep`, `SSRContext`, `AggConfigsOptions`, `ComponentsObject`, `I18N`, `TypescriptMember`, `ItemWithAnID`, `Mars.AddressLike`, `NavService`, `TPagedList`, `AccountSetBase`, `AuthorizationErrorResponse`, `AtomicToken`, `StyledButtonProps`, `SelectionSet`, `CachedBreakpoint`, `ParameterConstraints`, `RRES`, `Point.PointLabelObject`, `RollupBlock`, `DispatcherPayloadMetaImpl`, `RegistryInstance`, `PrimitivePropertyValueRenderer`, `FeatureCatalogueEntry`, `XMenuNode`, `WrapperProps`, `Upgrades`, `ListProtectedResourcesCommandInput`, `Indices`, `ParseField`, `TSender`, `StyleMapLayerSettings`, `V1WorkflowInputParameterModel`, `ObjectType`, `AnimatorPlayState`, `Tween24`, `WebBinding`, `ParseSpan`, `InstallerMachineContext`, `HealthType`, `ImageryMapExtent`, 
`Json.Segment`, `QueryNodePath`, `ITerminal`, `d.OutputTargetDocsVscode`, `AppStateType`, `FoodRelation`, `ComponentServer`, `CAST_STRATEGY`, `FromYamlTestCaseConfig`, `REQUIRED`, `HttpRequestConfig`, `DOMStringMap`, `FeatureManager`, `AgencyApiRequest`, `AlertAccentProps`, `ProtoKeyType`, `ts.TransformerFactory`, `NumberLike`, `TokenFactory`, `FindEslintConfigParams`, `MaybeAsyncHelpers`, `TestIntegerIterator`, `ScanArguments`, `DialogContext`, `RequestDetails`, `EmitFlags`, `ToastState`, `PvsContextDescriptor`, `GfxrGraphBuilder`, `StringAnyMap`, `PropertyDataChangeEvent`, `TObjectProto`, `PropertyValues`, `OhbugClient`, `VirtualEndpoint`, `SelectToolConfig`, `RnM2TextureInfo`, `Control`, `MythicAction`, `PatternClassNode`, `PartialExcept`, `ReadFn`, `DeviceVintage`, `BillDate`, `ScopeDef`, `RangeValue`, `OutgoingRegistry`, `PositionSide`, `PointRef`, `SavedObjectsImportResponse`, `IExtraArgument`, `DesignerVariable`, `QueueService`, `requests.ListInstanceConfigurationsRequest`, `TextEditor`, `GLRenderingDevice`, `cc.SpriteFrame`, `WorkerPoolResource`, `TaskEither.TaskEither`, `CreateLoadBalancerCommandInput`, `FilterCallback`, `WexBimRegion`, `interfaces.BindingWhenOnSyntax`, `MessengerTypes.BatchItem`, `MetaSchema`, `restm.RestClient`, `MatDialog`, `PerformListFilesArgs`, `MovieDetails`, `Linkman`, `RandGamma`, `ComponentChild`, `ofAp`, `PropertySignature`, `PyteaServer`, `NetworkProfile`, `CreateApplicationResponse`, `TwoWayRecordObservable`, `AllowedModifyField`, `FunctionMethods`, `TestOperation`, `ApiConfiguration`, `AttributifyOptions`, `FsFiles`, `Events.collisionstart`, `IAmazonClassicLoadBalancerUpsertCommand`, `UniversalRouterSync`, `MongoIdDto`, `IndexInfo`, `TmdbTvDetails`, `MousecaseResult`, `VisualizationsStart`, `DictionaryKeyEntryNode`, `SessionExpired`, `ApiScope`, `RelationshipService`, `DistanceMap`, `KeyboardKeyWrapper`, `DropHandlerProps`, `SyncDBRecord`, `BindingWrapper`, `ObjWrapper`, `MaximizePVService`, `RootType`, `QueryOption`, `TargetRange`, `OperationBatcher`, `sdk.IntentRecognitionCanceledEventArgs`, `AnimationTrack`, `TreeBranch`, `CurrentHub`, `ResizeStrategy`, `SimpleCondition`, `DeploymentOptions`, `Scheduler`, `ListPackagesRequest`, `lsp.Connection`, `Node.Traversal`, `AbstractControl`, `RequestStatistics`, `Models.LeaseAccessConditions`, `IEditorAction`, `GetUserCommandInput`, `apid.ProgramGenreLv1`, `DebugProtocol.ContinueResponse`, `requests.ListIPSecConnectionTunnelSecurityAssociationsRequest`, `RSSSource`, `HsCoreService`, `CommentReply`, `TreemapSeries.NodeValuesObject`, `InnerAudioContext`, `RawTypeInfo`, `ODataFunctionResource`, `IChannelStorageService`, `AstWalker`, `LoaderContext`, `AddressBookConfig`, `BScrollOptions`, `SelectOptionComponent`, `UntypedProductSet`, `DaffAuthTokenFactory`, `UpdateProfileParams`, `FirebaseProject`, `IEtcd`, `CheckOriginConflictsParams`, `KeyFunction`, `TextBuffer.Point`, `Calendar`, `R`, `MovingDirection`, `http.ServerResponse`, `TextElementGroup`, `IMouseZone`, `GherkinType`, `ChainManifest`, `GReaderConfigs`, `ResourceHelper`, `MapOptions`, `PointContainer`, `TreemapSeriesData`, `DebugContext`, `MockErc20Token`, `PluginVersionResource`, `WrapperLayerArgs`, `NpmPackageManager`, `FilePaths`, `JobTypes`, `MockService`, `TextureMapping`, `UseRefetchReducerAction`, `DaemonSet`, `uint16`, `IActionsProps`, `requests.ListAlarmsStatusRequest`, `EventsTableRowItem`, `Event_2`, `PromptProps`, `BitstreamFormat`, `FunctionAppStack`, `AppABIEncodings`, `ProposalManifest`, `IArgDef`, `IPole`, 
`ListLoggingConfigurationsCommandInput`, `RankedTester`, `CheckPrivilegesResponse`, `IpcMainListener`, `ParsedDateData`, `RpcMessage`, `MaterialVariant`, `DataSourceConfiguration`, `SQLStatement`, `ModifyReadResponseFnMap`, `UpdateSiteCommandInput`, `TrialVisit`, `IConfiguration`, `apid.ReserveEncodedOption`, `ThyTableColumnComponent`, `NonExecutableStepCall`, `InputTree`, `PingPongObserver`, `ast.PersistNode`, `WordcloudSpec`, `URLMeaningfulParts`, `RenderPassContextId`, `VroRestClient`, `CompilerEventBuildNoChange`, `SlideComponent`, `ElementNode`, `HoverSettings`, `LightGroupCircuit`, `ThreadState`, `CardManifest`, `ConvLSTM2D`, `THREE.Raycaster`, `SerializedBoard`, `InputParallelism`, `VisualizePluginSetupDependencies`, `GitClient`, `ManualServerConfig`, `EllipsoidPatch`, `FunctionDefinitionNode`, `NoteSequence`, `TypeReference`, `PeerContext`, `AuditoryDescription`, `CompiledProxyRule`, `ProviderRange`, `BooleanSchema`, `Panner`, `http.IncomingMessage`, `ModelStoreManagerRegistry`, `GroupKeysOrKeyFn`, `Shared.TokenRange`, `ResourceFile`, `ParticleArgs`, `FormatterSpec`, `browser.tabs.Tab`, `QuerySuggestion`, `DataPersistence`, `ResourceSystem`, `BrowserDriver`, `ClassElement`, `StepName`, `HTMLFrameElement`, `GenericGFPoly`, `ThrowStatement`, `StateA`, `TimeoutErrorMode`, `PLSQLCompletionDefinition`, `CCAPI`, `Donation`, `HomeOpenSearchDashboardsServices`, `DeleteRegexPatternSetCommandInput`, `IContainerNode`, `IpcEvent`, `ModuleSpecifierResolutionHost`, `WalletResult`, `StringSchema`, `tfl.SymbolicTensor`, `StyleExpression`, `ConfigOption`, `FrameworkType`, `DLabel`, `XSLTToken`, `IConfigOptions`, `SignatureHash`, `UpdateRequest`, `AppStackMajorVersion`, `MidiNote`, `FieldArrayRenderProps`, `StringOrTag`, `HashTag`, `ProofFile`, `gradient`, `PendingQueryItem`, `ListAutoScalingConfigurationsRequest`, `StatementListNode`, `ZeroExTransactionStruct`, `ProcessingJobsMap`, `ActualT`, `ElectronShutdownCommandOptions`, `SignatureInfo`, `RestMultiSession`, `MyEpic`, `PlatformRef`, `Security2CCNonceReport`, `ActivableKey`, `HDWallet`, `LogItem`, `TrackedSet`, `nVector`, `CdsTreeItem`, `Looper`, `ApplyHandler`, `AuthScopeValues`, `IDeliveryClientConfig`, `ErrorFormatter`, `AuthenticatorFacade`, `TensorData`, `ExerciseService`, `Mixer`, `DirectiveDefinition`, `RouteTable`, `DriveFile`, `DiagnosticInfo`, `d.RollupAssetResult`, `IPointCloudTreeNode`, `ParserOptionsArgs`, `RTCCertificate`, `ImportResolverFactory`, `ReactNode`, `Merchant`, `ValueAccessor`, `ContentGroup`, `DataRequest`, `ObjMap`, `OrderedSet`, `CmsEditorContentModel`, `CoreSystem`, `Object3D`, `GunGraphNode`, `CheckRunPayload`, `ICompetition`, `IComponentWithRoute`, `DashEncryption`, `UpdateRegistryCommandInput`, `InputBoxOptions`, `TransformedPoint`, `SnapshotMetadata`, `PrimitiveShape`, `AccountInfo`, `ICommandArguments`, `RaycasterEmitEvent`, `PluginRevertAction`, `SVGRect`, `HeroAction`, `TweetItem`, `JLCComp_t`, `ISecurityToken`, `IdentityProviderConfig`, `PackInfo`, `CollectionState`, `NodeChildAssociationEntry`, `ModelService`, `FlexProps`, `IFeature`, `SFProps`, `oke.ContainerEngineClient`, `NowFile`, `BackstageItem`, `MatSliderChange`, `TopUpProvider`, `requests.ListUserAnalyticsRequest`, `RoamBlock`, `ExtUser`, `ProviderConstructor`, `LabeledStatement`, `TimesheetService`, `DisplayNameOptions`, `ModuleConfiguration`, `Progress`, `Depth`, `Referral`, `LangiumConfig`, `Producer`, `GameObj`, `Unbind`, `InterfaceVpcEndpoint`, `GeoSearchFeature`, `ValidationFuncArg`, `System`, `AWS.S3`, `TimeFormatter`, `ClassRefactor`, 
`IComponentState`, `CheckerBaseParams`, `GetCapabilitiesXmlLayer`, `TRPCResult`, `AnyMap`, `ITransformResult`, `PropertyDescriptorMap`, `DataViewsContract`, `TypeGenerics`, `BluetoothRemoteGATTService`, `UseMutationState`, `Workshop`, `CombinedScanResult`, `PutLifecyclePolicyCommandInput`, `DevicePixelRatioObserver`, `ApplicationEntry`, `SceneControllerConfigurationCCReport`, `IBackgroundImageStyles`, `Hotkey`, `ScatterSeries`, `ArrayBindingPattern`, `ListMultipartUploadsRequest`, `NetworkStatus`, `HsLogService`, `LogProperties`, `LoggerConfig`, `IdentifierAttribute`, `ProjectContainer`, `PlayerContext`, `Sinon.SinonStub`, `TransientSymbol`, `DocumentValidationsResult`, `IModelConfiguration`, `CustomSkill`, `DaffCategoryFilterEqualOptionFactory`, `FilterData`, `CustomParameterGroup`, `ObservableSetStore`, `HandlerProps`, `GX.Register`, `PreprocessorGroup`, `requests.CreateConnectionRequest`, `ThrottleOptions`, `CompareAtom`, `DecimalAdjustOptions`, `CATransform3D`, `TimelineProvider`, `SupportedService`, `google.maps.MarkerOptions`, `GetTableRowsResult`, `ModulesContainer`, `RegionData`, `RepoCommitPathRange`, `IUIMethod`, `TEX0Texture`, `AbstractCrdt`, `AvailabilityStatus`, `ws`, `ProtectionRuleExclusion`, `KibanaPrivilege`, `IJob`, `ICitableSource`, `AccountStore`, `path.ParsedPath`, `OpenSearchdslExpressionFunctionDefinition`, `Functor2`, `SkinnedMesh`, `XListNode`, `RawSourceMap`, `FakeSystem`, `KeyEventLike`, `DockerOptions`, `IValidatedEvent`, `ListGroupUsersRequest`, `EntityCacheReducerFactory`, `ViewGroup`, `EPObject`, `CssRule`, `MutationArgsType`, `VdmComplexType`, `ConfigTypes`, `Pos`, `EnrichedLendingObligation`, `GroupBySpec`, `Collateral`, `UpdateIntegrationCommandInput`, `UiActionsSetup`, `AnimationChannelTargetPath`, `SingleConsumedChar`, `SwitchOptions`, `CandyDateType`, `IRandomReader`, `EthApi`, `NineZoneStagePanelManager`, `RawResponse`, `Intl.NumberFormatPart`, `DisplayProcessor`, `CatService`, `IPositionCapable`, `AspectRatioType`, `DOMInjectable`, `LineHeight`, `ScriptVM`, `IICUMessageCategory`, `ReactTypes.DependencyList`, `d.OutputTargetAngular`, `crypto.Hash`, `DGuard`, `KvMap`, `AMapService`, `ExtendedFloatingActionButton`, `IExecutionQueue`, `Operator`, `DeleteComponentCommandInput`, `Patterns`, `QComponentCtx`, `SbbNotificationToastRef`, `CodeBlockProps`, `SvelteComponentDev`, `IntrospectionEngine`, `UserFunctionDefinition`, `MarkerInfoNode`, `SceneColorTheme`, `ListDataSetsCommandInput`, `IViewPathData`, `HeaderObject`, `MultiKey`, `JSDocNullableType`, `SeriesConfig`, `ApolloSubscriptionElement`, `ValuePredicate`, `DocViewInput`, `NoteData`, `RushCommandLineParser`, `PageViewComponent`, `GenericMonad`, `PaginationModel`, `ODataModel`, `DoorFeatureType`, `BottomNavigationTabBase`, `StatFrame`, `ParameterList`, `IServerOptions`, `LogDomain`, `PositionContext`, `btCollisionShape`, `PickTransformContext`, `Rule.RuleMetaData`, `RawRuleset`, `CommentRange`, `StandardEvents`, `MatHint`, `CallClient`, `ValidationRuleMetaData`, `ListOfPoints`, `Initiator`, `IModelType`, `https.AgentOptions`, `JacksonError`, `PartialCanvasThemePalette`, `LoadingBarsEffectsRefs`, `CreateUserResponse`, `NotificationChannel`, `CustomRequestOptions`, `CorporationCard`, `TEName`, `IPluginOptions`, `NotificationDocument`, `EnumTypeDefinitionNode`, `Slicer`, `webpack.Compilation`, `d.OutputTargetDist`, `CommonMaterial`, `TagEntry`, `UIMillStorage`, `AboveBelow`, `LibraryType`, `Keyed`, `ValueValidationFunc`, `ModuleResolutionKind`, `SingleSelectionHandler`, `LitecoinSignedTransaction`, 
`IDocumentWidget`, `SerializedConsoleImpl`, `CHR0`, `Evt`, `ICachedResourceMetadata`, `ModuleModel`, `DependencyPins`, `DaffQueuedApollo`, `SpeechSynthesisUtterance`, `QBFilterQuery`, `GanttUpper`, `ParserState`, `CardComponent`, `IConnectionsIteratorOptions`, `React.ReactElement`, `ParameterStructures`, `D.State`, `TypeConstructor`, `NoteStateWithRoot`, `EDBEntity`, `UpdateWindowResizeSettings`, `SelfList`, `Combine`, `DesignerNodeConn`, `IServiceProvider`, `FilterEngine`, `VectorTileDataSource`, `UseLazyQueryReducerAction`, `BaseUIManager`, `NextRouter`, `FileCache`, `TracerConfig`, `FileWatcherEventKind`, `ReadableQuery`, `HistoryInteractionEvent`, `ListAssociatedResourcesCommandInput`, `ServiceRecognizerBase`, `IPagination`, `PagedAsyncIterableIterator`, `AddressVersion`, `StunProtocol`, `DocProps`, `ToneOscillatorNode`, `Bar`, `UseComponent`, `ClientRequest`, `CreateDomainNameCommandInput`, `RenderStatistics`, `NotebookNamespace`, `TwStyle`, `RouteNotFoundException`, `DappKitRequestMeta`, `FloatAnimationTrack`, `VoiceState`, `SourceMapSource`, `ObjectConsumer`, `ResolveOutputOptions`, `algosdk.Transaction`, `requests.ListInstancePoolsRequest`, `Villain`, `Http`, `RequestParameters`, `BaseEvent`, `VirtualElement`, `ResponseOptions`, `IListenerAction`, `CrudService`, `UsageCollector`, `SVGRectElement`, `HTMLCanvasElement`, `VerificationToken`, `FlowType`, `Extra`, `SelectorsSource`, `UseRefetch`, `KeyToDiffChangeMap`, `AwaitedMessageEntry`, `GetMasterAccountCommandInput`, `ScopeManager`, `SubState`, `LeaseOperationResponse`, `ListBranchesCommandInput`, `Dual`, `Swizzle`, `TrackedPromise`, `ts.TypeChecker`, `IMatch`, `UILayoutGuide`, `FileElement`, `HTMLDetailsElement`, `LoggerTimeSpan`, `SavedObjectsBulkResponse`, `PropertySignatureStructure`, `PDFAcroTerminal`, `UISettingsStorage`, `ScanRunResultResponse`, `Film`, `EventMap`, `LiftedState`, `SVGGraphicsElement`, `Geocoder`, `SVBool`, `StructDef`, `PLSQLCursorInfosVSC`, `AlertsProvider`, `QueryToken`, `DatabasePool`, `CompletionContext`, `DataModels.Correlations.ProcessInstance`, `HtmlElementNode`, `PlanGraph.Entities.GraphData`, `ICellx`, `PatchFile`, `EntityManager`, `PlasmicContext`, `DeleteReportDefinitionCommandInput`, `Update`, `Lobby`, `EsHitRecordList`, `OperationVariant`, `test.Test`, `Softmax`, `DeleteDataSetCommandInput`, `SharesService`, `PeriodicWave`, `TooltipState`, `TypeTreeNode`, `DropletInfo`, `FakeInput`, `Mentor`, `ISubscriptionContext`, `FabricPointerEvent`, `JointTransformInfo`, `CodeActionProvider`, `IElement`, `RecipientMap`, `XPCOM.nsIXULWindow`, `MeshPrimitive`, `HdDogePaymentsConfig`, `TNoteData`, `EmitTextWriterWithSymbolWriter`, `ErrorCode`, `PreferencesCategories`, `LocKind`, `CaseDesc`, `AlertStatus`, `DescribeTagsCommandOutput`, `InputCurrencyOutput`, `GoodGhostingInfo`, `IntegrationInfo`, `LiveListItem`, `BoundEventAst`, `CaretOptions`, `requests.ListGiVersionsRequest`, `ApiTypes`, `ForwardRefComponent`, `IDiagram`, `ArenaSceneExtraProps`, `DevServer`, `SnapshotIn`, `ABLMethod`, `StepChild`, `UpdateChannelParams`, `MatMenuItem`, `Classifier`, `RegionInfoProvider`, `CoapPacket`, `SqlTuningTaskSqlDetail`, `EventTrigger`, `CollapsableSidebarContainerState`, `IntersectionState`, `TClient`, `RawShaderMaterialParameters`, `k8sutils.KubeClient`, `requests.ListGoodBotsRequest`, `Themed`, `SelectModel`, `dataStructures.BufferMap`, `IStackTokens`, `MessengerTypes.Attachment`, `PackageJSON`, `TrueGold`, `ParseValue`, `AnimatedClock`, `Validity`, `ConfigurationCCSet`, `ServeD`, `Segment3`, 
`ListDatasetImportJobsCommandInput`, `HttpEnv`, `PkgConflictError`, `ExportedConfigWithProps`, `SVGIconProps`, `child_process.SpawnOptions`, `HttpClientResponse`, `PipelineRelation`, `MiddlewareType`, `PbEditorElementPluginArgs`, `FetchEvent`, `SystemVerilogImportsInfo`, `ModelDeploymentType`, `android.animation.Animator`, `Callout`, `CkElementProps`, `FutureWallet`, `MsgUpdateDeployment`, `ModifyDBClusterEndpointCommandInput`, `StartPipelineExecutionCommandInput`, `TreeSelectionModificationEventArgs`, `t.STStyle`, `RequestHandlerContextProvider`, `FnN2`, `AreaService`, `DefaultDeSerializers`, `requests.ListOnPremConnectorsRequest`, `ContextProps`, `ScenarioCheckResult`, `tf.Tensor5D`, `IMdcChipElement`, `TD.DataSchema`, `CSSBlocksJSXAnalyzer`, `InterfaceImplementation`, `SendManyOptions`, `ListInstancesRequest`, `OptionNameMap`, `i18n.TagPlaceholder`, `TXReport`, `Operator.fλ`, `MapMaterialAdapter`, `CreateIPSetCommandInput`, `SequelizeModuleOptions`, `BrowseCloudBatchJob`, `androidx.transition.Transition`, `RequestsService`, `BMMessage`, `TypeConstraint`, `EventStoreDescription`, `DescribeAppInstanceUserCommandInput`, `SectionsType`, `ViewWithBottomSheet`, `ListOdaInstancesRequest`, `IManualTimeInput`, `NodeWithOrigin`, `VirtualGroup`, `CaptureStdout`, `TestReport`, `TruncatedNormalArgs`, `RenderOutput`, `ISampler`, `ESTree.Identifier`, `JSONRPC`, `ImageSegmenterOptions`, `TestCreditCardPack`, `LambdaType`, `LegacyVars`, `FailedJob`, `DiscordBridgeConfig`, `MetricFilter`, `THREE.Box2`, `PiEditUnit`, `LocationId`, `HSLColor`, `knex.Transaction`, `StarPiece`, `IJSONSchema`, `AttachmentID`, `HelpCenterService`, `MockBaseElement`, `ITKeyApi`, `DiscoverSidebarProps`, `ListNotebookSessionsRequest`, `CmsModelField`, `SparseVec`, `SlideData`, `FunctionBody`, `LayerType`, `CoreProcessorOptions`, `ImageItem`, `ShaderSocket`, `MatcherHintOptions`, `WebSocketProvider`, `WhiteListEthAsset`, `MappedTypeNode`, `SymmetricCryptoKey`, `PipelineDescriptor`, `Accessibility.SeriesComposition`, `CalendarViewType`, `ReplyMsgType`, `Quickey`, `CreateSchemaCommandInput`, `requests.ListKeysRequest`, `MemberNames`, `PiEditor`, `IFilterContext`, `Job`, `UserAction`, `ArticlesService`, `DescribeEndpointsResponse`, `TooltipOffset`, `AppointmentId`, `PossibleValues`, `SharedTreeSummaryBase`, `IIncome`, `express.Response`, `Int64`, `unist.Node`, `ListExportsRequest`, `ProcessorModule`, `SimNode`, `DeleteBuilder`, `QuickPickOptions`, `GenerativeToken`, `FuncType`, `App.services.IPrivateBrowsingService`, `Orderbook`, `ProofreadRuleMatch`, `CharacteristicGetCallback`, `StateNodeConfig`, `RegisterCr`, `ComponentFactory`, `Refiner`, `ExpressLikeResponse`, `Product`, `WebGLTimingInfo`, `StoreValue`, `CreateConfigurationSetCommandInput`, `CanvasIconTypes`, `IAnyExpectation`, `GluegunFileSystemInspectTreeResult`, `ScopedLogging`, `IAddOrInviteContext`, `Toc`, `Dir`, `TokenDetailsWithBalance`, `LangState`, `AggResponseBucket`, `xyDatum`, `ThermostatFanModeCCSet`, `QueryHook`, `MapSimulation3D`, `ListJobShapesRequest`, `providers.Provider`, `InternalOpAsyncExecutor`, `AnimationPlayer`, `LangType`, `Path0`, `ColumnProp`, `PanelProps`, `ExecaSyncReturnValue`, `LiteralContext`, `IPackageInfo`, `Affect`, `AttributeKey`, `OpenSearchQueryConfig`, `Cheerio`, `TBase`, `Docfy`, `vscode.Diagnostic`, `CreateImportJobCommandInput`, `Finding`, `ConfigPlugin`, `PersistedStatePath`, `ServiceLocator`, `StateDeclaration`, `CfnPolicy`, `MessageThreadStyles`, `IHttpRequest`, `oicq.Client`, `GeometryData`, `PendingResult`, `PuppetBridge`, 
`KernelBackend`, `ProfileStore`, `WatchService`, `DOMEventName`, `RangeBasedDocumentSymbol`, `UnlitMaterial`, `ToastPosition`, `ConsensusMessage`, `BinarySensorCCReport`, `ConstantAst`, `GeometryObject`, `MalNode`, `SessionAuthService`, `BoostStyleProps`, `ast.ExternNode`, `AnimatedMultiplication`, `StatusMessageService`, `VisualizationsPlugin`, `thrift.TType`, `DependencyPair`, `Datafile`, `ClassWriter`, `BookmarkHelperService`, `DeepMapResult`, `UnsubscribeFn`, `PerformanceObserver`, `ProcessListener`, `PostgrestResponse`, `SlateEditor`, `GameSize`, `requests.ListBdsMetastoreConfigurationsRequest`, `T6`, `UIStorage`, `DeleteIntegrationCommandInput`, `ActionState`, `FabricWallet`, `TransactionAction`, `Auto`, `JoinedReturnType`, `GravityType`, `WithContext`, `IntLiteralNode`, `FooterComponent`, `Collector`, `PredicatePlugin`, `DayPeriod`, `I80F48`, `EventDetails`, `GraphQLObjectType`, `BoardState`, `CodeSnippet`, `BasicColumn`, `BluetoothRemoteGATTServer`, `Thought`, `AutorunFunction`, `SubmissionService`, `ListTranscriptionJobsCommandInput`, `IYamlApiFile`, `DunderAllInfo`, `SwitchEventListener`, `NodeBank`, `CreateTodoDto`, `AdonisRcFile`, `SRoutableElement`, `CreateDatasetResponse`, `FSOperator`, `WebCryptoEncryptionMaterial`, `TransformKey`, `TsSafeElementFinder`, `HomeView`, `TransactionJSON`, `ToolsService`, `CreateIndexCommandInput`, `AccountsScheme`, `ParseTreeResult`, `CallOverrides`, `ActionSheetOptions`, `LoaderOptions`, `IPropertyOption`, `CopyAuthOptions`, `ArgOptions`, `ConvertedDocumentUrl`, `Fanduel`, `MarketInfo`, `ICountryModel`, `CLM.AppBase`, `HasuraModuleConfig`, `TestIamPermissionsRequest`, `AccountsService`, `DescribeTagsCommandInput`, `ChangeAuthMode`, `IFunctionCall`, `LogicalQueryPlanNode`, `ExpiryMap`, `LiveAtlasPlayer`, `HumanData`, `RnM2Accessor`, `UpdateProjectCommandOutput`, `Oracle`, `IMonitoringFilter`, `Controlled`, `TypeSystemPropertyName`, `GitlabUserRepository`, `Intervaler`, `MouseButton`, `MergeConfig`, `Weight`, `SeedFile`, `Introspector`, `NotificationMessage`, `NumberToken`, `VideoRateType`, `PluginConfig`, `DriverMethodOptions`, `CallCompositeStrings`, `CommentItem`, `Datafeed`, `IProgress`, `AutoImportResult`, `SimpleRNNCell`, `PrismaClientRustErrorArgs`, `QuadrantRow`, `ComplexType`, `GraphRecord`, `FirmaWalletService`, `Matrix2x3`, `IHistoryRecord`, `LocationCalculatorForHtml`, `models.ISegement`, `BlockLike`, `sdk.SpeechSynthesisResult`, `PluginsServiceStartDeps`, `ReadyValue`, `QuerySuggestionGetFn`, `fromReviewerStatisticsActions.GetReviewerStatisticsResponse`, `jest.Mocked`, `CachingRule`, `ListPackagesCommandInput`, `PxtNode`, `ERC1155OperatorMock`, `SinonStubbedInstance`, `ThExpr`, `IItemRendererProps`, `Controller`, `LocationState`, `EmitOptions`, `DraftEntityInstance`, `Relationship`, `DefinitionLocation`, `RangeSliderProps`, `ts.ImportEqualsDeclaration`, `MomentumOptimizer`, `CommandResponse`, `PageMargins`, `ChatChannel`, `Shelf`, `FluentDOM`, `CancelablePromise`, `HTMLSlotElement`, `DestinationJson`, `PerformanceTiming`, `ControlFlowInfo`, `ConfigUpsertInput`, `ModeController`, `CreateFunctionCommandInput`, `IDData`, `ChannelType`, `model.Model`, `ICategoricalStatistics`, `Interpolations`, `ExpNumBop`, `ReporterRpcClient`, `MicrosoftSynapseWorkspacesResources`, `UpdatePackageCommandInput`, `ManagementAgentPluginAggregation`, `SelectRangeActionContext`, `PickFunction`, `KeyframeIconType`, `SpyInstance`, `d.CompilerRequest`, `SampleUtterances`, `PAT0_TexData`, `IDataRow`, `SQLDatabase`, `Pitch`, `DMMF.Mappings`, 
`GetAttributeValuesCommandInput`, `DataTypeFactory`, `RenderFlags`, `SavedObjectMigrationMap`, `ExpressionRendererEvent`, `TestRenderNode`, `InternalStores`, `ThySkeletonComponent`, `GlobalNames`, `RepairTask`, `ProjectExport`, `Quakeml`, `ZeroPadding2DLayerArgs`, `PlayerPosition`, `ResourceIdentifier`, `ColumnComponent`, `CreateDataSetCommandInput`, `FileSystemConfig`, `EventBus`, `DiscogsReleaseInfo`, `ProductControlSandbox`, `DataProviderProxy`, `SankeySeries.ColumnArray`, `MapPlayer`, `ts.Expression`, `ITransferProfile`, `PDFArray`, `AccentIconStyles`, `V1Container`, `ActionTree`, `Players`, `TypeLiteralNode`, `ConchVector4`, `ItemController`, `NotifyArgs`, `DefinitionFilter`, `Journey`, `AbbreviationMap`, `PackageJson`, `WindowLike`, `SchemaCxt`, `NotificationSettings`, `DataValue`, `ChartConfiguration`, `ChatClientState`, `HintFile`, `ParserMessageStream`, `ReadonlyJSONObject`, `QueryDeploymentResponse`, `AbstractNode`, `BufferView`, `MotionState`, `GeometryStateStyle`, `DescribeEndpointsCommandInput`, `DokiSticker`, `DBCore`, `IDateRangeInputState`, `JestEnvironmentGlobal`, `Wins.RankState`, `SecretsService`, `babel.Node`, `ITextureInfo`, `StepIterator`, `IOProps`, `ProjectInfo`, `RecoilState`, `CollapsibleListProps`, `MenuService`, `StoryArgs`, `LoginAccountsValidationResult`, `IGameState`, `TokensPrices`, `ArrayOption`, `ManagerOptions`, `TextAreaProps`, `requests.ListCloudVmClusterUpdateHistoryEntriesRequest`, `GetResourcesCommandInput`, `LegendPath`, `ParseIterator`, `TodoListRepository`, `FileInode`, `WorkspaceFolderConfig`, `AnnotationAnalyticsAggregation`, `LinkService`, `IAngularMyDpOptions`, `WorkspaceMap`, `PlanItem`, `ScaleConfigs`, `FileReference`, `ModelRef`, `StreamResetOutgoingParam`, `LucidRow`, `vscode.ViewColumn`, `AccuracyEnum`, `RadioButtonViewModel`, `Strategy`, `HighRollerAppState`, `IApiProfile`, `PropertyFilter`, `OutputTargetEmptiable`, `ChainID`, `IRawHealthStateCount`, `FabSpecExports`, `NotificationCCAPI`, `Blobs`, `KdNode`, `DataStateClass`, `RegisterOutput`, `AsyncUnorderedQueryFlow`, `GraphQLTypeInformer`, `SphericalHarmonicsL2`, `a.Type`, `ProjectItem`, `ArgumentBuilder`, `Kubeconfig`, `StackLayout`, `GeneratePipelineArgs`, `StyleScope`, `ReferencingColumnBuilder`, `UnionOptions`, `StartMeetingTranscriptionCommandInput`, `Conv2DInfo`, `OnPostAuthHandler`, `Frontstage1`, `HSD_JObj_Instance`, `Quill`, `TextNode`, `IAlbum`, `TutorialSchema`, `PrimitivePolygonDrawerService`, `RepositoryStatistics`, `LighthouseBudget`, `JsonUnionsCommandInput`, `BotResponseService`, `CalendarDate`, `CustomPaletteState`, `IGlobalState`, `TemplateCategory`, `UploadApiResponse`, `MaybeTypeIdentity`, `MiddlewareOverload`, `ITrackDescription`, `PSTTableBC`, `IClusterContext`, `CounterAction`, `SafeAny`, `FormContextValue`, `IProc`, `OpticsDomain`, `States`, `Forward`, `NationalTeam`, `UploaderBuilder`, `RootLabel`, `IAuthenticateOidcActionConfig`, `CipherData`, `MessageGroup`, `CreateMembersCommandInput`, `DeleteObjectRequest`, `TaroElement`, `DAL.DEVICE_ID_DISPLAY`, `ConnectedPosition`, `PoolInfo`, `ScopedProps`, `FlexParentProps`, `Disposer`, `LibSdbTypes.Contract`, `GetGroupResponse`, `PluginDescriptor`, `RenameFn`, `Knex`, `EngineConfigContent`, `UITapGestureRecognizer`, `NoticeProps`, `UseQueryResponse`, `R3`, `TSESLint.RuleContext`, `MWCListIndex`, `OutputStream`, `ChatNode`, `onChunkCallback`, `ChartDataSet`, `KeyAttribute`, `AnyData`, `GfxCoalescedBuffersCombo`, `Coordinates`, `PluginsSetup`, `KernelConfig`, `FlattenedFunnelStep`, `ODataApi`, `SerialFormat`, 
`LuaThread`, `OutputTargetAngular`, `UpdateWebhookCommandInput`, `ListLeaderboardRecordsAroundOwnerRequest`, `InsertQuery`, `PayoutMod`, `AsyncActionType`, `VisualizePluginStartDependencies`, `AutoAcceptCredential`, `SLL`, `ApplyPath`, `ModalInstance`, `PartialCanvasTheme`, `ExtractorConfig`, `IssueAnnotationData`, `ViewRef`, `Workbook`, `styleFn`, `DataRepository`, `ILiteralExpectation`, `OutStream`, `ImplicationProofItem`, `ThrottleSettings`, `ApolloTestingController`, `ThemeName`, `PlayerId`, `XUL.menupopup`, `BaseInterface`, `IFavoriteColors`, `SoftwareModel`, `SeriesDoc`, `JsonDocsTag`, `ICreateChildImplContext`, `CacheQueryOptions`, `CreateDatasetRequest`, `GlobalVariables`, `FetchService`, `XConfigService`, `DownloadInfo`, `GraphicsLayerOptions`, `Arena`, `FlowsenseUpdateTracker`, `providers.TransactionRequest`, `ClrFlowBarStep`, `ContextMenu`, `IBLEAbstraction`, `StyledDecorator`, `requests.ListConfigurationsRequest`, `ODataQueryOptionsHandler`, `NodeConstructor`, `ITaskRunnerDelegates`, `TelemetryPluginStart`, `IEthUnlock`, `AlterTableModifyColumnBuilder`, `LendingReserve`, `msRest.OperationURLParameter`, `TermRows`, `TransformFnParams`, `BaseChannel`, `Entitlement`, `ILiquorTreeNode`, `ExtendedWebSocket`, `SearchCommandInput`, `MouseEvent`, `GeoPoint`, `VisualizeEmbeddableConfiguration`, `LocationReference`, `IPatchData`, `NumberFormatter`, `ServiceInterface`, `cToken`, `mjAlerts`, `Music`, `requests.ListNotebookSessionShapesRequest`, `PrefBranch`, `Events.pointerenter`, `Forecast`, `DinoRouter`, `IAdministrationItem`, `LambdaCloseType`, `UseSubscriptionReturn`, `Core`, `PlotConfigObject`, `ClozeDeletion`, `CodeGenExecutionItem`, `AnalyticsFromRequests`, `AppInitialProps`, `BreakpointKey`, `ObservableQueryBalances`, `SuiteWithMetadata`, `AstPath`, `IBackoffStrategy`, `DeleteFolderCommandInput`, `TextStyleDefinition`, `SearchSessionDependencies`, `ViteDevServer`, `SearchStrategyRequest`, `StateInfo`, `GitBlameLines`, `EElementSignature`, `IPermissionSearchFilters`, `Eth`, `ILecture`, `UpdateApplicationResponse`, `PrRepoIndexStatistics`, `InputMethod`, `mb.IRecording`, `WidgetZoneId`, `ShippingService`, `PmpApiConfigService`, `QueryArg`, `ReducerWithInitialState`, `ThLeftExpr`, `IGrid2D`, `INodeInputSlot`, `MemoryStorage`, `RegionFieldsItem`, `GameFeatureObject`, `MockControllerAdapter`, `PostsContextData`, `StopExecution`, `html.Element`, `Constraint2DSW`, `Commander`, `Crdp.Runtime.RemoteObject`, `IHandler`, `PermissionResponse`, `IStringStatistics`, `NavigationEntry`, `FakeContract`, `requests.ListManagedInstanceErrataRequest`, `DynamoDB.BatchGetItemInput`, `FunctionLikeDeclaration`, `d.FsReaddirItem`, `SidebarState`, `WexBimGeometryModel`, `SharedString`, `ChatServer`, `Slots`, `ImportSpecifierArray`, `TagsFilter`, `Dereferenced`, `CallbackResult`, `ListEndpointsCommandInput`, `FunctionPlotDatum`, `RoomModel`, `CommandEnvelope`, `TypePackage`, `AutoScalingPolicy`, `LocalRegistry`, `ISearchSource`, `NamePath`, `HubInfo`, `AddApplicationInputProcessingConfigurationCommandInput`, `ColorSpace`, `ServerErrorInfo`, `WaitInfo`, `MpProduct`, `MultipleTypeDeclaration`, `CreateBackupResponse`, `CalendarUnit`, `RenameLocation`, `AppConfigService`, `ParsedFile`, `IScopedClusterClient`, `WebGLSampler`, `GLuint`, `IScrollerInfo`, `Utility`, `PhotosaicImage`, `UrlFormat`, `DataTablePagerComponent`, `ListJobTemplatesCommandInput`, `WidgetRegistry`, `StorageEngine`, `BlockbookConnectedConfig`, `INetworkNavigatorNode`, `ChannelHandler`, `BaseNode`, `DBProperty`, `ExtendedTypeScript`, 
`IconBaseProps`, `Client`, `NextService`, `StyleProp`, `QueryHookOptions`, `TraceNode`, `HttpErrorHandler`, `TileImageSize`, `AuthenticationParameters`, `IUiState`, `CategoryEntity`, `TreeNodeLocation`, `CallArguments`, `TreeRepository`, `fabric.IObjectOptions`, `IStepAdjustment`, `IForwardIterator`, `ListWorkRequestErrorsResponse`, `IProductCreateInput`, `SidebarProps`, `SnapshotConnection`, `ColorValue`, `CreateMediaDto`, `NewLineToken`, `Stringifier`, `CertificateAuthorityLifecycleState`, `ExampleFlatNode`, `IChunkOffsetBox`, `BarRectangleItem`, `ConnectionLocator`, `inquirer.Answers`, `ResolvedEntitySchema`, `IFunctionParameter`, `LightGroupState`, `NormalizedOutputOptions`, `ScannedProperty`, `TreemapSeriesOptions`, `ElementsDefinition`, `ICharaProfile`, `IListMultipleData`, `STSortMap`, `CSharpType`, `InsertionEdit`, `SWRConfigInterface`, `ts.TypeParameterDeclaration`, `SignedStateReceipt`, `IEventCategory`, `GetPolicyResponse`, `ParsingMetadata`, `FetchMock`, `apid.CreateNewRecordedOption`, `GenericTreeItem`, `TLang`, `ArianeeTokenId`, `SetBreadcrumbs`, `IOptimized`, `OpenSearchDashboardsReactNotifications`, `requests.ListVmClusterPatchesRequest`, `NotifierService`, `AWSPolicy`, `SkinId`, `EditorCompletionState`, `LetterStyle`, `ScalarTypeComposer`, `d.CompilerJsDoc`, `ListPicker`, `LogicCommand`, `DeletePublicAccessBlockCommandInput`, `HookEvent`, `CombinedThingType`, `IDBRequest`, `PropertyName`, `CommandEntry`, `ConnectionFetcher`, `PreferenceService`, `Loadable`, `TestFunctionImportComplexReturnTypeParameters`, `AnyConfigurationSchemaType`, `CollectionBundleManifest`, `VariableStatementStructure`, `IProblem`, `ExplicitPadding`, `ShoppingCartService`, `requests.ListTsigKeysRequest`, `RSS3Index`, `MaxVersions`, `HeaderTransformer`, `AnalyzerNodeInfo`, `ListDomainsResponse`, `ValueAttributeObserver`, `Got`, `GetJobResponse`, `ObjectCallback`, `Waypoint`, `SuperExpression`, `OrderbookResponse`, `Rx.Notification`, `HierarchyCircularNode`, `RawRustLog`, `Placement`, `NPC`, `FieldResolver`, `TopUpProvider.RAMP`, `Track`, `MultiSigSpendingCondition`, `MatchSpecific`, `SModelElementSchema`, `Highcharts.JSONType`, `BinaryBuffer`, `CustomCameraControls`, `GetResponse`, `IterableX`, `MutableVector2`, `DocumentRegistryBucketKey`, `NodeContext`, `AuthenticationProvider`, `HeaderColumnChainRow`, `StatusFieldProps`, `TestProject`, `SecureNote`, `ChannelMessageAck`, `ConfigVersion`, `ProductType`, `StorageUtility`, `ts.BindingElement`, `BaseQuery`, `MutableMatrix22`, `Creature`, `ArtifactEngine`, `ForcedRetryErrorInfo`, `RSAEncryptionParams`, `OpenTarget`, `MessageItem`, `IMyValidateOptions`, `StixObject`, `AnchorMode`, `ProcessQueue`, `FormProps`, `ExtraComment`, `InternalPlugin`, `PlacementTypes`, `ObjectSchema`, `ContractPrincipal`, `SavedObjectsResolveImportErrorsOptions`, `HTTPResponseBody`, `ObjectSetting`, `ResolveIdResult`, `CRS`, `FieldSetting`, `SelectionTree`, `ApiMockRoute`, `server.Server`, `People`, `instantiation.IConstructorSignature8`, `CreateContextOptions`, `SeparableConvLayerArgs`, `Incoming`, `CubeFace`, `DeviceMetadata`, `ChartConfig`, `ValidationArguments`, `ZRRawEvent`, `PutResourcePolicyCommand`, `GaxiosPromise`, `GRULayerArgs`, `ConnectionEvent`, `MessagePriority`, `PaginationConfig`, `ListBackupsRequest`, `Hsv`, `requests.ListIdentityProvidersRequest`, `CocSnippetPlaceholder`, `SpecQueryModelGroup`, `MergeableDeclarationSet`, `GraphExecutor`, `AlertingRouter`, `DependencyTracker`, `CustomQueryHandler`, `Cookies`, `RegionCardinality`, `CanvasTypeVariants`, 
`SinglePointerEvent`, `PutBucketPolicyCommandInput`, `Types.Id`, `PinchGestureEventData`, `CmsGroupPlugin`, `TRPCClientError`, `UserFacingSerializedSingleAssetDataTypes`, `TestDecorator`, `UIRoastingMachineStorage`, `TypeRegistry`, `InterceptorFn`, `FileDataMap`, `ResolvedConceptAtomType`, `IApplicationState`, `EnvelopeListener`, `DropResult`, `SfdxFalconRecipeJson`, `ClientSideSocket`, `Dependence`, `UrlGeneratorId`, `RevalidateEvent`, `LexerActionExecutor`, `CodeQualityInformation`, `RoomData`, `BlockFactorySync`, `Formula`, `ContextTypes`, `OpenEditorNode`, `DAL.DEVICE_ID_BUTTON_AB`, `ViewFunctionConfig`, `PodDataPoint`, `TSelectedItem`, `PickItem`, `DescribeUsersCommandInput`, `LITestService`, `MActorId`, `Cwd`, `DatabaseReference`, `ListModelDeploymentShapesRequest`, `SmoldotProvider`, `SegmentDetail`, `PolkadotConnection`, `SvgProps`, `FinalInfo`, `NotebookDocument`, `GeoProjection`, `SqrlRuleSlot`, `es.CallExpression`, `HOC`, `ScrollIntoViewOptions`, `requests.ListAutonomousDatabaseDataguardAssociationsRequest`, `ChangeMap`, `PinModelData`, `GetRRSetResponse`, `GameConfig`, `CssItem`, `AlfrescoApiService`, `DeleteWebhookCommandInput`, `Lib`, `Buntstift`, `EventAttendance`, `ColumnNode`, `ParamsOptions`, `ListTagsForResourceCommandOutput`, `ExecutionDriver`, `AstMetadataApiWithTargetsResolver`, `Ast`, `MinecraftLocation`, `ITransformerHandleStyle`, `PipeCallback`, `InMemoryLiveQueryStore`, `OutputDefinitionBlock`, `DataViewValueColumn`, `Templates`, `Ops`, `WindowService`, `HookName`, `Optional`, `YACCDocument`, `AlarmAction`, `TSESTreeToTSNode`, `ClockMock`, `Utxo`, `KeyringTrace`, `SearchInput`, `PageMaker`, `CornerSite`, `FileUploadService`, `ImageViewerProps`, `ExtendedChannelAnnouncementMessage`, `MerchantGameWinningEntity`, `StringLiteralNode`, `JConfiguration`, `messages.IdGenerator.NewId`, `GridOptions`, `StaticOperatorDecl`, `ISeries`, `LockMode`, `DistanceExpression`, `CodeMirrorAdapter`, `AccountJSON`, `CompletionResults`, `NonFungibleAssetProvider`, `PartialVisState`, `CmdParts`, `SubtitlesTrack`, `BigInt`, `QRFunction`, `GetCoordinate`, `utils.RepositoryManager`, `SigninOrSignupResponse`, `LoadParams`, `QTMCounterState`, `ValuesDictionary`, `DateKey`, `TLayoutSize`, `ObjectValueNode`, `GaxiosOptions`, `ModuleType`, `GX.TexFormat`, `FirebaseFirestore.DocumentReference`, `KibanaExecutionContext`, `models.ChatNode`, `requests.ListConnectionsRequest`, `Face`, `ComponentCompilerTypeReference`, `SetStateCommitment`, `ClientMenuOrderIdDTO`, `ImportParts`, `SPBatch`, `Events.hidden`, `PackageManagerType`, `BasketSettings`, `LabIcon`, `Sprite3D`, `SecurityAlertPolicyName`, `StateAction`, `SqliteDatastore`, `NSDateComponents`, `Fee`, `IHashMapGeneric`, `Mock`, `InitChunk`, `EnvPaths`, `IClassExpectation`, `lsp.Location`, `MatchedStep`, `ErrorReason`, `BSplineCurve3dH`, `OrganizationAccount`, `GaiaHubConfig`, `KeyStore`, `TinderLike.Props`, `TreeBacked`, `CustomConfigurationProvider`, `MockRequestParams`, `MapFnOrValue`, `UnionOf`, `requests.CreateJobRequest`, `MToonMaterial`, `ApiOperation`, `Item`, `VideoSettings`, `ReferenceList`, `CanvasLayer`, `PvsResponse`, `FieldMetadata`, `SymbolFlags`, `d.HydrateResults`, `AttributeFlags`, `IMusicMeta`, `TSExpr`, `InputConfiguration`, `FromSchema`, `HoldingUpdatedArg`, `HTMLBaseElement`, `ObserverLocator`, `UICommand`, `FilesystemNode`, `ThemeConfig`, `StatsCompilation`, `LexPosition`, `SearchForLife`, `MarkConfig`, `PlatformRender`, `IObjectInspector`, `MixedObject`, `ReviewItem`, `MaybeAccount`, `DataTable`, `Json2Ts`, 
`TextAreaComponent`, `CompleteMultipartUploadCommandInput`, `TEX1_TextureData`, `BuddyWorks`, `FilePickerBreadcrumbItem`, `LanguageSettings`, `Implementation`, `BaseClusterConfig`, `SourceRule`, `UntagResourceResult`, `IndexerManagementModels`, `T15`, `ClassNameMap`, `Realm.Object`, `StateIO`, `ChooseImageSuccessCallbackResult`, `JsonParserTransformerContext`, `DefaultRouterOptions`, `AccessTokens`, `IMyFavouriteItem`, `SKFrame`, `GLclampf4`, `ast.LookupNode`, `CancelJobRequest`, `QueryIdempotencyTokenAutoFillCommandInput`, `PostProcessor`, `GetObjectRequest`, `SocketUser`, `FileSystem`, `TSClientOptions`, `AssociationConfig`, `IAmazonImage`, `GetReferenceOptions`, `BaseHandlerCommonOptions`, `MessageRequest`, `AzureClusterProvider`, `ShaderInstance`, `UnionAccumulator`, `SettingsPriority`, `UnvalidatedIndexingConfig`, `FormAction`, `requests.ListBucketsRequest`, `PluginSettings`, `SpectatorHostFactory`, `StatusVectorChunk`, `PostProcess`, `PinMap`, `DbService`, `PubPointer`, `CLDRFramework`, `PIXI.DisplayObject`, `E2EElement`, `BasicJewishDate`, `NestedPageMetadata`, `MerchantUserService`, `MdcTabScrollerAlignment`, `Bundle`, `JGOFMove`, `ChannelMessageRemove`, `SubqueryRepo`, `ISerialFormat`, `PlistValue`, `d.RollupConfig`, `ObjectPredicate`, `IPermission`, `IRequestOptions`, `rp.OptionsWithUrl`, `DisplayState`, `TSParseResult`, `JsDoc`, `TableOfContentsEntry`, `ErrorBarSelector`, `STColumnFilter`, `ViewportHandler`, `ID3v2MajorVersion`, `UpdatePortalCommandInput`, `PawnFunction`, `AlainAuthConfig`, `TagsListItem`, `ReadonlyDeep`, `CommandMap`, `Addressable`, `Keplr`, `PrStatistics`, `ToolManagerService`, `Protocol.Network.ResponseReceivedExtraInfoEvent`, `SelectTokenDialogEvent`, `AccessTokenScopeValidator`, `ts.PrefixUnaryExpression`, `MessageDocument`, `ITimeOffCreateInput`, `IncompleteTreeNode`, `uElement`, `WeierstrassPoint`, `CommandContext`, `SyncDoc`, `FetchRequestId`, `AndOptions`, `UseQueryReturn`, `ComponentPublicInstance`, `TerminalProviderSeverity`, `HttpArgumentsHost`, `DocumentDeltaAtomicUpdate`, `LeftCenterRight`, `IntermediateTranslation`, `PopoverPlacement`, `GX.CC`, `Highcharts.AnnotationsOptions`, `ResolutionKindSpecificLoader`, `ConvWithBatchNorm`, `ServiceClientCredentials`, `ConnectedSpace`, `ChartjsComponentType`, `Types.EditableTitleState`, `RegisterValue`, `IStatusButtonStyleProps`, `NullableT`, `MockServiceClient`, `CheckSavedObjectsPrivileges`, `LayerGroup`, `Thing`, `EventUiHash`, `IViewport`, `AzExtTreeDataProvider`, `Author`, `PluginInterface`, `SenseEditor`, `RpcResult`, `RestServer`, `DataEventEmitter.EventCallback`, `CampaignsModelExt`, `Brand`, `PathData`, `DiagnosticChangedEventListner`, `OAuthAuthCode`, `QuerySettings`, `TranslationItemBase`, `Definitions`, `LocationLink`, `StartExperimentCommandInput`, `TextureId`, `PoiLayer`, `MutableVector2d`, `GenericStyle`, `Z64Online_ModelAllocation`, `ImageViewerState`, `bitcoin.Psbt`, `WrongDependencies`, `TutorialDirectoryHeaderLinkComponent`, `DataTransfer`, `StoreNode`, `IZoweTreeNode`, `AppThunk`, `MockPort`, `IFileSystem`, `jest.MockedFunction`, `IDragData`, `ReduxCompatibleReducer`, `ParsedMail`, `NewableFunction`, `TestTree`, `DeclarationReflection`, `QualifiedName`, `InstanceOptions`, `messages.TestStepResult`, `RhoProcessor`, `MultilevelNode`, `MockTokenTransferProxyContract`, `ParameterOptions`, `NoticeToastRequest`, `DecoratorConfig`, `LyricFont`, `NumberValueSet`, `IZoweDatasetTreeNode`, `solG1`, `GetRecommendationsCommandInput`, `ModalHelper`, `ITargetGroup`, `Events.pointerdown`, 
`PropertyMetadata`, `ast.BinaryNode`, `fromAuthActions.Login`, `CallErrorTarget`, `STIcon`, `GfxReadback`, `HdRipplePayments`, `IGceDisk`, `IListItemAttrs`, `R1`, `TreeDataSource`, `TimelineDivisionBase`, `TableServiceClient`, `GrayMatterFile`, `GBDeployer`, `RenderOptionFunction`, `CreateUserInput`, `ICreateSessionOptions`, `IConfigurationModify`, `MapAnchors`, `OwnerService`, `QuizLetter`, `AccessorDeclaration`, `EChartsType`, `BigNumber.BigNumber`, `KVStore`, `DeleteApplicationInputProcessingConfigurationCommandInput`, `SchemaKey`, `StorageAccount`, `TopNavMenuProps`, `DangerDSLType`, `FreezerInstance`, `Screenshot`, `requests.ListPingProbeResultsRequest`, `Redis.RedisOptions`, `DaffExternallyResolvableUrl`, `LoadingEvents`, `ApolloServerExpressConfig`, `ApplyPendingMaintenanceActionMessage`, `VercelRequest`, `CoinMap`, `CellInterval`, `MDCDialogPresentationControllerDelegateImpl`, `DocumentPosition`, `IDropDownTreeViewNode`, `AjvFactory`, `SeparatedNamedTypes`, `Allowance`, `LoadOpts`, `IQueryCondition`, `IfNotExistsContext`, `ODataSchema`, `IncompleteUnescapedString`, `EventAsReturnType`, `ILoaderPlugin`, `option`, `BootJsonData`, `DeployedReplica`, `ChannelData`, `PathExpression`, `AfterWinnerDeterminationGameState`, `LiveObject`, `SubEntityProps`, `ast.IfNode`, `ContextService`, `Backoff`, `ITodoItem`, `TableItem`, `GXMaterialHacks`, `EditorModel`, `PointEditOptions`, `SnippetsProvider`, `PlanningResult`, `N`, `DependencyGraphNodeSchema`, `PlaylistTrack`, `jest.Mock`, `UInt`, `ComponentDecorator`, `ManagementClient`, `AccessLevel`, `EstimateGasOptions`, `PluginManager`, `EncryptionConfiguration`, `GroupActorType`, `DraftDecoratorType`, `TransformResult`, `Type_Struct`, `google.maps.DirectionsResult`, `ABuffer`, `AppEvent`, `MetaDataOptions`, `window.ShellWindow`, `PrivateApi`, `DoorLockLoggingCCRecordReport`, `ErrorHandler`, `DecodedSourceMap`, `KeyBindingProps`, `DaffLoginInfo`, `FactoryRole`, `IntegerRangeQuantifier`, `ExecSyncOptions`, `Legend.Item`, `ITimezoneMetadata`, `TsTabCollectionComponent`, `EventSubscriptionQuotaExceededFault`, `ApolloVoyagerContextProvider`, `WorkRequestResourceMetadataKey`, `Redis.ClusterOptions`, `BodyState`, `JestExtRequestType`, `CreateDatabaseResponse`, `StashTabSnapshot`, `IFBXConnections`, `UpdateProjectInput`, `TransactionInstruction`, `CollisionCategorizedKeeper`, `KCDLoader`, `BoxSide`, `StandardSchemeParams`, `RandomFunc`, `FlightType`, `AssembledSubjectGraphics`, `TaskManagerDoc`, `InputTokenObject`, `LambdaEvent`, `AnalysisResults`, `VPC`, `LocaleTemplateManager`, `CloudFormationStack`, `Dex`, `IStdDevAggConfig`, `Vc2cOptions`, `Bridge`, `NodeWithPos`, `ItemState`, `BlobInfo`, `PopoverPosition`, `NestedValueArray`, `SubmissionCcLicence`, `UserTimeline`, `TurnContext`, `INodeIssues`, `ShowConflictsStep`, `d.StyleDoc`, `Support`, `UpdateCampaignCommandInput`, `X12Interchange`, `RegisterDr`, `MyNFT`, `TreemapSeries.NodeObject`, `AnchorBank`, `ConfigModel`, `QueryBuilderProps`, `SecurityCCCommandsSupportedReport`, `IUrlResolver`, `vscode.MarkdownString`, `HTMLScWebglBaseChartElement`, `ExpressionFunctionVisualization`, `ChromeNavControl`, `MockRequestInfo`, `IDejaDragEvent`, `ToString`, `SandDance.specs.Insight`, `RedisClient`, `ObjectAllocator`, `TableColumnWidthInfo`, `ExpressionStatement`, `PointTuple`, `NormalizedParams`, `OpenOrders`, `BarChartBarMesh`, `AppInstanceJson`, `LogicNode`, `ContactCardEmbeddable`, `SMTType`, `CarouselService`, `UITextView`, `SendRequestConfig`, `telemetry.Properties`, `Permissions`, `IConnectionParams`, 
`Yield`, `CheckpointNode`, `EtjanstChild`, `IDBObjectStore`, `AnnotationsProvider`, `NumId`, `SubscribeResult`, `DefaultOptionType`, `TermAggregationOptions`, `QueryLeaseRequest`, `ParsedStructure`, `Web3ReactContextInterface`, `FilenameFilter`, `ExecFileException`, `AttrState`, `AvailableFilter`, `EntityCollectionRecord`, `ElectronRoleCommand`, `ElementSelector`, `ReferenceInfo`, `NativeDeleteOptions`, `user`, `PrimitiveTypeAssertion`, `SVGTitleElement`, `NSURL`, `CombinedVueInstance`, `PointString3d`, `RecordInput`, `RobotApiResponseMeta`, `RuleTypeModel`, `WidgetOpenerOptions`, `CheckPrivileges`, `StaticJsonRpcProvider`, `Parsed_Result`, `MutationElement`, `CraftProjectConfig`, `ScheduledOperationDetails`, `DescribeJobRunCommandInput`, `ResourceInUseException`, `Year`, `Signal`, `IActionArgs`, `MethodsOrOptions`, `cg.Key`, `SeriesData`, `RangeSelector`, `ResourceMap`, `NzTabNavItemDirective`, `FlexibleConnectedPositionStrategy`, `AccountRegistry`, `UserDocument`, `StaticTheme`, `ZBarInstance`, `Discord.Guild`, `size_t`, `IHookCallbackContext`, `AddTagsToResourceCommandOutput`, `QueryProviderRequest`, `Rating`, `ElementHandle`, `StorableUrl`, `ContractCallOptions`, `AttachmentResponse`, `SearchSource`, `ArgumentCheck`, `PsbtInputData`, `MDCMenuAdapter`, `ColorObject`, `AssociationCCAPI`, `AsyncDiffSet`, `RequestSelectorState`, `LayoutRectangle`, `Element.JSON`, `Amplitude`, `ConnectionInvitationMessage`, `NotificationColumnFilters`, `StringExpression`, `AssignmentNode`, `RequestProps`, `GitFileChange`, `ParseResponse`, `EntityAction`, `BaseName`, `ReadModelPool`, `ProjectedXY`, `PouchdbDocument`, `BackgroundState`, `DateCell`, `choices`, `CertificateOptions`, `ModbusTransaction`, `Triple`, `ValidatedJobConfig`, `Security`, `UNKNOWN_TYPE`, `CertificateSummaryBuilder`, `ConnectionSetting`, `IStorages`, `IMessageEvent`, `SnapshotPublicData`, `LambdaIntegration`, `CronService`, `StatusContext`, `InternalTransition`, `DownloadRequest`, `H264RtpPayload`, `PaginationCallback`, `ListDomainsForPackageCommandInput`, `DatabaseSchema`, `UseQueryPrepareHelpers`, `NullAndEmptyHeadersClientCommandInput`, `ts.TransformationContext`, `TradeProvider`, `AuthToolkit`, `InstancesState`, `BFT`, `CommentEntity`, `SubConfig`, `BaseAppearanceService`, `AbstractAssets`, `CreateMockOptions`, `BrowserTranslateLoader`, `WatchBranchEvent`, `CompilerSystemCreateDirectoryResults`, `T.Layer`, `IBalanceValue`, `InvalidSubnet`, `PreferenceProviderDataChange`, `SentenceNode`, `FacetSector`, `ListReportDefinitionsCommandInput`, `TestObserver`, `ParticipantItemStrings`, `TNSPath2D`, `planner.Planner`, `AliasOptions`, `TaskPoolRunResult`, `Gatekeeper`, `StorageManager`, `BitcoinjsNetwork`, `argparse.ArgumentParser`, `Accidental`, `RowOutlet`, `TeamsActionConnector`, `GX.TexGenType`, `AbstractCartProxy`, `SharedModel`, `RectModel`, `track`, `IfScope`, `PlexMetadata`, `ChangedDataRow`, `GaussianNoise`, `signalR.HubConnection`, `WidgetResolveResult`, `Web`, `DAVResponse`, `InstallStatus`, `d.StyleCompiler`, `handler.Queue`, `XMLSerializer`, `types.NestedCSSProperties`, `DescribeInstanceAttributeCommandInput`, `IHttpClient`, `ng.ITimeoutService`, `RelationMetadata`, `SMA`, `QueryParams`, `GenericAnalyzer.Dictionary`, `AngularPackageLoggerMessage`, `IGlTFModel`, `EmptyClass`, `ActivityAttendance`, `IEventSubscription`, `IExportedValue`, `WebSocket.Data`, `React.RefForwardingComponent`, `Resp`, `requests.ListDynamicGroupsRequest`, `cBgS_GndChk`, `TableState`, `TimeDistributed`, `PieceAppearance`, `AuthFacade`, `CellService`, 
`ReverseQueryInterface`, `RawJSXToken`, `BrandState`, `TelegrafContext`, `MDXRemoteSerializeResult`, `MarkOperation`, `PannerNode`, `BuildLog`, `token`, `ClassDeclaration`, `ActionCreatorWithPreparedPayload`, `StaticLayoutProps`, `ESFixedInterval`, `Model.Element`, `OpIterator`, `Fallback`, `AddMissingOptionalToParamAction`, `DogePaymentsUtilsConfig`, `ResourceInfo`, `ILyric`, `WudoohStorage`, `IDocument`, `SdkAudioMetadataFrame`, `StyleConfig`, `RivenProperty`, `TAccesorData`, `NewExpression`, `TransitionInstruction`, `ContentModel`, `IGhcModProvider`, `TypedTensor`, `Coords3D`, `CLI`, `DbSystemEndpoint`, `Hero`, `TtLCreatorOptions`, `Algebra.TripleObject`, `DAL.DEVICE_ID_SCHEDULER`, `TargetDatabaseTypes`, `Conjugate`, `DescribeDBClustersCommandInput`, `GfxClipSpaceNearZ`, `JID`, `ISolutionService`, `TSTypeElement`, `DisassociateMembersCommandInput`, `IQueryParamsConfig`, `DocumentNode`, `FakeComponent`, `CardName`, `IListInfo`, `ScreenSpaceProjection`, `SignedBlockType`, `FunctionDesc`, `TiledTMXResource`, `IQueryConfig`, `Notifications`, `DOMHighResTimeStamp`, `SnapshotDetails`, `GanttViewDate`, `HalfEdgeMask`, `SerializedTypeNode`, `Listeners`, `ListRegexPatternSetsCommandInput`, `TestCommander`, `fs.FileStorageClient`, `Inner`, `ArrayPromise`, `ConceptTypeDecl`, `TileFeatureData`, `TraversalContext`, `Combatant`, `PageInfo`, `Chest`, `FormInstance`, `ICustomFunctionParseResult`, `TNATxn`, `Mountpoint`, `ExpressContext`, `SlackHook`, `StubProvider`, `d.MatchScreenshotOptions`, `Web3ProviderType`, `WalletCredentials`, `WidgetState`, `ValueRef`, `FacebookAuthProvider`, `ProcessEnv`, `HostKind`, `BoolLiteralNode`, `ScopedKeybinding`, `LoginState`, `AuthHttp`, `RenderErrorHandlerFnType`, `Deployment`, `FileData`, `ReflectiveKey`, `InstallOptions`, `RenderWizardArguments`, `ScheduleDoc`, `UserMetadatumModel`, `tStringDecimalUnits`, `SelectOptions`, `SortedSet`, `Transforms`, `MetroConfig`, `DiagramState`, `ConflictingNamesToUnusedNames`, `PeerSet`, `MUserId`, `ClusterProvider`, `Vehicle`, `JourneyStage`, `DescribeExportTasksCommandInput`, `QueryArgs`, `FoldingContext`, `ScreenViewport`, `MutableMatrix44`, `MaybeElementRef`, `ServerConnection.ISettings`, `GetInstanceProfileCommandInput`, `httpProxy`, `ELang`, `BaseTheme`, `TemplateIntegrationOptions`, `HttpResponse`, `LangiumLanguageConfig`, `bindable.BindingOptions`, `IExtentModel`, `ObjectId`, `StateMachine`, `BarPrice`, `TsoaRoute.Models`, `InteractionWaitingData`, `GfxAttachmentP_WebGPU`, `RequestUser`, `MappedSetting`, `SponsorOptionsOpts`, `RadioButtonComponent`, `ContentLoader`, `NodeGroup`, `ZodUnion`, `JwtVerifier`, `ContentFilter`, `ParamMap`, `QueryFormColumn`, `TreeType`, `VisDef`, `Details`, `KeyContext`, `ReduxState`, `MongoQueryModel`, `TexturedStyles`, `SchemaFormOptions`, `AsyncHooksContextManager`, `NormalizedUrl`, `IAttrValue`, `AccountItem`, `android.view.LayoutInflater`, `coreClient.FullOperationResponse`, `ChannelUser`, `IProductTypeTranslatable`, `TestClassesVariant`, `RationalArg`, `EdgeProps`, `CallbackFn`, `AssociationAddress`, `IArticleAction`, `TestRequest`, `HTMLDOMElement`, `PuppetRoomJoinEvent`, `types.IActionInputs`, `GherkinLine`, `CSSProperties`, `MapImage`, `Semigroup`, `TextCanvasLayer`, `ComponentRequestTable`, `QueryEngineEvent`, `InputValidationService`, `FilterOptions`, `PDFImage`, `DigitalCircuitDesigner`, `Geography`, `UpdateApplicationCommand`, `NanoID`, `IDocumentStorageService`, `LayoutPartialState`, `NodeBase`, `GroupData`, `PDFAcroText`, `ICacheEntry`, `ComputedParameter`, `DaffOrder`, `Api`, 
`DropTargetConnector`, `StreamState`, `shell.Shell`, `SugiyamaLayoutSettings`, `MeshSprite3D`, `GanttItem`, `android.net.Uri`, `VitePluginFederationOptions`, `ReconfigResponseParam`, `FirmwareWriterPhaseListener`, `PolicyViolation`, `ContractMethod`, `ShowModalOptions`, `FetchResolveOptions`, `PlaceAnchor`, `UserPoolClient`, `CustomerLayoutState`, `Filters`, `ImportLookup`, `DescribeDBInstancesCommandInput`, `GraphEdge`, `ListBase`, `React.ComponentProps`, `CreateApplicationCommandInput`, `Phaser.Geom.Point`, `Snap`, `CodeModDefinition`, `InteractionState`, `RenderingDevice`, `EncryptedMessage`, `ExecEnv`, `CharacteristicType`, `NzIconService`, `LineModel`, `PlaneGeometry`, `RemoteBreakpoint`, `IReceiveParams`, `EmotionCanvasTheme`, `MultProof`, `UnixTerminal`, `SerializedVis`, `PerfKeeper`, `DialogState`, `HouseCombatData`, `aws.S3`, `StackGroup`, `requests.ListPdbConversionHistoryEntriesRequest`, `ConstructorType`, `CellRenderer.CellConfig`, `NonFungibleConditionCode`, `ReactTestRendererJSON`, `Tokenizer`, `MediaKeyComponent`, `ScrollHooks`, `ShareParams`, `PReLU`, `GitlabAuthResponse`, `PartyMatchmakerRemove`, `vscode.InputBoxOptions`, `PartialResolvedId`, `FrameType`, `PaletteRegistry`, `PagedResult`, `ConvertedDocumentFilePath`, `Eyeglasses`, `BrowserController`, `WhereGrouper`, `StreamWithSend`, `ChartAnimator`, `NavigationExtras`, `CtxAndConfigRepository`, `LotTypeOption`, `DetectedLanguage`, `Electron.IpcRendererEvent`, `TwistyPlayerModel`, `ReactionType`, `GfxTexture`, `ILauncher`, `IToaster`, `WorkRoot`, `RobotApiResponse`, `BoxKeyPair`, `SelEnv`, `ArtifactStore`, `TimelineFilter`, `IColours`, `TransactionsState`, `IPerson`, `StitchesComponentWithAutoCompleteForReactComponents`, `IContact`, `BeneficiaryUpdateParams`, `PresentationPreviewAttribute`, `SpeedKmH`, `IsRegisteredFeatureHandlerConstraint`, `ITrackCallback`, `DescribeJobExecutionCommandInput`, `IdentifyOperation`, `RequestSpan`, `MarkdownRenderer`, `CurrencyId`, `AZSymbolInformation`, `ValueOrLambda`, `VfsEntry`, `System_String`, `IQuizQuestion`, `ChangePassword`, `MatchedMention`, `SecurityGroupRuleLocation`, `TickViewModel`, `RenderingContext`, `UpdateProfileCommandInput`, `ZWaveErrorCodes`, `IErrorCallback`, `DescribeConfigurationCommandInput`, `RgbColor`, `VisualizationContainerProps`, `LinkedDashboardProps`, `Download`, `CAC`, `requests.ListTagsRequest`, `TraceContext`, `NormalizeContext`, `LegacySprite`, `requests.ListImagesRequest`, `ErrorCodeDefinition`, `LeafletMouseEvent`, `RumSessionManager`, `BaseChartisan`, `ServerTranslateLoader`, `Ray2d`, `PaginationModelItem`, `ViewStyle`, `WebGLFramebuffer`, `ManifestCacheChangedEvent`, `Radio`, `MDL0Renderer`, `CameraControllerClass`, `KeyedReplacementMap`, `NeuralNetwork`, `TodoListModule.Actions`, `RouterStub`, `GraphQLOutputType`, `CreateModelCommandInput`, `ObjectGridComponent`, `LiteralType`, `Neovim`, `SuggestionFactory`, `NpmFileLocation`, `GoalSettingsService`, `IDBCursorDirection`, `Classify`, `Hapi.Request`, `TwingSource`, `AlertInstance`, `ChannelBytes`, `RPCProtocol`, `Curry`, `SFATexture`, `TUser`, `Monorepo`, `SbbNotificationToastConfig`, `CodeGenerator.Params`, `GLenum`, `GLclampf`, `SubtitlesFileWithTrack`, `IMidwayBaseApplication`, `IAggConfig`, `TFLiteWebModelRunnerTensorInfo`, `IConversionValidation`, `StringFormat`, `IIntegerRange`, `SynthesisVoice`, `IntervalTimeline`, `GovernanceMasterNodeRegTestContainer`, `MediationState`, `EIP712TypedData`, `Builtin`, `FakeHashProvider`, `HoverResult`, `ICategoryInternalNode`, `UIEvents`, `xLuceneFieldType`, 
`ThreadKey`, `RecordType`, `InstanceClient`, `requests.ListIpv6sRequest`, `LogMeta`, `NzModalRef`, `RepositorySummary`, `BoolArray`, `DeleteBucketCommandInput`, `SerializationContext`, `SelectionSource`, `NotebookPanel`, `PagerDutyActionTypeExecutorOptions`, `BlockNumber`, `GX.IndTexMtxID`, `SflibInstrumentMeta`, `ProofNode`, `AuthenticationName`, `PathParameterValues`, `ColorKind`, `Project.ID`, `Salt`, `CodeEditor.IPosition`, `LSPConnection`, `ethOptionWithStat`, `SetState`, `IColumnDesc`, `Node.MinimalTransaction`, `SpacesManager`, `IReadOnlyFunctionCallArgumentCollection`, `AcrylicConfig`, `AxisLabel`, `EngineArgs.MarkMigrationRolledBackInput`, `Collection`, `PieChart`, `CreateConnectionDetails`, `CSVDataset`, `PaymentParams`, `VerticalPlacement`, `MissingTranslationHandlerParams`, `CustomResponse`, `MiddlewareArgs`, `ResponseToActionMapper`, `Rect2D`, `ValidationRunData`, `IAddresses`, `UseMap`, `requests.ListErrataRequest`, `QueueConfiguration`, `VnetInfoResource`, `MethodDefinition`, `Ulonglong_numberContext`, `WaveformHD`, `IEventHandler`, `WikidataResponse`, `FetcherField`, `IColumns`, `AngularFireAuth`, `AndroidProject`, `RadioButtonProps`, `ConfigObject`, `ShardingInstance`, `DatasourceStates`, `RouteRecognizer`, `PresenceHandler`, `ListRecipesCommandInput`, `ComponentCompilerEvent`, `UseConnectResult`, `CsvWriter`, `MockConfiguration`, `MetamaskNetwork`, `CumsumAttrs`, `vscode.FileStat`, `RemoteArtifact`, `PortMapping`, `CancellationTokenSource`, `Flag.Parser`, `NamedCollection`, `ServiceProto`, `PasswordPolicy`, `SpringConfig`, `Type_AnyPointer`, `TextTexture`, `GetDeviceRequest`, `ChatLoggedType`, `ModelJSON`, `IRow`, `LoginSuccess`, `StringValidator`, `TsConfigLoaderResult`, `StyledCharacterStrategy`, `ESLCarouselSlide`, `Role`, `ProjectData`, `PathRef`, `TruncateQueryBuilder`, `BoxFunction`, `ProcessedPackageConfig`, `RequestEntity`, `WidgetIdTypes`, `EtaConfig`, `DeleteClusterCommandOutput`, `UpdateStageCommandInput`, `ts.FormatDiagnosticsHost`, `ts.EnumMember`, `TypeSystemEntity`, `IZoweTree`, `RouteLocationRaw`, `RuleFilter`, `ErrorObject`, `ModuleWithComponentFactories`, `StartStop`, `DescribeDBClusterSnapshotAttributesCommandInput`, `SelectSpace`, `ReadStorageObjectId`, `backend_util.ReduceInfo`, `IRequest`, `UnsupportedOperationException`, `SocketPoolItem`, `MDCShadowLayer`, `PolygonFadingParameters`, `FireLoopData`, `AggsItem`, `NotificationsService`, `ISpriteAtlas`, `PrimaryContext`, `SerialOptions`, `HttpStatus`, `MDL0`, `FileChange`, `WebSocketService`, `MetaState`, `ParseSchemaTypeInfo`, `CommandCreatorResult`, `TagObject`, `ITreeDataProvider`, `Scales`, `SortOptions`, `RawSavedDashboardPanel730ToLatest`, `Config.IConfigProvider`, `GenerateTimeAsyncIterable`, `ContractDBTransaction`, `LineIds`, `GenerateInFolderOptions`, `DiagnosticRule`, `LIGHT_INFLUENCE`, `pxtc.CallInfo`, `RatingProps`, `ApiSchema`, `FsFolder`, `D3Selector`, `TAccesorKeys`, `TinyDateType`, `Yendor.Context`, `AttributeListType`, `Summary`, `Fig.Arg`, `MeasurementKind`, `Processor`, `StatefulLogEvent`, `MatchingFunc`, `Toucan`, `CommandConfig`, `MapLayersService`, `ExactC`, `UserObjectParam`, `MonitoringStats`, `AuthenticationService`, `IERC20ServiceInterface`, `CategoricalChartProps`, `EncodingQuery`, `ICoordinates`, `PrimaryKeyType`, `IPropertyTemplate`, `vType`, `IXLSXExtractOptions`, `AddToCaseActionProps`, `AssertLocationV2`, `VcsRepository`, `PerfectScrollbarConfigInterface`, `TableDataProvider`, `GitService`, `ThemePalette`, `MemberService`, `ICoordinates3d`, `ZonedDateTime`, 
`TokenDocument`, `GetError`, `EasJsonReader`, `FilterDefinition`, `TrackGroupIndex`, `ScriptableContext`, `OpenPGP.key.Key`, `AnnotatedFunctionInput`, `OctoKitIssue`, `XPCOM.nsXPCComponents_Interfaces`, `SlashingProtection`, `GlobalSearchResultProvider`, `NotificationError`, `OpticsContext`, `IFluidCodeDetails`, `FunctionExpression`, `ExecutionArgs`, `GrabOptions`, `ChannelIdExists`, `TaskTiming`, `DomainType`, `PSTFile`, `android.os.Bundle`, `FunctionObject`, `IErrorState`, `ExpressionsServiceStart`, `paper.Point`, `IApp`, `IconButtonProps`, `ReadModelReducerState`, `IConditionalTag`, `ZosAccessor`, `AuthenticateFacebookRequest`, `requests.ListPackagesInstalledOnManagedInstanceRequest`, `ClientInfo`, `MsgCreateBid`, `PaperInputElement`, `Conv3DInfo`, `ServiceSummary`, `TimeOptions`, `MapMode`, `View.Mail`, `NotificationStartedInfo`, `ILoggerOptions`, `BaseResourceOptions`, `MouseOrTouch`, `LinterMessage`, `Serverless.Options`, `MemoryDb`, `AggTypesDependencies`, `SignedMultiSigTokenTransferOptions`, `DataLoader`, `WishlistsDetailsPage`, `TSTypeAliasDeclaration`, `ShadowController`, `ITelemetryData`, `SelectItemDescriptor`, `IReminder`, `JSHandle`, `EventInstance`, `ConvertionResult`, `ITriggerContructorParams`, `Prog`, `UILog`, `SceneNode`, `IFieldOption`, `IPodcast`, `AnyConfigurationModel`, `TaskConfigurationModel`, `TransactionFormSharedState`, `IncomingMessage`, `TreeviewNode`, `ColumnProperty`, `InspectionTimeRange`, `PutConfigurationSetReputationOptionsCommandInput`, `IViewData`, `OutPoint`, `ClusterService`, `UrlLoader`, `CallClientState`, `DefaultInspectorAdapters`, `MatchEvent`, `BytesLike`, `VariableDeclaration`, `TableData`, `DropAction`, `ContextContainer`, `SVObject`, `ISerializer`, `TaskFile`, `requests.ListRecommendationsRequest`, `FieldDoc`, `PageBlock`, `LoadAction`, `serialization.ConfigDict`, `ZobjPiece`, `EscapeableMethod`, `AnalysisContext`, `RestResponse`, `pulumi.CustomResourceOptions`, `Multiplexer`, `GraphQLArgument`, `IPortfolio`, `RuntimeShape`, `SecretManagerServiceClient`, `DAL.KEYMAP_KEY_DOWN_POS`, `WrapperArray`, `WebSiteManagementClient`, `TrailImage`, `ConfigurableProfilePermissions`, `ContactService`, `CalendarState`, `DeviceSummary`, `GlobalNode`, `DiagnosticSeverityOverridesMap`, `CheerioFile`, `QueryAllParams`, `NumberFormatOptions`, `ILifecycle`, `VirtualRepeat`, `TracePrinter`, `QuickInputStep`, `MDCChipSetAdapter`, `ItemProperties`, `Success`, `SignatureReflection`, `BaseNavTree`, `ObservableObjectAdministration`, `EmptyInputAndEmptyOutputCommandInput`, `TSESTree.Identifier`, `NodeData`, `ObservedNode`, `HighContrastMode`, `MainAreaWidget`, `CategoryDataStub`, `FtpNode`, `SqlFile`, `Schema`, `ScaleModel`, `MaybeLazy`, `EffectVblDecl`, `UseStylesProps`, `DisableOrganizationAdminAccountCommandInput`, `TFLite`, `Paper`, `JsonWebSignatureToken`, `B`, `HashEntry`, `DropoutLayerArgs`, `Count`, `RecordProxy`, `ScaleCompression`, `SavedDashboardPanel730ToLatest`, `SourceTargetFileNames`, `VariantType`, `LoginTicket`, `TrackedImportFrom`, `ComparisonKind`, `GfxTexFilterMode`, `UIntCV`, `Victor`, `UseSocketResponse`, `IntersectionObserverCallback`, `ElevationRange`, `RobotCard`, `TheBasicMQStack`, `ILayer`, `ViewerContext`, `IgAppModule`, `FaastModuleProxy`, `MessageHandler`, `ColorPickerProps`, `Opts`, `EndpointInput`, `DataView`, `QuestionService`, `StreamResponse`, `ObjectStorageSourceDetails`, `mendix.lib.MxObject`, `DOMPoint`, `ValidatedOptions`, `FallbackProps`, `MiddlewareArray`, `TypedDataDomain`, `ChartwerkTimeSerie`, `SavedSearchTypes`, 
`KernelInfo`, `EndOfLineState`, `GetNetworkProfileCommandInput`, `ParamDefinition`, `MagentoProduct`, `RulesClientFactory`, `ListReleaseLabelsCommandInput`, `DeleteChannelMessageCommandInput`, `Dialogic.InstanceEvent`, `VanessaDiffEditor`, `OAuthTokenResponse`, `ClanAggHistoryEntry`, `FieldNameList`, `W5`, `core.BTCInputScriptType`, `FileExtension`, `PipelineStageUnit`, `PortObject`, `OperatorEntry`, `patch_obj`, `BagOfCurves`, `RemoteResource`, `ListCertificatesCommandInput`, `NineZoneStagePanelPaneManager`, `requests.ListSoftwareSourcePackagesRequest`, `BuildHandlerOptions`, `ITaskWithStatus`, `RaiseNode`, `AtlasResourceSource`, `GetConfigFn`, `AtomId`, `ApiClientConfiguration`, `XPortalService`, `TranslationFile`, `MediaSource`, `WorkflowInputParameterModel`, `AppMenu`, `IGeometryAccessor`, `StackOperationStep`, `UICollectionDelegateImpl`, `CalcObj`, `GeneratedKey`, `SpaceBonus.PLANT`, `ScaleContinuousType`, `ScalarTypeSpec`, `PeerTreeItem`, `ContractName`, `TDiscord.Guild`, `TagResourceCommandOutput`, `HasId`, `JobExecutionSummary`, `FuzzyScore`, `objPool.IPool`, `Torrent`, `SubscriptListResult`, `ProductTranslation`, `PersonState`, `DaffOrderTotal`, `chrome.contextMenus.OnClickData`, `QueryEntityKey`, `GetMembersCommandInput`, `ITeam`, `LabelEncoder`, `TypeReconstituter`, `SimpleFrameStatistics`, `ParameterListDetails`, `TypedQuery`, `ObjectDictionary`, `DecoratorConfiguration`, `Tiles`, `DescribeStacksCommandInput`, `RequestDto`, `workspaces.WorkspaceDefinition`, `OpenSearchDashboards`, `RunHelpers`, `DatetimeParts`, `NavigationItem`, `AWS.CloudFormation`, `QueryEngineConfig`, `SpritesStateRecord`, `DummyResolver`, `ContactConstraintPoint`, `JoinBuilder`, `CacheStorageKey`, `Ulong_numberContext`, `DaffPaypalTokenResponseFactory`, `SSAState`, `core.ETHVerifyMessage`, `LogOptions`, `SynWorkspace`, `ShapeViewModel`, `ifm.IRequestInfo`, `FetchedPrices`, `HealEvent`, `UberPBRMaterial`, `GridItemData`, `AssociatePackageCommandInput`, `GeneratedIdentifier`, `d3.HierarchyPointNode`, `EncryptedSavedObjectsPluginSetup`, `React.TouchEvent`, `LifecycleSettings`, `ByteArray`, `XanimePlayer`, `NdQtNode`, `CancelTokenSource`, `ITypeFilter`, `CliScriptGenerator`, `TD.ThingProperty`, `IBucket`, `CommandExecutorInterface`, `NodePositionOffset`, `TransactionBeganPayload`, `OOMemberLookupInfo`, `PSTTableItem`, `IqResponseStanza`, `MlClient`, `EventHandlers`, `AnalysisMode`, `ModuleKey`, `FRAME_SVG_POLYLINE`, `ThemeVersion`, `ConditionalStringValue`, `StoreView`, `amqplib.ConfirmChannel`, `InteractiveProps`, `LambdaAction`, `ChatErrorTarget`, `AudioVideoEventAttributes`, `ParseMode`, `ResolverClass`, `StringKeyValuePair`, `DropDownElement`, `ShaderSlot`, `Thumb`, `ConfigHandler`, `BehaviorDescription`, `tfc.serialization.ConfigDict`, `VisualizationsSetup`, `ExtendedPOIDetails`, `AthleteSnapshotModel`, `SavedObjectsFindResult`, `RequestConfigT`, `XYAxis`, `ComponentProperty`, `SlideUIEvent`, `ReadOnlyAtom`, `ProtocolRequestType0`, `ColorSwitchCCStartLevelChange`, `VirtualApplianceSite`, `WriteResult`, `coord`, `BluetoothError`, `WorkspaceFolder`, `GenericDefaultSecond`, `ReleaseActionProps`, `AsyncHierarchyIterable`, `IntrospectionInputTypeRef`, `PIXI.Application`, `Decider`, `WebpackType`, `XUL.browser`, `ReactTestRenderer.ReactTestRenderer`, `Offer`, `GetBinaryPathsByVersionInput`, `tEthereumAddress`, `ProgressToken`, `FileReflection`, `UserBuilder`, `Notifire`, `DrawerState`, `VisParams`, `DropdownProps`, `DecodeResult`, `EnumDeclaration`, `IEntityModel`, `EventInit`, `IStateContext`, 
`d.RobotsTxtResults`, `ClassResources`, `NzNotificationRef`, `Record`, `TDiscord.MessageReaction`, `AzureParentTreeItem`, `GetterHandler`, `RecursiveShapesCommandInput`, `Prefab`, `CanvasEvent`, `ActorPath`, `FieldQuery`, `ResponseToolkit`, `Requirement`, `IMessageItem`, `MaybeType`, `JSMService`, `ControlState`, `ManagementDashboardSummary`, `Mocha.Suite`, `IProducer`, `NewBlock`, `OpenSearchSearchHit`, `ClientAndExploreCached`, `StackFn`, `CanvasTextBaseline`, `ElementGeometryCacheOperationRequestProps`, `ListrEvent`, `WidgetView.IInitializeParameters`, `Editors`, `DyfiService`, `m.Vnode`, `RequireId`, `SupportCode`, `ModelPredictConfig`, `ServiceError`, `CurrencyFormat`, `DeleteStreamCommandInput`, `ResourceLocation`, `MessageAction`, `WithLiteralTypescriptType`, `OperationLoader`, `RedisClientOptions`, `TransitionState`, `ILeaseState`, `ViewDefinition`, `SearchStrategySearchParams`, `DiagnosticRelatedInformation`, `ProtocolClient`, `StringFilterFunction`, `JSDocTag`, `ScaleOrdinal`, `GroupConfig`, `TypeScriptEmitter`, `CellItem`, `GeneratorExecutor`, `Img`, `DummySpan`, `cachedStore.Container`, `TenantId`, `AnimatedAddition`, `ALObjectWizardSettings`, `TargomoClient`, `MiTextConfig`, `GetUserData`, `Bitmap`, `ILease`, `ColorMode`, `ITemplatedComposition`, `WorkRequestResource`, `StorageManagementClient`, `ISqlEditorResultTab`, `FontWeightType`, `ApplicationCommandOptionData`, `DescribeSObjectResult`, `ProcessOutput`, `IpcRendererListener`, `ComboTree`, `NavigationRoute`, `UpdateSchemaCommandInput`, `CreateTRPCClientOptions`, `DriveManagerContract`, `DecltypeContext`, `AnimatedSettings`, `DataSnapshot`, `SerialAPICommandContext`, `AccessorEntry`, `TSESTree.Statement`, `LegacyService`, `Box`, `IReactionPublic`, `ParameterWithDescription`, `EdaDialogCloseEvent`, `STS`, `TextFieldProps`, `ParameterCategory`, `DrawerContentComponentProps`, `LoggerWithErrors`, `EventAggregator`, `ReleaseResource`, `IGlobal`, `ColumnMetricsObject`, `DraftInlineStyle`, `CollectionsService`, `StringCodeWriter`, `Cancel`, `ChapterData`, `requests.GetJobLogsRequest`, `SceneObjHolder`, `PsbtTxOutput`, `WeaponData`, `TileDescriptor`, `IChatMessage`, `CanvasRenderingContext2D`, `ExclusionVisibleVirtualNode`, `RouteQuoteTradeContext`, `DDiscord`, `BracketPair`, `MemoryShortUrlStorage`, `IndexAliasData`, `UiLanguage`, `Refresher`, `Probe`, `IModelDb`, `HotObservable`, `Collider`, `BindingName`, `BlockFormatter`, `LoadLastEvent`, `EditableCell`, `SortedQuery`, `NativeEventSubscription`, `RemoteTrackInfo`, `m.Recipe`, `SharingResult`, `IModelTransformer`, `ICaptainDefinition`, `ChainedIterator`, `CdkFooterRowDef`, `GetJobRequest`, `RectShape`, `NodeListOf`, `SBDraft2CommandOutputBindingModel`, `CompiledQuery`, `MemberNode`, `VirtualModulesPlugin`, `SVGStopElement`, `GenericAPIResponse`, `IRECProductFilter`, `FormInterface`, `EntityNameExpression`, `ISnapshotProcessor`, `CompilerWatcher`, `CrudRequest`, `NodeCallback`, `OnUpdate`, `BuildifierFileType`, `HorizontalAlignment`, `BundleOptions`, `TransferCommitment`, `promise.Promise`, `HttpServerOptions`, `PerPanel`, `TransmartNegationConstraint`, `ObservableProxy`, `CLM.ExtractResponse`, `SalesLayoutState`, `NamespacedAttr`, `PluginHost`, `thrift.Thrift.Type`, `USBInterface`, `IContextualMenuItemStyles`, `FakeCommand`, `TargetConfiguration`, `TargetDetectorRecipeDetectorRule`, `Writer`, `NodeParserOption`, `ResolverData`, `IApiStashTabSnapshot`, `AnimGroup_TexMtx`, `Sblendid`, `ActionSheetButton`, `ExternalDMMF.Mappings`, `IProtoBlock`, `MutableSourceCode`, 
`ECDSASignature`, `Canvas`, `MemoryAppenderConfiguration`, `TemplateUnparser`, `InputsType`, `ModMetaData`, `IHttpInterceptController`, `ExampleMetadata`, `def.Vec4`, `Edition`, `BuildAnnotation`, `pxt.PackagesConfig`, `ActiveTaskExtended`, `DynamicLinkParameters`, `ServiceStatusLevel`, `ComponentItem`, `VirtualRows`, `CompilationError`, `TimerEvent`, `IExpectedVerifiableCredential`, `ShapeOptions`, `StakingTransaction`, `CursorPosition`, `ChartsPluginStart`, `SubscribeCommandInput`, `AzureDevOpsOpts`, `TypedFormGroup`, `AuxResult`, `Vorgang`, `PluginActionContext`, `PluginRegistration`, `RushConfigurationProject`, `OrchestrationClientInputData`, `ParsedConfig`, `GridPattern`, `StreamLabs`, `SecondaryIndexLayout`, `RenderedSpan`, `ViewMetaData`, `DefinitionParams`, `CfnExpressionResolver`, `Optic`, `BackwardScanner`, `HealthStateFilterFlags`, `FieldValidator`, `LogEvent`, `ISourceLocation`, `Config.ProjectConfig`, `serialization.SerializableConstructor`, `CustomHTMLElement`, `ICXMakeOffer`, `IServiceWizardContext`, `HarmajaStaticOutput`, `IInboundSignalMessage`, `PartialStoryFn`, `AndroidPerson`, `fhir.Bundle`, `u128`, `Listing`, `RemoteInfo`, `DomainDeliverabilityTrackingOption`, `JsonDecoratorOptions`, `EthTxType`, `WatcherOptions`, `TypeDef`, `Json.Property`, `StyledProperties`, `ServerSecureChannelLayer`, `ListWebACLsCommandInput`, `GMSMapView`, `UserSettingsService`, `BottomBarArea`, `HubstaffService`, `DocumentContents`, `NewPerspective`, `AuthClientRepository`, `MaybeFuture`, `InstancePool`, `ex.Scene`, `OrganizationTeamsService`, `StopMeetingTranscriptionCommandInput`, `SFU`, `DeploymentCenterStateManager`, `windowPositionCalculationState`, `Proof`, `FzfOptions`, `ast.SyntaxNode`, `TransactionEntityDataService`, `PuppetCacheContactPayload`, `RecvDelta`, `ParsedConfirmedTransaction`, `AlgBuilder`, `MDCTabBarAdapter`, `IReducers`, `CreateTemplateCommandInput`, `IFileUnit`, `SlaveTimelineState`, `markdownit`, `DeleteRegistryCommandInput`, `Recipient`, `ITableDefine`, `XPCOM.nsICategoryManager`, `ExpanderQuery`, `ToggleDeselectSeriesAction`, `ActionMetadataArgs`, `RichEmbed`, `NativeAppStorage`, `WindowSize`, `NodeId`, `BlockFriendsRequest`, `AutoCompleteLabel`, `DeployOrganizationStep`, `IssuanceAttestationsModel`, `OperatorSpec`, `float32`, `RegisteredSchema`, `MultiSigSpendingConditionOpts`, `Types.ReadyType`, `NameValidationError`, `SFieldProperties`, `STComponent`, `AFileParser`, `PrefV2`, `IAppVolume`, `API.storage.IPrefBranch`, `SingleSigSpendingCondition`, `ResizeHandle`, `RelativePlaceAnchor`, `STATE`, `BaseAttribute`, `protocol.Request`, `TaskInfo`, `Decorations`, `RatingPair`, `TableCellProps`, `ComplexArray`, `TextMessage`, `HTMLSpanElement`, `ComponentFramework.Context`, `ParjserBase`, `GeometryKindSet`, `PlayerState`, `ResultNode`, `Point2d`, `AuthType.Standard`, `HostSettings`, `ConversationRecognizer`, `interfaces.Context`, `RequestHandler`, `AvatarOverload`, `TaskManagerStartContract`, `ClassPeriod`, `ICompareValue`, `LemonTableColumns`, `SwiftDeclarationBlock`, `Login`, `PlayerProp`, `ToggleCurrentlyOpenedByRoute`, `DummyTokenContract`, `RefactorBuffer`, `VocabularyService`, `AtlasManager`, `StrokeStyle`, `ApiKeyHandler`, `CalendarCell`, `ThemeResolver`, `DescribeDBClusterParameterGroupsCommandInput`, `Catalog`, `SharedFunctionsParser`, `Framebuffer`, `SplitCalculator`, `Lab`, `XYArgs`, `DeploymentGroupConfig`, `ExceptionIndex`, `ts.WriteFileCallback`, `Spherical`, `UserDataStore`, `Production`, `TestInvokeAuthorizerCommandInput`, `Rect2`, `Option`, 
`ModalContextProps`, `BitcoinUnsignedTransaction`, `UrlParams`, `CommentDocument`, `VersionStage`, `ItemDefBase`, `StackSpacing`, `FunctionConfiguration`, `SyncProtocol`, `serviceRequests.GetJobRequest`, `MultiEmitter`, `RequestQueryBuilder`, `IWorkerMessage`, `RelocateNodeData`, `ProseNodeType`, `PrismaClientValidationError`, `PostConditionMode`, `Replace`, `PenroseState`, `CesiumService`, `DirectiveTransform`, `StoreCollection`, `PublishData`, `ConsoleLogger`, `IDiagnosticsRow`, `AutoCompleteEventData`, `XHROptions`, `GameContent`, `PDFHeader`, `ProposalIdOption`, `GMxmlHttpRequestResponse`, `SearchType`, `IScalingProcess`, `OutgoingSSNResetRequestParam`, `SortEnd`, `HttpAdapter`, `PeerId`, `InputFile`, `Tax`, `D3Interpolator`, `FastTag`, `ResolvedPos`, `ICliCommand`, `CreateProjectCommandOutput`, `RuleCondition`, `TileMatrixType`, `RtcpPayloadSpecificFeedback`, `IConfirmService`, `MemberAccessNode`, `Intersection`, `SubCategory`, `ts.ArrayLiteralExpression`, `DebugElement`, `PeriodKey`, `Dictionary`, `Config.InitialOptions`, `UniformsType`, `ExecutorOptions`, `F1`, `OutputTargetDistLazyLoader`, `ImportFromAsNode`, `RouteChain`, `NavigationIndicatorCriteria`, `UIGestureRecognizer`, `GaussianNoiseArgs`, `InternalCallContext`, `CsvInputOptionsNode`, `Await`, `ControlledProp`, `TransformContext`, `AuthenticateOptions`, `DirectiveMetadata`, `RTCDataChannelParameters`, `ImageEditorTool`, `BaseRoute`, `SoftwareTransaction`, `GetLoggingConfigurationCommandInput`, `ManifestMetaData`, `AreaLightInfo`, `TemplateSource`, `Yeelight`, `ethereum.PartialTransaction`, `RemoteRenderInfo`, `AccountId`, `UIImage`, `CalendarView`, `GossipTimestampFilterMessage`, `JPABaseShapeBlock`, `TocService`, `EstimateGasEth`, `PrimaryExpression`, `Path`, `FilterFormatterFunction`, `ICompanionElement`, `UnwrapNestedRefs`, `WalletSigner`, `LinkedIdType`, `SerializedPlayer`, `unchanged.Path`, `IElementInfo`, `LocalRenderInfo`, `OpusRtpPayload`, `EventMutation`, `IGhcMod`, `AliasName`, `IGitResult`, `ComboFilter`, `CopyTranslateResult`, `TransmartDimension`, `VariableUse`, `FocusKeyManager`, `ThyTransferDragEvent`, `MEMBER_FLAGS`, `RenderTexture`, `FlexibleAlgSource`, `ColumnPreset`, `GossipFilter`, `Booru`, `PointerInfoBase`, `IEventDispatcher`, `ReactLike`, `FieldTypeSelectOption`, `IOrchestrationFunctionContext`, `SchemaModel`, `NgRedux`, `INodeDetailResolverService`, `UseMutationResult`, `IntervalScheduler`, `DiagnosticsOptions`, `HeritageClause`, `FeatureCatalogueSolution`, `LiveSelector`, `ServiceConfigs`, `TypeMetadata`, `SeasonRequest`, `GX.FogType`, `PlaintextMessage`, `WebResource`, `t.JSXElement`, `tabItem`, `CategoryCollectionStub`, `requests.ListExternalNonContainerDatabasesRequest`, `GlyphInfo`, `BuildPipelineVisFunction`, `GenericDeclarationSupported`, `SchematicTestRunner`, `MethodNext`, `NotebookFrameActions`, `ErrorWithLinkInput`, `ResponsiveColumnSizes`, `TestUser`, `MetricTypeValues`, `QuestionDotToken`, `UIEdgeInsets`, `GoogleProvider`, `Matches`, `PoolData`, `NamedField`, `WriteTransaction`, `CoreContext`, `yubo.PlayOptions`, `BodyTempState`, `SnapshotFragment`, `ScreenDto`, `KeyUsage`, `NodeDef`, `ValidatorFn`, `Json.Token`, `APIGatewayProxyEvent`, `OnPreResponseResult`, `ArrayBufferReference`, `NetworkState`, `SwitcherState`, `ServerStyleSheet`, `ActivityType`, `AndExpression`, `EzApp`, `AxisProperties`, `OptionalObjectSchema`, `BlockStackService`, `BlockProps`, `ClarityValue`, `ImportTypeNode`, `InternalServiceError`, `api.Span`, `tf.fused.Activation`, `SourceState`, `SignatureHelpContext`, 
`PlaneData`, `StepState`, `Light`, `RunSpec`, `ParsedMessagePart`, `CssAnimationProperty`, `HdStellarPayments`, `d.RenderNode`, `AppletIconStyles`, `Pubkey`, `TwistyPropDebugger`, `RewriteMapping`, `tr.commands.Command`, `Apollo.MutationHookOptions`, `DOption`, `IChildrenItem`, `PartialAsyncObserver`, `CodeActionContext`, `CreateBranchCommandInput`, `TelemetryService`, `ScopeType`, `JoinedEntityType`, `SmartHomeApp`, `ASRequest`, `GitRepository`, `DisLabel`, `SettingsRow`, `CopyDBParameterGroupCommandInput`, `AlignmentTypes`, `IssuePublicationIdentifier`, `IConnectable`, `MapConstructor`, `BucketHandler`, `puppeteer.Page`, `TStyleSheet`, `AddressInfo`, `ISource`, `NamespaceNode`, `ClientState`, `CancelSignal`, `RenderConfig`, `FilterFn`, `Counter1`, `TransmartExportJob`, `MapData`, `CustomCode`, `MessageResponse`, `HttpsCallable`, `IWorkflowBase`, `TabEntity`, `LocalNetworkDevice`, `TiledLayer`, `IChange`, `requests.ListPublishersRequest`, `GenericWatchpoint`, `IGherkinLine`, `DeployResult`, `IHttpConfig`, `makerjs.IModel`, `DateEnv`, `requests.UpdateConnectionRequest`, `RawDimension`, `NetWorthSnapshot`, `TokenlonInterface.TxOpts`, `DBSymbol`, `BoundedGrid3D`, `DeclarationStatement`, `CssAstVisitor`, `TranslationGroup`, `CollectorOptions`, `GenericIndexPatternColumn`, `DomainEvent`, `ApiResponseOptions`, `ITemplateMagic`, `IConnectToGitHubWizardContext`, `DeleteSecurityConfigurationCommandInput`, `VideoDeviceInfo`, `tensorflow.ISignatureDef`, `InsertChange`, `IArticleData`, `DetectorRecipeDetectorRule`, `ODataVersion`, `DealRecordsConfig`, `LspDocuments`, `DataBySchema`, `AcLayerComponent`, `RouteCache`, `freedom.RTCPeerConnection.RTCConfiguration`, `FormErrorMessageModuleConfig`, `NumberValue`, `StringMap`, `Notify`, `UIInterfaceOrientation`, `IBuilder`, `KeyFunc`, `Real`, `NodeDetails`, `ConfigurationParams`, `DangerResults`, `LinkProof`, `WebSocketEventListener`, `RawToken`, `ComponentCompilerMethod`, `ControlBase`, `MutationTree`, `CalloutProps`, `ICandidateCreateInput`, `teacher`, `MaskObject`, `ParsingResult`, `ChartOffset`, `ProtocolPeerInfo`, `Assembly`, `TSLet`, `CancelSource`, `LaunchRequestArguments`, `VsixInfo`, `HistoryInstructionInfo`, `DateFormatter`, `StackGroupConfigNode`, `Kernel.IFuture`, `IDeferred`, `TheSimpleGraphQLServiceStack`, `TestExporter`, `AtomOptions`, `CompilerState`, `EnumItem`, `CodeGenFieldConnection`, `NamedProblemMatcher`, `Filesystem`, `ts.TypeNode`, `MockCSSRule`, `TimelineTrack`, `TUserAccountProvider`, `LayerInfo`, `NoteName`, `MetricSeriesFragment`, `cc.Event.EventMouse`, `OneToOneOptions`, `PersonChange`, `ListJobRunsRequest`, `Sudo`, `DeleteJobCommandInput`, `UnitFormatOptions`, `RectLike`, `RadSideDrawer`, `DeckPart`, `AvatarCustomization`, `BackgroundBlurOptions`, `SbbIconOptions`, `FX`, `FieldDescriptor`, `RangeSelector.RangeObject`, `ethers.Contract`, `PureEffect`, `PlatformTypes`, `RegisteredModule`, `QueryInterface`, `MatSnackBarContainer`, `TypeSet`, `ElementDecorator`, `GalleryProps`, `ItemTable`, `TEmoji`, `ProjectFile`, `IResourceExpression`, `BuildInstance`, `GitScmProvider`, `SystemManager`, `CreateAudioArgs`, `Trilean`, `TestDataObject`, `FunctionAddInfo`, `GestureStateEventData`, `CentralSceneCCNotification`, `ImageOptions`, `NohmModel`, `InvalidGreeting`, `TypeDBClusterOptions`, `LayoutDefaultHeaderItemComponent`, `HistoricalDataItem`, `StudioState`, `PDFPageLeaf`, `CRDTObject`, `FieldConfig`, `RoutingService`, `ContextMenuInteraction`, `MIRRegisterArgument`, `OpenAPIParser`, `Dockerode`, `ExpShapeSlice`, `PropTypeFinder`, 
`SmartPlayer`, `NodeCreator`, `LinuxJavaContainerSettings`, `BuildResult`, `RegistrationService`, `ArtifactItem`, `Tap`, `logger.Logger`, `IComm`, `FindQuery`, `ColumnSeries`, `NodeDisplay`, `Bill`, `KyselyPlugin`, `d.CompilerJsDocTagInfo`, `BooleanLike`, `Objective`, `Interval`, `BlitzPage`, `RMCommandInfo`, `ObservableLanguagePair`, `LoggerLevel`, `TaskRunnerFactoryInitializerParamsType`, `NoArgListener`, `MovementComponent`, `Tags`, `FileSystemResolver`, `PDFPage`, `StageContentLayoutProps`, `InteractiveController`, `Section`, `UtilitiesService`, `TransactionEnvelope`, `Boost`, `RenderBox`, `NumberFormat.UInt32LE`, `CAShapeLayer`, `Binding`, `XAudioBuffer`, `ProgressMessage`, `SimulatedPortfolio`, `SipgateIOClient`, `JSXMemberExpression`, `fpc__ProcessName`, `SessionType`, `vscode.ExtensionContext`, `AnalyticsService`, `BaseEdge`, `Flicking`, `LoginUserDto`, `OutputTarget`, `ChannelChainInfo`, `SyntaxNode`, `EngineArgs.DevDiagnosticInput`, `TimeHistory`, `FileTreeComponent`, `pageOptions`, `IndicatorsData`, `ISWATracker`, `DraggableDirective`, `Attribs`, `IconStorage`, `ModifyDBSubnetGroupCommandInput`, `LayerStyle`, `FeatureModule`, `MutableImageRef`, `ActiveSelection`, `ListMatchesRequest`, `CreateOfficeHour`, `KeyPhrase`, `PrimType`, `EntityDbMetadata`, `BodyDefinition`, `FunctionCallArgumentCollection`, `DecodeInfo`, `HistoryLog`, `StreamSelection`, `XYZ`, `d.JsonDocsMethod`, `SpaceBonus.DRAW_CARD`, `NodeFileSystem`, `ListChannelMembershipsForAppInstanceUserCommandInput`, `CommentController`, `ControllableEllipse`, `IDiffStatus`, `WatchStopHandle`, `IGameContextValue`, `SimpleASTSeq`, `Candidate`, `IGlobalManager`, `PromiseLike`, `Localization`, `StyleFunctionProps`, `CSharpNamespace`, `MainSettings`, `CstmHasuraCrudPluginConfig`, `TestBedStatic`, `MySQLConnection`, `PartitionOptions`, `CircleCollider`, `firebase.User`, `ExecutionOptions`, `ConversationService`, `OverflowT`, `SpecFiles`, `ts.MethodSignature`, `Datastore.Transaction`, `ts.AsExpression`, `NpmPackage`, `Cypress.PluginConfigOptions`, `Submitter`, `JobPostLike`, `XActionContext`, `Picker`, `TKey2`, `MerkleTree`, `T.MachineEvent`, `MacroKey`, `DatabaseUsageMetrics`, `ServerAccessKeyRepository`, `DBTProjectContainer`, `IMedal`, `MetaType`, `ResponseMessage`, `IMdcTextFieldElement`, `WaitContext`, `LexicalEnvironment`, `DataStoreTxEventData`, `DateTimeRecognizer`, `PageContent`, `AllAccessorDeclarations`, `ColorScaleInfo`, `DisassociateFromMasterAccountCommandInput`, `ColumnOptions`, `BenefitMeasurement`, `ISQLScriptSegment`, `CommitData`, `MyComponent`, `DeleteObjectCommandInput`, `ListAvailabilityHistoriesRequest`, `ServiceDependency`, `ImportedCompiledCssFile`, `ShapeConstructor`, `ERenderMode`, `BackupJSONFileLatest`, `CommentStateTree`, `ImGui.Style`, `ITimerToggleInput`, `TemporaryStorage`, `Replay`, `LogAnalyticsLabelAlias`, `d.CompilerFileWatcherCallback`, `AnnotationEventEmitter`, `Quantity.REQUIRED`, `IAGServer`, `ShareContextMenuPanelItem`, `AMM`, `TwingCompiler`, `PaginationNextKey`, `BerryOrm`, `ResponsiveProperties`, `TransformerArgs`, `LoadingManager`, `requests.ListCaptchasRequest`, `FeaturedSessionsState`, `CustomInputArgs`, `Nav`, `WhereClause`, `UserRegisterData`, `GltfNode`, `CommonWalletOptions`, `TicketsState`, `DerivationPath`, `UseDropdown`, `IpcMessageEvent`, `SharedAppState`, `CanaryExecutionResult`, `Align`, `PrivKeySecp256k1`, `VRMSchema.VRM`, `DataColumn`, `ParamDef`, `PermissionType`, `Highcharts.VMLElement`, `CameraMatrix`, `faunadb.Client`, `WalletService`, 
`ListRealtimeContactAnalysisSegmentsCommandInput`, `BooleanFilter`, `PinReference`, `MappingParameters`, `TextDocument`, `WriteStream`, `HeadClient`, `TriggeredEvent`, `requests.ListDrgsRequest`, `DatasourceSuggestion`, `BlockHeader`, `ProviderItem`, `ProgressState`, `GlobalState`, `WorkUnit`, `WebTally`, `IStageManifest`, `Facet`, `RuleViolation`, `PutFileContent`, `ToolsWorkspaceCommandResponse`, `WebSocketEvent`, `SchemaUnions`, `ComponentCompilerLegacyConnect`, `GfxReadbackP_GL`, `Watching`, `IDeploymentTemplate`, `UnidirectionalLinkedTransferAppState`, `ExtractedAttr`, `AddressDetails`, `Address4`, `RectangleSize`, `SavedMultisigEntry`, `LoaderManager`, `ActivityAudience`, `TextRangeCollection`, `Motor.StopActionValue`, `AuthReducerState`, `RecoilValue`, `DispatchByProps`, `StorageModuleOptions`, `Microgrammar`, `TriumphCollectibleNode`, `TextureState`, `core.ETHSignMessage`, `SavedObjectSanitizedDoc`, `ClusterContract`, `IRenderFunction`, `MetaQuestion`, `EthereumProvider`, `DateRangeInputProps`, `DynamicClasses`, `RollupSourceMap`, `NodeCanvasRenderingContext2D`, `DiffPanel`, `AttributeSelector`, `OciError`, `PullRequestViewModel`, `UserGroupList_UserGroup`, `Cls`, `WithCSSVar`, `ViewContext`, `GetItemFn`, `AntiVirusSoftware`, `VariableExpression`, `ir.Block`, `Heap`, `StudioServer`, `ObservableQuerySecretContractCodeHash`, `AbiItemModel`, `SerializedRenderResult`, `AngularFirestoreDocument`, `VersionResult`, `DepthModes`, `DebugId`, `GraphQLTagTransformContext`, `Gain`, `TInjectableOptions`, `LinearlyReferencedFromToLocationProps`, `IFileTreeItem`, `Vec2Sym`, `TestServer`, `SalesforceFormValues`, `BaseParams`, `IALBListenerCertificate`, `IPackageVersionInfo`, `Main`, `IFormInput`, `ILinkWithPos`, `IPageHeader`, `EgressSecurityRule`, `ThemeModeEnum`, `ISets`, `BitArray`, `UrlType`, `DefaultViewer`, `ForkEffect`, `MIRType`, `DaffCartCoupon`, `CandidateCriterionsRatingService`, `UpdateAssetCommandInput`, `TextStringContext`, `requests.ListDedicatedVmHostsRequest`, `GroupProperties`, `ExtendedHttpsTestServer`, `BasePathCoverage`, `ProcessedFile`, `GroupUserEditResponse`, `Path5`, `HammerInstance`, `TreemapSeries.ListOfParentsObject`, `ObjectModel`, `TutorialRuleStatus`, `ExpressionAttributeNames`, `SavedObjectTypeRegistry`, `EntityUpdate`, `BUTTON_SHAPE`, `OperationDetails`, `PluginCreateOptions`, `FlattenInterpolation`, `RequestMessage`, `NodeEntry`, `ProxyServerType`, `DetectorBuilder`, `Brackets`, `StackTrace`, `RoutingRule`, `Range1d`, `IDeliveryNetworkResponse`, `Paginate`, `TFEOpAttr`, `AtToken`, `CobIdentifier`, `Brush`, `StateMap`, `Cancelable`, `OAuthClient`, `DeleteProjectCommandOutput`, `SearchService`, `CreateCertificateDetails`, `DataProxy`, `GridItemHTMLElement`, `pd.E2EPageInternal`, `AlignItems`, `CreateObservableOptions`, `SettingGroup`, `CancelJobCommandInput`, `AttributeIds`, `RootScreenDelegate`, `IContainerRuntimeMetadata`, `PluginBuild`, `RepositoryFactory`, `MeshLODLevel`, `SignatureAlgorithm`, `BuildableTree`, `ImportNode`, `ProgressCallback`, `DashboardUrlGeneratorState`, `CreateStageCommandInput`, `ActionDefinitionByType`, `ResFont`, `BlockchainExplorerProvider`, `UnderlyingSource`, `ListIdentityProvidersRequest`, `IRootScopeService`, `SnapshotGenerator`, `MissionElement`, `MessageBoxOptions`, `GlobalEventsService`, `SeriesChanges`, `TDataGroup`, `MThumbnail`, `CommandBuffer`, `LeaderboardRecordList`, `IImperativeError`, `Characteristic`, `NotificationTime`, `LegacyOperation`, `tmp.DirResult`, `ts.IScriptSnapshot`, `GalleryImageVersion`, `FileHolder`, 
`ClassicComponentClass`, `ModuleInterface`, `AddToLibraryActionContext`, `nodenCLContext`, `OidcCtx`, `OhbugUser`, `PayloadTooLargeError`, `StateKey`, `BlockNumberUpdater`, `IUserData`, `SVNumeric`, `PanelState`, `MeaningfulDependency`, `AbsoluteSizeSchema`, `ListViewWrapper`, `UserSelection`, `ListDatasetEntriesCommandInput`, `GlobalMaxPooling2D`, `ICacheConfig`, `ButteryNode`, `LoaderAttributes`, `PkGetter`, `GridRenderCellParams`, `XPConnectService`, `LineResults`, `IAuthenticationManager`, `Bass`, `LinkedEntry`, `IBoxPlotColumn`, `DataState`, `msRest.Mapper`, `TestSetup`, `EncryptionMaterial`, `RematchStore`, `PageBlobGetPageRangesResponse`, `SiteInfo`, `RequestParams`, `DirectivePosition`, `Elt`, `SFAMaterialBuilder`, `ITableParseResult`, `AbstractRegisteredService`, `IReferenceLayer`, `DeleteQuery`, `IResult`, `NavigationPluginStartDependencies`, `ConnectorReferenceHandler`, `requests.ListWafTrafficRequest`, `LedgerState`, `PreCheckerClient`, `AxisLabelCircular`, `SelectorType`, `ListDevicesRequest`, `ExpressRouteGateway`, `MentorBasic`, `LinesResult`, `PartialList`, `TransactionEndedPayload`, `HttpErrorContext`, `ItemSliding`, `Git`, `RoleIndexPrivilege`, `FluidObject`, `MappedTypeDescription`, `IShikiTheme`, `ComponentOrTag`, `ParsedUrlQuery`, `ImportDefaultInterface`, `UiState`, `Nodes`, `DirectiveOptions`, `CsvReadOptions`, `Animal`, `IJwtPayload`, `NormalizeStateContext`, `CanvazNode`, `ReturnTypeInferenceContext`, `UnauthorizedException`, `ITicks`, `NodeEventTypes`, `VnetGateway`, `TimelineNonEcsData`, `SignatureHelpParams`, `TextureData`, `Defer`, `IJobFile`, `MXFloatingActionButtonLocation`, `DeleteManyResponse`, `Encoding.Encoding`, `KillRingEntity`, `ResourceSummary`, `apid.EncodeId`, `IOptionFlag`, `BitmexSpy`, `ReadFileOptions`, `SortablePlayer`, `AuditEvent`, `CountableTimeInterval`, `RawMessage`, `WideningContext`, `IExecutionContext`, `ResizeOptions`, `CollectionTransaction`, `Contributor`, `SurveyPDF`, `$Promisable`, `VisitorContext`, `ApiMethod`, `DOMHandlerScope`, `SocketContextData`, `TokenStreamRewriter`, `DownloadedFiles`, `StynWalk`, `CategoricalParameterRange`, `NgForm`, `NodeView`, `TextTrackCue`, `MockToken`, `ConvLSTM2DCellArgs`, `Channel`, `IntervalNode`, `Proto`, `EqualityComparer`, `AnyObj`, `FibaroVenetianBlindCCReport`, `GetMeshSourceOptions`, `core.IProducer`, `INodeUi`, `NavigationEdge`, `DocViewsRegistry`, `ExportTraceServiceRequest`, `JoinPoint`, `NotificationLevel`, `OutputTargetDistCustomElements`, `IColumnToolPanel`, `NewE2EPageOptions`, `ZIlPayCore`, `SpotifyErrorResponse`, `ZipFileOptions`, `QueryBucket`, `IChangeInfoHash`, `MDCTextFieldOutlineAdapter`, `is_pressedI`, `TronSignedTransaction`, `ServerlessAzureConfig`, `RenderTexture2D`, `StatefulChatClientOptions`, `RouteArgs`, `PageSourceType`, `ParticipantsJoinedListener`, `ClipPlane`, `DetailedPeerCertificate`, `AdminService`, `SnackbarService`, `StoreResource`, `Rx.TestScheduler`, `TypeValue`, `MerchantMenuOrderEntity`, `DotenvLoadEnvResult`, `GeoUnitDefinition`, `TaggingInfo`, `KeyedAccountInfo`, `GfxRenderPass`, `PrincipalTokenCurveTrie`, `messages.Meta`, `TIndex`, `NamedImports`, `DecryptParameters`, `ITenant`, `BSplineSurface3d`, `ProfilePage`, `androidx.appcompat.app.AppCompatActivity`, `UpdateTableCommandInput`, `S3Destination`, `RenderTarget`, `TextTrack`, `Immutable.List`, `DQCCacheData`, `TransactionInfo`, `MockValidatorsContract`, `SubscribeParams`, `React.WheelEvent`, `TargetDisplaySize`, `MessageOption`, `StateType`, `BroadcastOperator`, `TimeRange`, `TooltipPoint`, 
`ContainerState`, `ThemeCreator`, `IMessageRepository`, `PropertyChangeData`, `TArgs`, `CollidableLine`, `SocketIO.Socket`, `Chip`, `ResponderType`, `MockDomController`, `TxEventContext`, `PolicyRequest`, `ViewColumn`, `ValidationFlags`, `CalendarDay`, `AutoRestGenerateResult`, `PDFAcroForm`, `KeyPairOptions`, `IndexedMap`, `ReadonlySet`, `CIFilter`, `AdministratorName`, `ArrayList`, `RangeContext`, `JSONSchemaAttributes`, `TwingEnvironment`, `El`, `PersistAppState`, `TheTask`, `ConnectionGroup`, `TextureFetcher`, `TypeConsApp`, `MemoryWriteStream`, `CommitID`, `X`, `SettingLanguage`, `FileSystemManager`, `CurrentItemProps`, `IVersion`, `Zipper`, `HapService`, `IFabricEnvironmentConnection`, `ContributionRewardSpecifiedRedemptionParams`, `ObservablePoint3D`, `ComputedStateCreationOptions`, `QueryServiceSetupDependencies`, `wjson.MetricWidgetAnnotationsJson`, `CollisionDirector`, `CompilationParams`, `InventoryInteractionService`, `BuildNoChangeResults`, `IMongoResource`, `ConfigurationEnv`, `ReadonlyMat`, `TRPCResponse`, `ExpressionFunctionClog`, `RawNode`, `TriggerForm`, `IPFS`, `StorageKeys`, `labelValues`, `CreateChannelModeratorCommandInput`, `ArtifactDelta`, `GaxiosResponse`, `BIP32Path`, `DashboardSavedObject`, `ParsedCssDocument`, `IngressSecurityRule`, `VECTOR_STYLES`, `IParamSignature`, `InputGenerator`, `DebugProtocol.LaunchResponse`, `objectPointer`, `DeleteMemberCommandInput`, `ComponentsProps`, `CSSInterpolation`, `HandlerFunction`, `RedirectPolicy`, `ListAnswersCommandInput`, `StateVariables`, `CodePointPredicate`, `SpacesService`, `core.LifecycleSettings`, `MapViewInset`, `BaseKey`, `GlobalGravityObj`, `MarkBuilder`, `CompletionState`, `StreamZip`, `ts.FunctionLikeDeclaration`, `SignatureVerifier`, `DecompiledTreeProvider`, `ICellRenderer`, `FormFields`, `PlatformAccessory`, `AppCompatActivity`, `EnvironmentInfo`, `SlotAst`, `ProjectStatus`, `ContextView`, `IResolvedConfig`, `PackedTrie`, `TimeTicksInfoObject`, `ts.VariableDeclaration`, `ScryptParams`, `CustomNestedProps`, `CustomFormGroup`, `NpmPublishClient`, `FabricEvent`, `AccountDetails`, `XLSX.WorkSheet`, `HeftEvent`, `EnvironmentProps`, `TypeOfContent`, `DataRowItem`, `EmployeeViewModel`, `SimControlLog`, `OpaqueToken`, `SearchableItemPresenter`, `DNSPacket`, `FunctionImpl`, `MenuListProps`, `google.maps.LatLng`, `XCommentNode`, `ApiNotificationReceiver`, `MessageCollector`, `ErrorArea`, `ProseNodeMap`, `DtlsContext`, `DestinationFetchOptions`, `IBeaconConfig`, `VideoListQueryDto`, `SimulationInfo`, `ClassDeclarationStructure`, `DataLabelOption`, `SeedOnlyInitializerArgs`, `msRest.OperationParameter`, `ConvertedLoopState`, `MemoryPages`, `fs.WriteStream`, `Store`, `OverpassElement`, `T16`, `GitHub`, `Jest26CacheKeyOptions`, `LexoDecimal`, `ReadModelRequestEnvelope`, `Pass`, `FurParam`, `TimeHistoryContract`, `GetDMMFOptions`, `DashboardContainerInput`, `NetworkService`, `TokenFlags`, `FargateService`, `PointLike`, `AnalyzableNode`, `IToastOptions`, `Colors`, `UpdateParameters`, `DominantSpeakersInfo`, `MassetDetails`, `vscode.OutputChannel`, `ExtraControlProps`, `ContextName`, `AutoconnectConfig`, `DefaultItemType`, `IBucketDateHistogramAggConfig`, `CSSValues`, `ConvectorController`, `DeserializeWire`, `Urbit`, `Shorthand`, `PiLogger`, `HelmRelease`, `MutationObserverInit`, `Document`, `QuestionAdornerComponentProps`, `FakeProgbar`, `Objkt`, `Point2DData`, `JSX.TargetedMouseEvent`, `DeleteFunctionCommandInput`, `EventProvider`, `_STColumn`, `ExtendedClient`, `PathFilter`, `TestChangesetSequence`, `StringPublicKey`, 
`RheaEventContext`, `fGlobals`, `ExtensionItem`, `ProsodyFilePaths`, `Survey.Page`, `PosBin`, `DaoFilter`, `CheckAvailabilityProps`, `TTarget`, `StringFilter`, `ScopedObjectContext`, `ResponseType`, `DeferredAction`, `Schematic`, `MIRRecordType`, `DiskOptions`, `Facade`, `PersistedSnapshot`, `Position3DObject`, `TArray`, `IInterceptor`, `TheoryItem`, `ListableObject`, `ILanguageRegistration`, `Dropout`, `vscode.DocumentFilter`, `MongoError`, `ParserPlugin`, `DockerContainer`, `StepType`, `BulkActionProps`, `IHook`, `MultiPolygon`, `PolymorphicPropsWithoutRef`, `MicroAppConfig`, `AnyJson`, `CSI`, `MaterialInstanceState`, `FieldItem`, `TransactionBuilderFactory`, `ObsoleteOptions`, `ValidationRuntimeOptions`, `TestScriptOptions`, `YggdrasilAuthAPI`, `MetaData`, `UiActionsPlugin`, `DistanceFn`, `IAssetPreviewProps`, `Margin`, `ProjectParser`, `SystemFixture`, `SymbolOptions`, `PathToProp`, `IOtherExpectation`, `UntagResourceCommand`, `ExpressionFunction`, `VisualizationOptionStore`, `execa.ExecaChildProcess`, `WhileStatement`, `monaco.Uri`, `MetadataValueFilter`, `ISearchSetup`, `ExtensionPackage`, `ICommentData`, `DockType`, `EntityModel`, `RequestEvent`, `ChimeSdkWrapper`, `SFC`, `EnergyAmounts`, `IKChain`, `SetType`, `PartialEmoji`, `Flair`, `TaskExecution`, `Inline`, `AsyncCache`, `FormatterOptionsArgs`, `ListBuildsCommandInput`, `browser.management.ExtensionInfo`, `IAPProduct`, `TensorList`, `Pump`, `IncomingHttpResponse`, `PreprocIncInfo`, `TLBounds`, `FooId`, `freedom.RTCPeerConnection.RTCPeerConnection`, `IGarbageCollectionData`, `requests.ListApplicationsRequest`, `ContextEntry`, `PageDTO`, `CBlock`, `BrowserState`, `IZosmfIssueParms`, `PendingRequest`, `RandomNumberGenerator`, `AreaSeriesStyle`, `EngineAPI.IApp`, `IAmazonServerGroupCommand`, `StatedBeanContextValue`, `MediaView`, `ColonyExtensionsV5`, `ViewPortService`, `TraversalStrategy`, `ActionToRequestMapper`, `JsonRpcResponseCallback`, `SearchContext`, `ExpressionRenderer`, `PaneProperty`, `core.CommonInputFieldConfig`, `RowContext`, `VoteAccountAsset`, `ErrorResponseData`, `MenuEvents`, `GetZoneRecordsResponse`, `EC2`, `NotificationsServiceStub`, `RX.CommonProps`, `CursorPopupInfo`, `FeatureType`, `Path8`, `JSON5Config`, `SourceControl`, `LimitLine`, `ITextProps`, `SendDataRequest`, `MutableMatrix33`, `SkipListNode`, `ObjectList`, `CharWhere`, `IUtxo`, `CustomerData`, `HTMLTemplateElement`, `TypedEventFilter`, `EventNameFnsMap`, `GitHubPRDSL`, `SimplifyOptions`, `Direction`, `MutationFunc`, `ImGui.Access`, `CompilerJsDocTagInfo`, `NoticeService`, `BitcoinSignedTransaction`, `ItemDataType`, `PageData`, `requests.ListVirtualCircuitsRequest`, `DatatableRow`, `MockActivatedRoute`, `V1ContainerStatus`, `NVMJSON`, `BitbucketUserEntity`, `E2EProcessEnv`, `EffectComposerComponent`, `IPointAttribute`, `IModelIdArg`, `Group.Point`, `Mask`, `GadgetInstanceService`, `TSClient`, `ClassifiedParsedSelectors`, `ColumnDef`, `LitCallback`, `ClientImpl`, `CallbackOptionallyAsync`, `BuildrootUpdateSession`, `MeetingAdapter`, `ExtendedHttpTestServer`, `DaemonConfig`, `PerfTools`, `ActionTypeRegistry`, `BooleanLiteral`, `SyncExpectationResult`, `NameObjRequestArchivesFunc`, `TransferEvent`, `ElementSet`, `EmbeddableFactory`, `FiscalCode`, `DeleteStageCommandInput`, `SystemStats`, `Preference`, `FolderNode`, `AddUserCommand`, `UIFill`, `OnGestureEvent`, `BundleOrMessage`, `TreeOption`, `AWSSNSEvent`, `DataServiceError`, `MarkdownOptions`, `SceneGraphComponent`, `ScreenInfo`, `Bootstrap`, `esbuild.OnLoadResult`, `StringDict`, `ApiItemReference`, 
`SubEntityType`, `LoadOptions`, `CanvasItem`, `BalmScripts`, `UiButton`, `lambda.Function`, `Interpreter`, `JulianDay`, `IParseHandler`, `SimpleStore`, `ConnectionDataEnvelope`, `AkimaCurve3d`, `UnionableType`, `MetadataAccessor`, `ResolvedGlTF`, `IpfsApi`, `RouterService`, `InsertDelta`, `IdentGenerator`, `ColumnAnimation`, `RangeDelta`, `IssuePriority`, `CoreIndexFile`, `TimeScaleUnit`, `LinterOffense`, `IExplorer`, `ts.BindingName`, `DeleteServiceCommandInput`, `CaptureOptions`, `SetSelectionMenuDelegate`, `RumInitConfiguration`, `NavigationTree`, `UrlForwardingPlugin`, `ListWorkspacesRequest`, `React.FormEvent`, `IMyDpOptions`, `KirbyAnimation.Duration`, `HowToPay`, `Tabs`, `DelayFunction`, `ResourceOptions`, `ExpBool`, `TypedAxiosResponse`, `PiElementReference`, `IImageExtended`, `PopulatedTagDoc`, `KeyToIndexMap`, `CdsButton`, `CompressedId64Set`, `IPipe`, `ITelemetryGenericEvent`, `Uint64`, `WlDocs`, `HashFunction`, `ClientRegistry`, `UserInstance`, `StyleInfo`, `StackingState`, `IReq`, `MessageReceivedListener`, `JSNode`, `CircuitGroupState`, `BadgeStyle`, `SymbolIndex`, `EstimateGasValidator`, `ReadableFilesystem`, `ARGS`, `TreeviewFlatNode`, `model.TypeName`, `CesiumLayer`, `FoundationType`, `MatSelectChange`, `TestEntry`, `AWSContext`, `FieldFormatsGetConfigFn`, `types.TracerBase`, `AlertInput`, `IUIProperty`, `RelayModernEnvironment`, `ITemplateId`, `SelectMenuProps`, `enet.INetEventHandler`, `AbstractSqlDriver`, `Field.PatchResult`, `DynamicFormValidationService`, `JsonParserGlobalContext`, `IDownloadFile`, `MainAccessResponse`, `DbStxEvent`, `Comm`, `Anchor`, `IProjectItem`, `BookmarkIdMapping`, `PackageEntry`, `Nodes.ASTNode`, `ProtoServer`, `ApplicationConfigState`, `DeviceConfigService`, `PullRequestState`, `DappRequest`, `Corner`, `QueryOptionNames`, `d.OutputTargetDocsJson`, `BasePlugin`, `PartialStepState`, `RxjsPipeline`, `S2L2ALayer`, `RGBStrings`, `MatOpN`, `IMesh`, `ApplicationStateMeta`, `android.view.View`, `ViewBoxParams`, `BookingService`, `OfficeMockObject`, `LegacyRequest`, `RenderMode`, `firebase.firestore.WhereFilterOp`, `TPagedParams`, `FilterFor`, `DDL2.Schema`, `Property`, `DialogRow`, `SendView`, `MiddlewareConsumer`, `AddressBookEntry`, `BlockNumberState`, `angular.IQService`, `d.ComponentOptions`, `ListMembersCommandInput`, `PlannerConfigurationScope`, `AuthorizationServiceSetup`, `Aggregator`, `EmbeddableFactoryProvider`, `GX.DiffuseFunction`, `vscode.DiagnosticSeverity`, `AlainConfig`, `AstSymbol`, `MessageToMain`, `PostTexMtx`, `DeviceDescriptor`, `Archives`, `tf.Tensor2D`, `SimpleSwapAppState`, `ITelemetryBaseLogger`, `VerificationContext`, `ISignal`, `Neutrino`, `ArcShape`, `Matrix3x3`, `HeadProps`, `ListDomainsRequest`, `DrawerHelperOptions`, `ISetActionTypes`, `CreateChannelCommandInput`, `ImportsMetadata`, `SchemaArg`, `CogJob`, `ClassDescription`, `PIXI.InteractionEvent`, `BasicSourceMapConsumer`, `NineZoneStagePanelsManager`, `KanjiNode`, `IChangeTarget`, `GridSprite`, `IUploadAttributes`, `UpdateArticleDto`, `match`, `TMethod`, `DaffCategoryFilterEqualToggleRequest`, `IgnoreMatcher`, `FieldView`, `RadioItem`, `RemoveNotificationChannelCommandInput`, `Information`, `requests.ListQuotasRequest`, `DomModule`, `ChatMessageReceivedEvent`, `Fs`, `Matcher`, `AssetChangedEvent`, `requests.ListSoftwareSourcesRequest`, `Crop`, `SimpleBBox`, `FormErrorsService`, `StatsAsset`, `ExternalAuthenticateResultModel`, `provider`, `StarPieceHostInfo`, `DeviceProps`, `ValidationData`, `ReporterFactory`, `Workspace`, `IssueSummary`, `BaseProperty`, `GDIContext`, 
`FacetsState`, `DescribeImagesCommandInput`, `RTCIceCandidateJSON`, `MediaSubtitlesRelation`, `ContainerImage`, `DAL.DEVICE_ID_SYSTEM_MICROPHONE`, `SignedOnUserService`, `ProgressBar`, `RequestContext`, `sdk.Conversation`, `AlertsClient`, `VoiceFocusConfig`, `BenefitMeasurementIndicator`, `ResolutionHelper`, `requests.ListCpeDeviceShapesRequest`, `PubEntry`, `IApiComponents`, `MediaStreamOptions`, `EndpointDescription`, `StateDefinition`, `CacheChangeEventData`, `SimpleComparator`, `ConfigValues`, `AnimationKeyframeLinear`, `React.ReactPortal`, `RatePretty`, `YDefinedFn`, `EnvConfig`, `DataItem`, `BSplineCurve3d`, `ChartSpecPage`, `ColumnsType`, `drawerModule.RadSideDrawer`, `OrganizationUnitDto`, `MultipleClassDeclaration`, `ActionEvent`, `BlockchainEnvironmentExplorerProvider`, `CounterState`, `SlmPolicy`, `Seq`, `requests.ListSourceApplicationsRequest`, `ICreateUpdateLanguageConfig`, `LabwarePositionCheckStep`, `SoftwareSourceId`, `IJWTPayload`, `TransformFactory`, `RequestInit`, `SelectableDataPoint`, `TydomDeviceSecuritySystemData`, `AppCommitment`, `requests.ListVlansRequest`, `StarknetERC721ContextInterface`, `ParameterObject`, `CodePrinter`, `ExportingOptions`, `PropertyChangeResult`, `CreateVolumeCommandInput`, `RegistryRuleType`, `IAggregateConfiguration`, `OpenYoloError`, `SKFillItem`, `CompactOrCondition`, `ShowProjectService`, `Parslet`, `ItemMap`, `C8`, `NgOpenApiGen`, `TagMapper`, `EmployeeAppointment`, `Scalar`, `DeleteJobResponse`, `SSOLoginOptions`, `DateAdapter`, `HydrateImgElement`, `MultilevelSensorCCReport`, `SliderValue`, `Callsite`, `ReduxActionWithPayload`, `Is`, `CalculateBoundsFn`, `StructureController`, `RawMatcherFn`, `ParticipantPermission`, `IqSelect2Item`, `RPCRequest`, `PrimedGroup`, `BuiltQuery`, `Datepicker`, `MatchLeave`, `DefaultEditorSize`, `FormlyFieldConfig`, `ts.TypeReferenceNode`, `GfxAttachmentState`, `IContentSearchOptions`, `DescribePackageVersionCommandInput`, `IDocEntryWeight`, `ListInvitationsCommandInput`, `IAddressState`, `CodeMirror.EditorFromTextArea`, `CompiledBot`, `DeployProps`, `STXPostCondition`, `ByteSizeValue`, `TreeNodeType`, `Customer`, `DeleteRepositoryPolicyCommandInput`, `GoEngineState`, `StateDiff`, `UploadLayerPartCommandInput`, `CreateAccountsValidationResult`, `DecorationRenderOptions`, `Type_AnyPointer_Parameter`, `InputModel`, `CmsContext`, `BanGroupUsersRequest`, `OutputTargetDist`, `ICollectItem`, `OnDemandPageScanRunResultProvider`, `NoiseSocket`, `LatLon`, `UpdateDomainCommandInput`, `EIP712Types`, `RegistryClient`, `Reader`, `Deck`, `AdjacencyGraph`, `types.IDBRow`, `ValidatorData`, `ChartUsage`, `UncachedNpmInfoClient`, `Process`, `IMainConfig`, `StateCallback`, `IOrganizationProjectsFindInput`, `KVS`, `FormControl`, `AdamaxOptimizer`, `UsersService`, `IGetTimeLogConflictInput`, `ObjectSelectionListState`, `LegendProps`, `OnConflictNode`, `AST.MustacheStatement`, `Safe`, `SearchCallback`, `GeometryKind`, `InstallTypes`, `HoverProviderItem`, `TrackId`, `ListUI`, `RunGroupProgress`, `XlsxService`, `Synthetic`, `TypescriptEditorPane`, `AppEntry`, `StatusUpdate`, `SolidLineMaterial`, `CollectionNode`, `KeyStop`, `CreateTokens`, `ConfigsService`, `CssClasses`, `ANSITerminalStyleRenderer`, `EventKeys`, `Screens`, `CreateSecurityConfigurationCommandInput`, `IItemScore`, `CommentInfo`, `LayerSpec`, `EliminationBracket`, `Errors`, `TreeModelSource`, `IJetApp`, `ScrollSpiedElementGroup`, `ServerListEntry`, `PotentialEdge`, `ShareTransferStore`, `NgrxAutoEntityService`, `OpenSuccessCallbackResult`, `InstancedMesh`, `Iter`, 
`TypeParser`, `NucleusVersion`, `DefaultRollupStateMachine`, `NVM3Object`, `d.EntryModule`, `LeaderboardEntry`, `ServerSideProps`, `ListClient`, `KernelFunc`, `LoggerWithTarget`, `MDCSwitchAdapter`, `TransactionError`, `MeterCCReset`, `AttachedProperty`, `CustomDialogOptions`, `TextDocumentSettings`, `CoreTypes.TextDecorationType`, `EMSSettings`, `ExpressionAstFunctionBuilder`, `angular.resource.IResourceClass`, `NoInputAndOutputCommandInput`, `TeamList`, `RelationInput`, `HTMLProgressElement`, `Dubbo`, `RawSavedDashboardPanel620`, `InjectionKey`, `RouterMock`, `BSPEntity`, `SubContext`, `FunctionCallValue`, `float`, `E1`, `GrowableXYArray`, `Float32ArrayConstructor`, `ChangeSetItem`, `AuthorizedRequest`, `ArmResourceTemplate`, `PutResourcePolicyResponse`, `LivelinessMode`, `WorkspacePlugin`, `BUNDLE_TYPE`, `IExternalStorageManager`, `TableCell`, `UnaryExpression`, `JSONEditorSchema`, `BandFillColorAccessorInput`, `AsyncResource`, `d.ErrorHandler`, `requests.ListIPSecConnectionTunnelsRequest`, `JsonDocs`, `SVGVNode`, `QueryStringInputProps`, `StrokeOptions`, `JSESheet`, `AndroidActivityBackPressedEventData`, `ExecutionResultDataDefault`, `CreateRegionPureReturnValue`, `LitvisDocument`, `ThemesTypes`, `UserPoolService`, `CommentGraphicsItem`, `RemoteEvent`, `Facets`, `ENDAttribute`, `MailerService`, `BuilderOutput`, `WalletManager`, `Seed`, `messages.Background`, `PromiseCollection`, `TransitionSpec`, `MapEntry`, `ComponentTypeOrTemplateRef`, `RequestPolicyFactory`, `LocalUser`, `cback`, `EntryProps`, `WalletConnect`, `RoomState`, `CW20Instance`, `GraphQLContext`, `StackUndeployOperation`, `bitcoin.payments.Payment`, `S2CellType`, `SliderOpt`, `TerminationStrategy`, `MyUser`, `Ti18nDocument`, `UnitConversion`, `AutoScalingMetricChange`, `Animated`, `BandcampSearchResult`, `BaseClusterManager`, `d.OutputTargetDistCollection`, `IActiveLearningSettings`, `UnionOfConvexClipPlaneSets`, `CaretCoordinates`, `ModelTypes`, `NodeWrap`, `WebdriverIO.Browser`, `PointSet`, `JSONSchemaSourceData`, `StacksNetwork`, `ValidationAcceptor`, `NavigatedData`, `Datastore`, `BackgroundStyle`, `StateFromFunctionReturningPromise`, `ethers.BigNumber`, `RouteObject`, `SwiperProps`, `ArticleItem`, `JEdge`, `ISimpleConfigFile`, `HDOMNode`, `ReleaseType`, `StepOption`, `CreateFlowCommandInput`, `DragRefConfig`, `runtime.HTTPQuery`, `DescribeScalingPoliciesCommandInput`, `UnescapedString`, `requests.ListClustersRequest`, `tBootstrapFn`, `ml.Attribute`, `MIDIControlListener`, `SymOpts`, `FSAOptions`, `DescribeEnvironmentsCommandInput`, `AXNode`, `CreateDatasetCommandInput`, `IMidwayBootstrapOptions`, `RenderData`, `OverlayBackgroundProps`, `DeviceManifest`, `PatternLibrary`, `DeleteGroupCommandInput`, `DynamicFormControlEvent`, `AggregationCursor`, `ObservableArrayAdministration`, `UserThemeEntity`, `Decision`, `Web3ProviderEngine`, `BatchWriteRequest`, `DQLSyntaxErrorExpected`, `IOdspResolvedUrl`, `RlpItem`, `SessionDataResource`, `LexerResult`, `UserStoreProperty`, `InstallWithProgressResponse`, `Note`, `OutputEndpointData`, `XmlElement`, `PReLULayerArgs`, `DynamicActionsState`, `mod.LoadBalancerTarget`, `ContentMatcher`, `FSError`, `TestResource`, `WasmResultValues`, `OutlineDoOauthStep`, `VisualizationToolbarProps`, `GeoInput`, `Projects`, `tflite.TFLiteModel`, `KeyIdentity`, `CSR`, `MatchmakerAdd`, `TokenStream`, `ILeg`, `BrowseCloudDocumentWithJobs`, `KeyframeAnimationInfo`, `HdTronPaymentsConfig`, `OverlayPortal`, `TPluginSettingsProps`, `IndexOpts`, `TestModel`, `HasSelectorNodes`, `DomainEventSubscriber`, 
`EphemeralTaskLifecycle`, `Decipher`, `SelectMenuItemProps`, `Compiler`, `LanguageModel`, `ExportNodeProperties`, `BufferStatusResult`, `IUIField`, `ChildField`, `AsyncHook`, `SRT0_MatData`, `ContractFraudProof`, `Finality`, `ParametersPositionContext`, `AnchorPosition`, `MetamodelService`, `ResourceLabelFormatter`, `IpcCommandType`, `CardRenderSymbol`, `BlockDefinition`, `DeploymentExecutor`, `ng.IAngularEvent`, `MetadataTypeGatherer`, `KanbanBoard`, `TriumphFlatNode`, `OutlineSurveys`, `NetworkProvider`, `PrivateDnsZoneGroup`, `StateReceipt`, `ChildInfo`, `GraphObject`, `ProfileInfo`, `TextAnimationRefs`, `GoalStatus`, `DetailedStackParameter`, `CreateSecurityProfileCommandInput`, `SubscriptionItem`, `ExadataInfrastructureContact`, `FilteredHintItem`, `FabricIdentity`, `NavigationGraphicsItem`, `THREE.Scene`, `DataTypesInput.Struct1Struct`, `Micromerge`, `QueryOrderMap`, `LSTMState`, `CodelistRow`, `PlotlyLib`, `EidasResponseAttributes`, `Chai.ChaiUtils`, `InputStep`, `Jimp`, `planner.PlannerConfiguration`, `RepositoryEditWriteModel`, `HookFunction`, `MigrationSubject`, `MessageFormatter`, `StyleClasses`, `CacheService`, `EndpointWithHostLabelOperationCommandInput`, `BodyPartDefinition`, `PrimitiveNonStringTypeKind`, `CodeUnderliner`, `MockNexus`, `CanvasTextAlign`, `ListIndicesCommandInput`, `TsickleIssue1009`, `DateObject`, `vscode.OnEnterRule`, `URLDescriptor`, `UpdateConfigurationResponse`, `RequestOptionsArgs`, `ImportEditor`, `ObjectASTNode`, `LangOptions`, `KeplrGetKeyWalletCoonectV1Response`, `APEv2Parser`, `EmitHint`, `TSESTree.CallExpression`, `LambdaRestApi`, `YDomain`, `RadioGroupProps`, `BasicRoller`, `ModelField`, `ExpressConnection`, `ClientSession`, `BaseScope`, `DogRepresentation`, `AAA`, `ContinueStatement`, `MenuTree`, `DescribeEventCategoriesCommandInput`, `C5`, `ILoadbalance`, `AbstractTransUnit`, `SwUrlService`, `NoteContent`, `CurveColumnSeries`, `FactoryDatabase`, `B64EncryptionResult`, `PickleStep`, `WalletPage`, `IGetContentOptions`, `UnitConversionSpec`, `IntervalCollectionIterator`, `IndexedColumn`, `ApiDeclaration`, `PatcherServer`, `WebsocketInsider`, `Link`, `ITransValueResult`, `HttpClientService`, `ast.CallNode`, `SpacesPlugin`, `ContentWidget`, `ChemControllerState`, `BuildEntry`, `PromiEvent`, `DoOnStreamFns`, `CertaConfig`, `IInstallManagerOptions`, `_.Dictionary`, `requests.ListDomainsRequest`, `FunctionDeclarationStructure`, `SysMenu`, `BasePlacement`, `CredentialResponseCoordinator`, `Prompt`, `RepoSyncState`, `DesktopCommand`, `InterfaceCombinator`, `Skill`, `SpawnerOrObservable`, `TableOfContentsItem`, `MarketDataProvider`, `IAccessToken`, `NavigationBarNode`, `ProjectInformationStub`, `EntityTypeDecl`, `CreateTransformer`, `Users`, `DataEvent`, `XCascadeNode`, `WifiConfigureRequest`, `SVGDatum`, `MsgCreateProvider`, `SpecificWindowEventListener`, `PaginateQuery`, `ICloudFoundryServerGroup`, `CommandOption`, `RLAN`, `JobCreatorType`, `AssetMarketPrice`, `JwtHeader`, `CompositeGeneratorNode`, `IntersectParams`, `RequestCallback`, `ts.Symbol`, `RecordBaseConcrete`, `childProcess.ChildProcess`, `RequestObject`, `Circuit`, `PickKeyContext`, `MarketHistory`, `PointerInfo`, `ActiveEnvironment`, `cytoscape.SingularElementArgument`, `TinyQueue`, `IColorHierarchy`, `TestModelVersion`, `VariableTable`, `providers.TransactionResponse`, `OnSuccessCallback`, `BasicBlock`, `Concept`, `ChannelListItem`, `IAngularScope`, `ts.CustomTransformers`, `NodeItem`, `NetworkContext`, `IRun`, `UiCounterMetricType`, `ActionReducerMapBuilder`, `MathExpression`, 
`vscode.TextEditorDecorationType`, `SetupResult`, `Leg`, `JPAExTexBlock`, `Accord`, `ValidatorResponse`, `UpdateSecurityProfileCommandInput`, `PlayerEntity`, `PointerDownOutsideEvent`, `t_b1f05ae8`, `LoadableComponent`, `BooleanFilterFunction`, `GetIPSetCommandInput`, `IVssRestClientOptions`, `DockerFacade`, `StreamReport`, `MdxListItem`, `RemoteDataBuildService`, `DataCharacter`, `BooleanCV`, `LineMetrics`, `VerticalAlignValue`, `RenderArgs`, `FormPage`, `LeaguePriceSource`, `StatePathsMap`, `IProduct`, `AttachedModule`, `ProgramCounterHelper`, `ITestPlan`, `DestinyInventoryItemDefinition`, `IRequestOption`, `Axial`, `ECR`, `ConvLSTM2DCell`, `F`, `ZoneDef`, `ColumnConfiguration`, `ReadableStream`, `FlipperLib`, `net.Server`, `CreateEventSubscriptionMessage`, `MetricsResults`, `CustomersState`, `SidebarLinkProps`, `SendFunc`, `PluginEditorProps`, `NativeCallSyntax`, `BoostDirectorV2`, `ServerType`, `MDL0Model`, `BSONType`, `MigrateStatus`, `TView`, `CommandModule`, `ReaderObservable`, `EbsBlockDevice`, `BBox`, `CompositeTraceData`, `CliCommandOptions`, `MessagingSessionObserver`, `TypeParameterReflection`, `OscillationState`, `d.HydrateAnchorElement`, `TreeCheckboxStateChangeEventArgs`, `FirestoreForm`, `Redex`, `SendRequest`, `PUPPET.payloads.Message`, `SM`, `SystemVerilogContainerInfo`, `DeleteApplicationReferenceDataSourceCommandInput`, `RegExp`, `Pier`, `GetFunctionCommandInput`, `IAugmentedJQuery`, `ProgressDashboardConfig`, `guildDoc`, `t.STSelector`, `SupClient.AssetSubscriber`, `MatchArgsToParamsResult`, `OptionedValueProp`, `Compressor`, `FirstConsumedChar`, `GlobalParameter`, `ICharacteristic`, `AnySchema`, `MarketFiltersState`, `CurrentUserType`, `EqualContext`, `TestExecutionInfo`, `VueSnipState`, `BuildVideoGetQueryOptions`, `Profiles`, `InternalOpExecutor`, `TemplateAst`, `PuppeteerScreenshotOptions`, `AuthenticateSteamRequest`, `Send`, `DisjointSetNode`, `HTMLTitleElement`, `DmmfDocument`, `RollupConfigurationBuilder`, `PathHash`, `PutRetentionPolicyCommandInput`, `ClippedPolyfaceBuilders`, `TPermission`, `KeyframeIcon`, `ICollection`, `requests.ListHttpMonitorsRequest`, `CardModel`, `Record.Update`, `RegisteredRuleset`, `ArgType`, `RootState`, `CustomElement`, `RotationSettings`, `DestinationCertificate`, `TelemetryPluginConfig`, `PredictionContext`, `Grouping`, `StringValueNode`, `MigrationResult`, `Department`, `ODataQueryMock`, `ApiChanges`, `WeakSet`, `InterviewPrizePlaylist`, `RoosterCommandBar`, `ObjectTypeKind`, `TokenSmartContract`, `UpdateDataSourceCommandInput`, `ExtendedBlock`, `UsersController`, `SelectionModel.Selection`, `ArrayConfig`, `NodeJS.ProcessEnv`, `ChartsPlugin`, `CheckType`, `WorkspaceMiddleware`, `Color3`, `RemoteVideoStreamState`, `TestKeyring`, `AdtHTTP`, `DecodedResult`, `JPA.JPABaseEmitter`, `HeroById`, `PredictablePickleTestStep`, `ParquetSchema`, `InstallablePackage`, `DependencyResolver`, `SessionDescription`, `Rendered`, `IFoo`, `ClientRepresentation`, `ValidationParams`, `SandDance.VegaDeckGl.types.VegaBase`, `ClientSubLocation`, `Guard`, `QuizReaction`, `DateWidget`, `DefinitionNode`, `IIdentifier`, `ng.ui.IStateProvider`, `IConsul`, `CorrelationsParams`, `LocalizableString`, `ListPipelineExecutionsCommandInput`, `INamedDefinition`, `ChainStore`, `ListFHIRExportJobsCommandInput`, `SkipOptions`, `RelationshipPath`, `DeleteWebACLCommandInput`, `SInt64`, `DebugProtocol.EvaluateResponse`, `PointOctant`, `USampler3DTerm`, `Ellipsoid`, `ElementPosition`, `IDataProvider`, `ActionButtonProps`, `SeriesComposition`, `Electron.MessageBoxReturnValue`, 
`EthereumEvent`, `AutorestExtensionHost`, `TxHelper`, `ServeOptions`, `Rounding`, `TsActionCreator`, `CpuInfo`, `BrowserRequest`, `electron.BrowserWindow`, `StatementNode`, `RouteName`, `DbIncrementStrategy`, `PragmaNameContext`, `TemplateManifest`, `MDCShapeCategory`, `CompilerOutput`, `TestingEntity`, `IExpressServerOptions`, `INormalAction`, `ApplicativeHKT`, `TrueSkill.RankState`, `Jwk`, `SignedOrder`, `ToggleState`, `AccessibilityKeyHandlers`, `SpriteComponent`, `TabLocation`, `TheEventbridgeEtlStack`, `INodeInfo`, `OmvFilterDescription`, `RepositoryRepository`, `TT`, `socketIO.Server`, `AssessmentTypeData`, `EmployeesService`, `XmppChatConnectionService`, `ShaderVariableType`, `GitHubActions`, `SnippetProvider`, `allContracts`, `UserScriptGenerator`, `MemoString`, `LambdaOutput`, `IConsole`, `EmittedMessage`, `RouteMethod`, `FullFilePath`, `ServersState`, `NetGlobalMiddleware`, `AdminDatabase`, `EnforceNonEmptyRecord`, `JumpyWidget`, `Guy`, `IAPIRepository`, `ModuleReference`, `HashMapStructure`, `SavedObjectMigrationContext`, `MatTableDataSource`, `RuleSummary`, `ExecutionPlanImpl`, `LinkChain`, `IDomMethods`, `EntityWithGroupType`, `DistrictsDefinition`, `IGenericObject`, `InterfaceInternal`, `CursorEvents`, `PoseNetOutputStride`, `EditorActionsManager`, `ServiceExitStatus`, `Ball`, `AppServicePlan`, `ChatErrors`, `PDFKitReferenceMock`, `WalletConnectConnector`, `ModuleSymbolMap`, `DatabaseQuery`, `AuthedRequest`, `EditablePolyline`, `IStoryItemChange`, `ApiContract`, `VisTypeIconProps`, `GetTokenResponse`, `RematchDispatch`, `ResolvedConfigFilePath`, `ProposalResponse`, `SingleAssetTwoPartyIntermediaryAgreement`, `ClassPrototype`, `GenericConstructor`, `ProviderInput`, `GLintptr`, `PedalTuning`, `SearchUsageCollector`, `TestRelation`, `float64`, `YVoice`, `y`, `AuthStrategy`, `protos.google.iam.v1.ISetIamPolicyRequest`, `ListStreamsRequest`, `FlipperServerImpl`, `StreamConfig`, `TextureInputGX`, `SQLParserListener`, `FunctionProperties`, `Swatch`, `SecureChannel`, `cc.Button`, `IMetricContext`, `CombatStateMachineComponent`, `IMyDate`, `Dimensions`, `egret.Point`, `IEmbeddable`, `Bonus`, `Models.IPositionStats`, `StartExportTaskCommandInput`, `ExpressionExecOptions`, `ResolvedNative`, `StaticSiteCustomDomainRequestPropertiesARMResource`, `IProxy`, `AbstractControlState`, `ITestConfig`, `CommandPayload`, `SubTiledLayer`, `guildInterface`, `WebGL2RenderingContext`, `OrderBy`, `http2.ClientHttp2Session`, `jest.SnapshotSerializerPlugin`, `FocusOptions`, `TransferDetails`, `SignatureKind`, `GroupLevel`, `IUserPP`, `PostList`, `CFMLEngineName`, `AccessorComponentType`, `EntityMetaData`, `CloudFormationClient`, `FullOptions`, `LogInRequest`, `http.ServerRequest`, `ITrackStateTree`, `apid.ReserveSaveOption`, `LastValues`, `ConfigurationSectionEntry`, `GenericNumberType`, `NextCharacter`, `TS`, `EnvPair`, `EdiDocumentConfigurationResult`, `FileDataSource`, `PartialCliOptions`, `TimelineHeaderWrapper`, `NoteCollectionState`, `Shared`, `NotFoundException`, `DevtoolsInspectorState`, `PlanetPortalApplication`, `SongData`, `MutableVector3d`, `TPT1AnimationEntry`, `ProjectConfig`, `ShEnv`, `DraggableStateSnapshot`, `CeramicSigner`, `ContractEventDescriptor`, `MergeTree.Marker`, `_ts.Node`, `StyleAttribute`, `CipherImportContext`, `TPath`, `IExportData`, `TraitMap`, `GraphPath`, `HeftConfiguration`, `IPlDocObject`, `iNotification`, `ParticipantsAddedEvent`, `ServerError`, `GuildChannelResolvable`, `QueryAllProvidersAttributesRequest`, `ExecutorState`, `LineBasicMaterial`, `EuiValues`, 
`MathsProcessor`, `PointAttribute`, `EnumIO`, `http.IncomingHttpHeaders`, `FormGroupDirective`, `HIDDevice`, `CompilationData`, `StorageInterface`, `FilesMatch`, `IBudgieNode`, `Accent`, `PartyName`, `TeardownLogic`, `Internal`, `HTMLScTooltipRowElement`, `Burst`, `INotesGetByContactState`, `AssetPropertyTimestamp`, `LatestControllerConfigType`, `DbCommand`, `StateReaderObservableEither`, `PointOptions`, `NotifierPluginFactory`, `UIPickerView`, `TxResult`, `CUUID`, `ControlComponentProps`, `FragmentManager`, `FD_Entity`, `UInt64Value`, `DeployLocalProjectConfig`, `sinon.SinonSandbox`, `ContentManager`, `NotificationEvent`, `AppExecution`, `EventCreator1`, `TableInstance`, `SFAMaterial`, `EntitySelectors`, `DistanceQueryInterface`, `Alternative`, `NameObjFactoryTableEntry`, `FirebaseAuth`, `OrderType`, `MultiCommandCCCommandEncapsulation`, `ThyDragOverEvent`, `RevocationReason`, `BoxConstraints`, `GitBuffer`, `GfxSamplerDescriptor`, `OmvFeatureModifier`, `Spotilocal`, `CreateProfileCommandInput`, `HostWindowService`, `RequestApproval`, `IncomingWebhookSendArguments`, `LayerNormalization`, `CommonWrapper`, `SuperAgentTest`, `PrivateEndpointConnectionsDeleteOptionalParams`, `TextTheme`, `CommitStatus`, `FluentBundle`, `MessageStateWithData`, `AreaProps`, `CompareFunction`, `GX.SpotFunction`, `AlertExecutorOptions`, `LambdaServer`, `TinaCloudCollectionEnriched`, `StylingFunction`, `Types.PluginOutput`, `MailboxEvent`, `MapItem`, `AxisConfig`, `InspectorViewDescription`, `BarDataSet`, `UIModeType`, `IProp`, `TestSet`, `TEasingFn`, `BadgeProps`, `Arrow`, `SyncData`, `CacheManagerOptions`, `Concatenate`, `HsAddDataVectorService`, `RequestPolicyOptionsLike`, `ItemsOwner`, `AxiosError`, `BlockchainContext`, `TouchControlMessage`, `com.google.firebase.firestore.Query`, `CrochetType`, `BitbucketPrEntity`, `AxiosPromise`, `KeysRequest`, `AstEntity`, `Sidekick`, `api.IZoweDatasetTreeNode`, `BlockContext`, `PoseNet`, `NgWalker`, `VerifyEmailAccountsValidationResult`, `DenseLayerArgs`, `PlaneType`, `ModuleBuilder`, `MessageActionRow`, `mapProperties`, `ParamsOfAppDebotBrowser`, `OAuthUser`, `JsonlDB`, `DocHandler`, `ImportedModuleDescriptor`, `ChatThreadClientState`, `CompareResult`, `ExprContext`, `AppStore`, `MathBackendCPU`, `FrescoDrawee`, `ItemSpec`, `AddonClass`, `LocalGatewayTreeItem`, `CanvasFillRule`, `StructureListMember`, `FieldQueryBase`, `PP`, `NamespaceOperatorDecl`, `ISiteDefinitionDocument`, `ServiceLogger`, `double`, `ForceLightningLwcStartExecutor`, `ProjectRisk`, `TransmartHttpService`, `KeyPathList`, `AzureConfigs`, `SelectItem`, `Genesis`, `DataViewRow`, `RequestApprovalService`, `MonadIO`, `HintManager`, `ListProjectsRequest`, `FileMetaData`, `SearchCriteria`, `AnalyzerLSPConverter`, `PropertyOperation`, `DescribeLimitsCommandInput`, `InterventionTip`, `SWRInfiniteConfiguration`, `MimeType_`, `HandlerExecutionContext`, `OasSchema`, `NgxUploadLogger`, `requests.ListVcnsRequest`, `AnnotationService`, `OnPostAuthResult`, `UseCaseExecutor`, `ObservableQueryProposal`, `HTTPHeaders`, `TabType`, `nls.LocalizeFunc`, `SearchDevicesCommandInput`, `SearchFiltersState`, `TLE.StringValue`, `IPane`, `ManagedFocusTrap`, `CollectionOptions`, `Accessibility`, `RouterProps`, `InputToken`, `DimensionGroup3D`, `PushToServiceResponse`, `Fold`, `VfsStat`, `Http3QPackEncoder`, `ActivityStatus`, `OrganizationContactService`, `MockContainerRuntimeFactoryForReconnection`, `DecimalPipe`, `IConnectOptions`, `JsonDocsMethodReturn`, `NzTabSetComponent`, `RecursiveXmlShapesCommandInput`, `SVSize`, `JWK.Key`, 
`ProviderLike`, `BrowserBehavior`, `UpdateProjectDto`, `TagFilter`, `TimeInput`, `SparseGrid`, `SalesforceFormFields`, `StopInstanceCommandInput`, `BasicAction`, `TestBackend`, `ts.PostfixUnaryExpression`, `SfdxOrgInfo`, `ListTagsForResourceResult`, `RoleModel`, `CustomerState`, `Config.GlobalConfig`, `RejectInvitationCommandInput`, `ExampleDefinition`, `UpdateDeviceCommandInput`, `PopupType`, `RouteFilter`, `IBBox`, `FSNetwork`, `OptionsProps`, `J3DLoadFlags`, `MongoCron`, `BaseAtom`, `DocumentMapper`, `QueueSendMessageResponse`, `IconTheme`, `Scrobble`, `Window.ShellWindow`, `EdgeInsets`, `CID`, `DynamicFormControlModel`, `MdcDialogConfig`, `Concourse`, `FlatNode`, `ControllerInstance`, `ModelFitDatasetArgs`, `TNSCanvas`, `LockerService`, `AudioContextManager`, `MenuStackItem`, `CryptographyService`, `TimestampFormatHeadersCommandInput`, `IonContent`, `ConvertFn`, `Piece`, `TimeSection`, `RookCephInputs`, `PackageJsonDependency`, `CardView`, `CreateDemandDTO`, `EosioActionTrace`, `ReactiveChartDispatchProps`, `IIteratorResult`, `FrameNavigation`, `SingleProof`, `MyCompanyConfig`, `Curry2`, `JIterator`, `CollectionReturnValue`, `ElementEventCallback`, `ICriteriaNode`, `UploadObservable`, `EventbusService`, `UrlTemplate`, `JSXAttribute`, `NextServer`, `GX.CompareType`, `TexCoord`, `ChatCommand`, `IMessageOptions`, `NinjaItemInfo`, `ProblemRowData`, `MeetingAdapterState`, `Rehearsal`, `DrawingGraph`, `ErrorInfo`, `GenericClassProperty`, `FetchHttpClient`, `PLSQLSymbol`, `CounterFacade`, `LazyQueryHookOptions`, `RestoreWalletHandler`, `GenericLogger`, `AbstractUIClass`, `IFieldFormat`, `GlobalScript`, `ts.SemanticClassificationFormat`, `CollectionReference`, `SwimlaneRecordPayload`, `IStaggerConfig`, `LessOptions`, `SrcDecoder`, `PropertiesField`, `MetricAggTypeConfig`, `Rebuilder`, `IUser`, `MDCChipActionType`, `AsyncOrderedHierarchyIterable`, `NumberRenderer`, `PostModel`, `SymbolKey`, `CustomEvent`, `protocol.FileRequest`, `GroupedRequests`, `MapEventsManagerService`, `RentalService`, `IResizeInfo`, `IGetOptions`, `BaseRange`, `HttpConnection`, `d.PixelMatchInput`, `LoaderFunction`, `NavigationScreenProp`, `IDBKeyRange`, `GetBucketPolicyCommandInput`, `TextElementState`, `AmountOptions`, `AnimationClip`, `Icon`, `videoInfo`, `ListEnvironmentsCommandOutput`, `LayoutVisualizationGroup`, `Dummy`, `SetStateFunc`, `SimpleScalarPropertiesCommandInput`, `CustomScript`, `ProviderDef`, `NoopExporter`, `IProperties`, `AddMessage`, `EmittedObject`, `t.Node`, `TagToken`, `SerializedFieldFormat`, `ExpressionWithTypeArguments`, `VqlClient`, `EightBittr`, `ARDimensions2D`, `CommandLineStringListParameter`, `BeneficiaryDTO`, `IWrappedExecutionContext`, `MulticallResponse`, `ITokenRefresher`, `DistanceM`, `DynamoDbPersistenceAdapter`, `FlexPluginArguments`, `IAttachMessage`, `Relayer`, `FeatureSet`, `DescribeRoutingControlCommandInput`, `OutputAsset`, `MultiRingBuffer`, `MessageOptions`, `PolygonProps`, `GDITrack`, `StateRef`, `ComponentsState`, `EnvVars`, `GestureTypes`, `SourceStorage`, `EmojiType`, `IIndex`, `ParticipantContents`, `StateTimeline`, `ColumnsPreviewType`, `IHasher`, `SceneControllerConfigurationCCGet`, `ChainID.Mainnet`, `AppGachaItem`, `CreateAlbumeDto`, `KeysSource`, `LOGGER_LEVEL`, `BScrollFamily`, `ListDatabasesCommandInput`, `ElkLabel`, `NoteworthyApp`, `p5ex.SpriteArray`, `OptionalIdStorable`, `STMultiSort`, `AuthStore`, `GetUserSettingsCommandInput`, `AriaDescriber`, `NavProps`, `DealService`, `MDCFloatingLabelFoundation`, `IAsyncParallel`, `ArtifactEngineOptions`, 
`SignedStateVarsWithHash`, `TxStatus`, `IUnlisten`, `QueryMany`, `puppeteer.KeyInput`, `ApiErrorService`, `STPSetupIntent`, `WaveProperties`, `IPackageFile`, `SObjectDefinition`, `IWorker`, `ContainerType`, `types.ScriptType`, `PlotLineOrBand`, `ITriggerEvent`, `PersonEntity`, `Listable`, `FIRQuery`, `SearchConfigurationService`, `IHomeViewState`, `LongOptionName`, `IPropertyWithHooks`, `TreeSelectionState`, `requests.ListCpesRequest`, `Active`, `IPositionComponent`, `ParseLocation`, `AsyncIterableX`, `PanelOptions`, `TFSavedModel`, `TextState`, `NavController`, `Bit`, `PageHelpers`, `RibbonButton`, `UrlFilter`, `PickScaleConfigWithoutType`, `BitcoinNetwork`, `StateDecoratorAction`, `Degrees`, `BlinkerResponse`, `DeleteWorkspaceCommandInput`, `TransitionConditionalProperties`, `RestorePoint`, `DataDirection`, `VFileMessage`, `LightData`, `TrackParseInfo`, `DataServiceConfig`, `MbLayer`, `ExtendedPoint`, `UserStakingData`, `DateIntervalFormatOptions`, `StoredOrder`, `AuthenticationProgramStateBCH`, `AccountRefresh_VarsEntry`, `INotebookTracker`, `SyntheticEvent`, `BodyInit`, `NestFastifyApplication`, `PSIVoid`, `EnumType`, `GooglePlus`, `TensorBuffer3D`, `HubIModel`, `XMLCharState`, `IStreamPolygon`, `HomeAssistantMock`, `BSPTraversalAction`, `InstanceLightData`, `FileDeleteOptions`, `InviteService`, `JGadget`, `MediaWiki`, `UpdateDatasetEntriesCommandInput`, `Recipients`, `EntityCompanionDefinition`, `MatchmakerMatched_MatchmakerUser_StringPropertiesEntry`, `PedigreeConstraint`, `TreeViewItem`, `ContractFunctionEntry`, `CustomSecurityService`, `DataModel.ColumnRegion`, `HelloResponse`, `AADResource`, `DataPacket`, `TodoTaskList`, `PluginOpaqueId`, `GraphQLModules.ModuleContext`, `MouseDownEvent`, `ProxyTarget`, `ListChannelsModeratedByAppInstanceUserCommandInput`, `BlocksModel`, `ECInstancesNodeKey`, `Resources`, `UserPreKeyBundleMap`, `GraphImpl`, `App.contentSide.ICommunicationToBackground`, `PrivilegeFormCalculator`, `IBabylonFileNode`, `HTMLTableSectionElement`, `IChannelsDatabase`, `ContainerRegistryEvent`, `DelonLocaleService`, `Traversable`, `Cookie`, `IProjectRepository`, `AnyPatternProperty`, `ICordovaAttachRequestArgs`, `CSSEntries`, `HomebridgeLgThinqPlatform`, `SignatureProvider`, `ReportFilter`, `PhysicalKey`, `JsonValue`, `UserReport`, `QueryBidsRequest`, `CreateAccessPointCommandInput`, `AST.Expression`, `CalendarType`, `ASNDBS`, `IWrappedEntity`, `SerializedEvent`, `LoginSuccessCallbackResult`, `CopyImageCommandInput`, `ProfileModel`, `AreaUI`, `SeparableConvParams`, `builder.IDialogResult`, `RQuota`, `view.View`, `ContestModel`, `RecentCompletionInfo`, `TodoListApplication`, `AlyvixApiService`, `TokenDetails`, `ArrayWrapper`, `ValueDB`, `d.ComponentRuntimeMetaCompact`, `Fu`, `CancelExportTaskCommandInput`, `DatePicker`, `IOptionsFullResponse`, `PageIndex`, `SubmitTexture`, `LinearFlow`, `TRawConfig`, `Driver`, `ExchangeQueryService`, `MethodOrPropertyDecoratorWithParams`, `OrderService`, `SessionPromise`, `InviteMembersCommandInput`, `BlockCipher`, `CustomFieldDefinition`, `Prisma.JsonValue`, `ItemsService`, `LoadMetricInformation`, `message`, `StagePanelDef`, `IAudio`, `TimeValues`, `RemovableAnalyserNode`, `ResolveRecord`, `ReadFileResult`, `SecureStore`, `IOutputOptions`, `RBNFDecimalFormatter`, `Phaser.GameObjects.GameObject`, `BufferChannel`, `ActivityInfoModel`, `AggregateMeta`, `TranslateService`, `FileStorage`, `os.NetworkInterfaceInfo`, `MatBottomSheetConfig`, `PyrightPublicSymbolReport`, `TSeed`, `TokenStorage`, `CoercibleProperty`, `DSColumnType`, `OptionFC`, 
`PostConditionMode.Deny`, `MagentoAggregation`, `ItemRequest`, `PublicVocabulary`, `Oscillator`, `MatchedContext`, `IAvailabilitySlotsCreateInput`, `UserDataContextAPI`, `PackagePolicy`, `IOperationType`, `MutationResult`, `DeletePackageCommandInput`, `Graphics`, `ASTPath`, `RawDatum`, `RDBType`, `Msg`, `BotsState`, `NamedTensorsMap`, `ObjectLiteralElementLike`, `HemisphereLight`, `UserGroup`, `ISampleToChunkBox`, `CellInput`, `ApmBase`, `pe`, `MockNgZone`, `BlokContainerUserSettings`, `MockBaseService`, `Packages`, `MetadataCache`, `FlatRow`, `EngineMiddlewareParams`, `Types.Config`, `DispatchPattern`, `DOMMatrix`, `Realm.ObjectSchemaProperty`, `ts.ObjectType`, `Overrides`, `SurveyModel`, `ColumnAnimationMap`, `VpnClientIPsecParameters`, `ConventionalCommit`, `TestCallback`, `AnyModel`, `ProvisionByoipCidrCommandInput`, `JssContextService`, `Video`, `ChildrenService`, `CreateStateHelperFn`, `FieldDefinitionNode`, `CollectorEntity`, `WorkerChild`, `DownloadStationTask`, `NodeID`, `LayerProperties`, `SortedSetItem`, `PartialEntityCollection`, `FavoriteGroup`, `code.Range`, `UpdateReplicationConfigurationTemplateCommandInput`, `ParsedExampleTree`, `Pages`, `IMessageParser`, `ThemeServiceStart`, `ISiteScriptAction`, `ProxyRule`, `Keyboard`, `Paragraph`, `lex.Token`, `DataUnitUp`, `DropdownComponent`, `PackageLock`, `LightSet`, `ListEnvironmentTemplatesCommandInput`, `Streamer`, `PathContext`, `TimeChartSeriesOptions`, `Breadcrumb`, `CreateUserCommandOutput`, `ICharAtlasConfig`, `SearchEnhancements`, `TableValidator`, `BuildTarget`, `GPUData`, `FirewallPolicyRuleCollectionGroup`, `FortuneOptions`, `ListJobsByPipelineCommandInput`, `NodePoolPlacementConfigDetails`, `Discord.Client`, `ProviderPosition`, `StorageDriver`, `ISkill`, `Events.enterviewport`, `IOriginNode`, `DefaultEditorAggSelectProps`, `BlankLineConfig`, `BlockMap`, `IComparatorFunction`, `sdk.PullAudioInputStream`, `CfnParameter`, `AuthenticationModel`, `StynTree`, `BabelDescriptor`, `Switchpoint`, `TokenDict`, `ECompareValueType`, `PointComponentProps`, `ReviewerReadModel`, `PageEvent`, `CreateSnapshotCommandInput`, `AckRange`, `CachedValue`, `IStatisticSum`, `ExtractActionFromActionCreator`, `TestCase`, `EnvironmentSettings`, `TaskCacheSession`, `TypeValues`, `CustomSpriteProps`, `CancellationToken`, `IInviteGroupUsersResult`, `BillId`, `CreateRoomRequest`, `requests.ListAvailabilityHistoriesRequest`, `Trail`, `MnemonicVariationsX86`, `DataConvertType`, `DescribeAccountCommandInput`, `DOMStringList`, `ErrorNode`, `GfxTextureDescriptor`, `CommunicationIdentifier`, `SavedObjectLoader`, `egret.DisplayObjectContainer`, `HTMLCollection`, `ItemIndex`, `DiagnosticTag`, `ReadonlyVec2`, `DiscoverSetupPlugins`, `InferenceFlags`, `ISocketBase`, `protos.common.CollectionConfigPackage`, `ReplicaDetails`, `StudyConstraint`, `ARAddModelOptions`, `ForbiddenException`, `Status`, `IPluginData`, `PartsModel`, `FooterProps`, `ISampler3DTerm`, `TransferListOptionBase`, `RunProps`, `InvalidFormatError`, `angu.Value`, `DataItems`, `CardDatabase`, `RunTaskOption`, `InternalParser`, `AggsStartDependencies`, `EntitySchemaService`, `TooltipInfo`, `PluginOptionsSchemaArgs`, `TimefilterConfig`, `sdk.Connection`, `HsLayerSelectorService`, `TRight`, `ExtOptions`, `code.Uri`, `UiActionsService`, `ReactClientOptionsWithDefaults`, `CsmPublishingCredentialsPoliciesEntity`, `CommonFile`, `EditDashboardPage`, `WalkerDown`, `LazyCmpLoadedEvent`, `IntegrationSettingService`, `CreateRequestBuilder`, `Pulse`, `DomRecorder`, `Redirect`, `EVMPayload`, `HsUtilsService`, 
`SpreadSheet`, `requests.ListAutonomousDatabaseBackupsRequest`, `RequestProfile`, `CodeLensBuffer`, `PerformanceStatistics`, `CategoryProps`, `CrochetForNode`, `AudioParam`, `FormatterConfig`, `IDeploymentCenterPublishingContext`, `OffsetPosition`, `FtrConfigProviderContext`, `DAL.KEY_W`, `AssetBalance`, `Starter`, `IRoute`, `Moltin`, `TableReference`, `ReadonlyPartialJSONObject`, `ErrorTransformer`, `TypedMutation`, `requests.GetRRSetRequest`, `IOptimizelyAlphaBetaTest`, `BattleDetail`, `WriteTournamentRecordRequest`, `ProfilerFrame`, `Folder`, `ITerminalChunk`, `TPageWithLayout`, `ModelInstance`, `ListWorkspacesCommandInput`, `FileDeclaration`, `CPlusPlusRenderer`, `FormItem`, `AnimationInfo`, `StringEncodedNumeralFormat`, `d.TransformCssToEsmInput`, `IntlMessages`, `OperationError`, `requests.ListProjectsRequest`, `DataDrivenQuery`, `InteriorNode`, `UriMatchType`, `ScrollToOptions`, `ModifyDBClusterSnapshotAttributeCommandInput`, `Real_ulonglong_numberContext`, `FabricWalletRegistryEntry`, `TransactionState`, `TGraphQLContext`, `SourceStatus`, `BrandModuleBase`, `IntBuffer`, `ARRotation`, `DatasourceRef`, `UInt128`, `AsyncFluidObjectProvider`, `CheckBoxProps`, `DescribeEventCategoriesMessage`, `core.CallbackOptionallyAsync`, `DocfyService`, `DocumentSnapshot`, `BitcoinCashBalanceMonitorConfig`, `PDFRef`, `Rarity`, `HTTPNetworkInterface`, `SyntaxKind.Identifier`, `ParameterDesc`, `PostRoles`, `CustomLocale`, `STDeclaration`, `ExtractRouteParams`, `SceneComponent`, `ConnectDetails`, `ContractCallBuilder`, `Dialogic.State`, `Others`, `NetworkFilter`, `StyleSheetData`, `SymbolInfo`, `JsonDocsEvent`, `VuforiaSessionData`, `RefreshAccessTokenAccountsValidationResult`, `RenderArgsDeserialized`, `AutofillScript`, `UpdateSubnetGroupCommandInput`, `Survey.JsonObjectProperty`, `UserVariableContext`, `IdentifierType`, `ThyTreeNodeData`, `estree.Program`, `IterationUse`, `AssetUtils`, `StoreSetter`, `GraphinProps`, `ExperimentInterface`, `storeType`, `JsxOpeningFragment`, `ApiTypes.Feed.Hide`, `GameInfo`, `Polygon`, `FocusPath`, `ComparisonResult`, `IWorkerArgs`, `BaseWatch`, `Menu`, `StationModel`, `SinonSpyCall`, `TransformOption`, `ComponentInterface`, `BindingOrAssignmentElementTarget`, `FocusEventHandler`, `TViewNode`, `Employee`, `IMemoryDb`, `ConnectionDictionary`, `ElementRenderer`, `AtomicAssetsContext`, `ModuleElementDeclarationEmitInfo`, `CogStacJob`, `IconService`, `OnMessageFlags`, `Optimizer2`, `SnapDB`, `DateUtilsAdapter`, `FloatKeyframe`, `Pets`, `FavouritesState`, `Callbacks`, `UiSettingsClient`, `ReactDivMouseEvent`, `PdfObjectConverter`, `T.ID`, `ServerObject`, `StoreConfiguration`, `ParenthesizedTypeNode`, `Animated.CompositeAnimation`, `GqlContext`, `RepositoryKind`, `RenderDeps`, `PointerState`, `ISendOptions`, `PropName`, `d.JsonDocsComponent`, `IUnit`, `ExecuteCommandParams`, `GetMetricDataCommandInput`, `ReadValueIdOptions`, `C2dRenderTexture`, `DraggableProvided`, `userData`, `ReducersMapObject`, `DistributionProps`, `DoubleValue`, `Draggable`, `CharacterClass`, `GetFolderCommandInput`, `NonFungibleTokenAPI.Options`, `VertexDeclaration`, `NavigatorGamepad`, `BodyContent`, `Char`, `TestRouter`, `restify.Request`, `EditProps`, `TestTracer`, `ChlorinatorState`, `Pet`, `TopicInterest`, `IPlatform`, `CreateDashboardCommandInput`, `TestDTO`, `ModuleResolver`, `ITagUi`, `ImporterRegistry`, `ApiErrorParams`, `MicrosoftStorSimpleManagersResources`, `$G.IGraph`, `ObjectStorageClient`, `ts.ConditionalExpression`, `requests.ListComputeCapacityReservationInstancesRequest`, 
`App.services.IWindowService`, `AuthenticationExecutionInfoRepresentation`, `Setting`, `RangeResult`, `ServerTransferStateTranslateLoader`, `ResourceConstant`, `HRTime`, `FrequencySet`, `GLTFNode`, `StringDocument`, `apiKeysObject`, `Ticks`, `StacksMessageType`, `Cmp`, `BoneDesc`, `Ogg.IPageHeader`, `Screenshoter`, `ISignaler`, `OpenIdConfig`, `ListOperations`, `O1`, `MultiMaterial`, `BabelFileResult`, `Refactoring`, `RNSharedElementStyle`, `OrganizationEditStore`, `MsgCloseGroup`, `NzI18nInterface`, `MicrophoneConfig`, `AppearanceCharacteristics`, `SegmentRef`, `ImportSpecifier`, `EventSourceHash`, `VisualizePlugin`, `IEntityMetaOptions`, `Thunk`, `LegacySpriteSheet`, `UInt160`, `QueryEnum`, `DecomposedJwt`, `Sentence`, `TerraformVars`, `StickerOptions`, `ScrollByY`, `CreateOrganizationCommandInput`, `DefaultKernel`, `StackActivity`, `Tests`, `Backup`, `ProductVariantPriceService`, `PeerType`, `IlmPolicyMigrationStatus`, `DebugProtocol.VariablesResponse`, `DateIntervalDescriptor`, `SendParams`, `Unregistration`, `GeneralCallbackResult`, `TitleTagService`, `GradientBlockColorStep`, `IdentifyEventType`, `DeployStacksIO`, `SearchServiceSetupDependencies`, `ModalDialogParams`, `url.URL`, `UnicodeUtils.Direction`, `NzMessageDataOptions`, `IExtentChunk`, `BandHeaderNS.CellProps`, `ISubscriberJwt`, `TableFinder`, `PanGestureHandlerStateChangeEvent`, `MockOptions`, `StudioModelData`, `LicensingPlugin`, `TwitterUser`, `NativeScriptPager`, `JsonaObject`, `V.Scheme`, `ParamConfig`, `GetRevisionCommandInput`, `PersonaIdentifier`, `Appservice`, `StringWriter`, `STDataSourceOptions`, `WS`, `BufferLine`, `TransferHotspotV1`, `TScheduleData`, `NothingShape`, `vscode.MessageOptions`, `ContractMetadata`, `SingleASTNode`, `DefsElementMap`, `IQuaternion`, `IRectangle`, `DialogPropertySyncItem`, `IMiddlewareClass`, `requests.ListCategoriesRequest`, `DebouncedFunction`, `MenuComponent`, `Validator`, `Schemas`, `DetectorEnum`, `UniqueSection`, `OnClickData`, `TransactionAndReceipt`, `ComponentRuntimeMetaCompact`, `AuthenticationSession`, `IAddress`, `FieldPath`, `IsoLayer`, `GeoService`, `LegendPosition`, `OnProgressCallbackFunction`, `UITabBarController`, `DocumentUnderstandingServiceClient`, `TTypeName`, `ISet`, `SFARenderLists`, `Widget`, `TComponent`, `NetworkgraphPoint`, `DepNodeAssembly`, `SyncOpts`, `StackItemLike`, `WordcloudUtils.PolygonPointObject`, `PickerDelegate`, `KC_PrismData`, `LensState`, `MonitoringParametersOptions`, `INodeFilter`, `EndPoint`, `Contact`, `GeneratorContext`, `TestBed`, `CachedPackage`, `EnvOptions`, `Arbitrary`, `MediatorService`, `StackProps`, `SpecRoleCapabilities`, `PlacementContext`, `PathBuilder`, `RtfDestination`, `SlideDirection`, `SourceService`, `WorkerTestHarness`, `ThunkArg`, `ValidationEventTypes`, `Visit`, `PaymentsError`, `LoggingMetaData`, `VolumeTableRecord`, `Matrix`, `LegendStrategy`, `AddressType`, `EncodedDeviceType`, `PageState`, `apid.RuleSearchOption`, `PieceModel`, `ICommands`, `CombatZerg`, `ThemeProps`, `Contacts`, `WebSiteManagementModels.SiteConfigResource`, `MarkerBase`, `OtCommand`, `WellKnownTextNode`, `TLockfileObject`, `FieldMeta`, `UseAsyncReturn`, `FactoryArgs`, `PreviewState`, `ChipsItem`, `RequestBody`, `HierarchyRpcRequestOptions`, `DashboardSetup`, `ToolbarTest`, `ModelConfig`, `IsLocalScreenSharingActiveChangedListener`, `CallbackDataParams`, `GitHubRepositoryModel`, `Launcher`, `CustomFunctions`, `IThread`, `WebSqlTx`, `H5GroveEntityResponse`, `evt_disasm_sub`, `WithName`, `TileLoaderState`, `SourceEditorArgs`, `RefInfo`, `Disposable`, 
`TypeAnnotationNode`, `AttachedPipettesByMount`, `HttpResponseInternalServerError`, `LayoutMaterial`, `TagType`, `ISampler2DTerm`, `KeyLabel`, `CraftBlock`, `STPPaymentMethod`, `ClusterExplorerNode`, `XMessageOption`, `TransactionsModel`, `NetWorthItem`, `GleeMessage`, `PutAppInstanceRetentionSettingsCommandInput`, `ProblemTagEntity`, `RuleObject`, `BaseTxInfo`, `SelectNode`, `requests.ListRulesRequest`, `VideoType`, `TranspileResults`, `HeatmapTable`, `PersistConfig`, `ColorInputProps`, `CollatedWriter`, `SqlTuningTaskStatusTypes`, `ModList`, `ResizeObserverService`, `IMessageListenerWrapper`, `VaultStorageService`, `Future`, `StackHeaderProps`, `AttributeDatabindingExpression`, `AzureWizard`, `MediationRecipientService`, `Exponent`, `ILineIndexWalker`, `ServiceExtension`, `CollisionGroup`, `DemoExecutor`, `AuthHeaders`, `FormValidationResult`, `DataTableColumn`, `CookieOptions`, `ActionBarProps`, `CustomRegion`, `FourSlash.TestState`, `L2Item`, `PropertyFactory`, `FindOptions`, `KuduClientContext`, `ResumeNode`, `CreateAggConfigParams`, `BootstrapOptions`, `EchPalette`, `SSOAdmin`, `StateVisNode`, `common.WaiterConfiguration`, `ChannelInflator`, `ValueClickActionContext`, `MatDialogRef`, `GetWorkRequestResponse`, `DomainEntity`, `JRPCEngine`, `ISnippetInternal`, `FederatedAdapterOpts`, `BufferConstructor`, `StyledTextProps`, `UserFunctionSignature`, `Command`, `EvaluateHandleFn`, `KeyframeTrack`, `DebugSessionOptions`, `SendCommandRequest`, `LanguageData`, `IData`, `IVar`, `PackageExpanded`, `LineAnnotationSpec`, `GaugeSettings`, `IPSet`, `VertexAttributeEnum`, `KeyPairBitcoinCashPaymentsConfig`, `PDFOperatorArg`, `CuePoint`, `ReplacementRule`, `MonitorRule`, `NotificationList`, `MonthPickerProps`, `AuditedAttributes`, `UtilConvertor`, `CmsModelFieldValidation`, `database.DataSnapshot`, `Verifier`, `FetchedBalances`, `RtcpTransportLayerFeedback`, `DaffCategoryFilterRequestEqualFactory`, `IsSpeakingChangedListener`, `DriverException`, `SecretData`, `t.Statement`, `pulumi.Input`, `Clip`, `QueryListProps`, `NavigationContainerRef`, `GfxBufferFrequencyHint`, `ProviderService`, `SphereGeometry`, `IValidatorOptions`, `OptionsWithUrl`, `StoreObjectArg`, `SharingUpdate`, `HttpAdapterHost`, `GraphicsLayer`, `T17`, `TypographyProps`, `zowe.IUploadOptions`, `RO`, `OnRenderAvatarCallback`, `JFlap`, `FlamelinkFactory`, `RequiredParams`, `MonzoBalanceResponse`, `TestElement`, `ITfsRestService`, `APIEndpoint`, `PartyCreate`, `JWT`, `Buffers`, `WebService`, `PostMessageStorage`, `Sqlite.Statement`, `GlobalLogger`, `TokenInfo`, `DataTransferItemList`, `CreateHotToastRef`, `FullUser`, `DoorLockCCConfigurationReport`, `MapProps`, `RoleRepository`, `CipherAlgorithm`, `EffectSystem`, `ethereum.UnsignedTransaction`, `IWalletContractServiceStrategy`, `TemplateHandlers`, `DividerProps`, `TriumphRecordNode`, `ATNState`, `LoggerNamespace`, `Discord.TextChannel`, `QueryExpressionParensContext`, `EntryId`, `UpdateServerCommandInput`, `DeleteDataSourceCommandInput`, `HttpResponseCodeCommandInput`, `AsyncIterableExt`, `PopupManager`, `E2EScanScenarioDefinition`, `uproxy_core_api.Update`, `UseSRTP`, `DateRangeValuesModel`, `ArgumentsCamelCase`, `d.CompilerBuildResults`, `CodebuildMetricChange`, `CollectionProp`, `ContentBlock`, `SVGMark`, `EnumOption`, `XUL.chromeWindow`, `Finder`, `FileBrowser`, `ICredentialsState`, `KeyBindings`, `ConditionResolver`, `SystemVerilogSymbol`, `ir.Stmt`, `LegacyAPICaller`, `AutoTranslateSummaryReport`, `InspectorViewProps`, `DefaultVideoStreamIdSet`, `AccountIdRequestMessage`, 
`ActionsConfigurationUtilities`, `WlPeer`, `RuleValidator`, `ThemeValue`, `BoolValue`, `Spinnies`, `GetPartitionIndexesCommandInput`, `LineConfig`, `URLLoader`, `ErrorSubscriptionFn`, `FMOscillator`, `MdcElementPropertyObserver`, `RESTClient`, `Orphan`, `PackagesConfig`, `SocketMessages.produceNum`, `Runner.Utils`, `IMatcher`, `CollaboratorService`, `UserContext`, `vscode.Range`, `TextComponent`, `AssemblyOption`, `ELO.RankState`, `INotificationsService`, `GfxPlatformWebGL2Config`, `ReactNativeContainer`, `Id64String`, `ShaderProgram`, `CliConfig`, `Waveform`, `SourceSymbol`, `DropdownItem`, `InputRegisterMaster`, `AttributeServiceOptions`, `SelectorGroup`, `StylesProps`, `ScriptParametersResolver`, `Pools`, `NumberOptions`, `DeprecatedButtonProps`, `JsonVisitor`, `GenericTwoValues`, `TypeMap`, `SimpleBinaryKernelImpl`, `requests.ListFunctionsRequest`, `Ok`, `LibraryEngine`, `ThyButtonType`, `WorkspaceSchema`, `d.MsgToWorker`, `DataValidationCxt`, `NgActionBar`, `GpuState`, `ng.IFilterService`, `ProfileState`, `NetworkManagementClient`, `MDCRadioAdapter`, `DedentToken`, `GX.DistAttnFunction`, `ArXivStorage`, `IRuleCheck`, `VertexEvent`, `AxisComposition`, `MessagesService`, `ValidationFunc`, `DeleteRuleCommandInput`, `Links`, `EncodedPaths`, `NamedFluidDataStoreRegistryEntries`, `ExploredCohortState`, `TextWrapper`, `DescribeComponentCommandInput`, `XY`, `TypeAliasDeclarationStructure`, `Promisable`, `LegacyDrawing.Animation`, `Simulator`, `OhbugExtension`, `INode`, `MangaDetailsFields`, `FileUploader`, `OutboundMessage`, `MergedBuildFileTask`, `BlockDefinitionCompiler`, `QueryCertificatesRequest`, `AttendeeModel`, `TableName`, `MessageHashService`, `FlowItemAssign`, `EntryProcessor`, `NzDestroyService`, `MultilevelSwitchCCStartLevelChange`, `CommitSequence`, `IgnoredCommentContext`, `HTMLIonLoadingElement`, `OperationTypes`, `FragmentSpread`, `OrderFormItem`, `ShortTermRetentionPolicyName`, `HardwareConfiguration`, `BasicCalculator`, `ContextConfig`, `MediaQueryData`, `Animated.Animated`, `FleetAuthzRouter`, `DefaultTreeDocument`, `T2`, `MetaesContext`, `d.PrerenderConfig`, `ISeriesApi`, `IQueryListRendererProps`, `VersionHistoryDataService`, `N7`, `ReportGenerator`, `GetDevicePositionHistoryCommandInput`, `TwingTemplateBlocksMap`, `TReturnType`, `IDescribeRunner`, `VisibilityMap`, `ERC721ContractDetailed`, `PanelModel`, `PerformAction`, `TestConfiguration`, `MkFuncHook`, `ChangeAccumulator`, `ActivityStreamsModel`, `Algebra.RootNode`, `AbstractDistanceCalculator`, `Switch`, `PipelineStatus`, `ChainEventSubscriberInterface`, `V1StatefulSet`, `BucketAggTypeConfig`, `freedom.Social.ClientState`, `DomainsListOptionalParams`, `TokenFetchOptionsEx`, `ComponentHolder`, `BlockchainService`, `apid.Rule`, `HttpResources`, `OrderByDirection`, `GithubClient`, `KeyType`, `SyncType`, `HSVColor`, `PopoverTargetProps`, `CacheEntryListener`, `MDCNotchedOutlineAdapter`, `UiStateStorageStub`, `LoadStrategy`, `ThyAutocompleteRef`, `UpdateFindingsCommandInput`, `MeasureFormatter`, `requests.ListCloudVmClusterUpdatesRequest`, `CreateCampaignCommandInput`, `NodeConfig`, `BranchSummary`, `WalletAccount`, `SF`, `HttpContextContract`, `AggregationFrame`, `ParquetField`, `IntlShape`, `FileSchemaKey`, `GenerateFunctionOptions`, `AssignmentPattern`, `DeleteSourceServerCommandInput`, `TensorArray`, `GeneratorSourceConfig`, `IBifrostInstance`, `CreateVpcLinkCommandInput`, `DeleteWriteOpResultObject`, `NumericB`, `AnimationNode`, `QueryDslQueryContainer`, `StatePropsOfCombinator`, `BaseResourceHandlerRequest`, 
`FieldFormatInstanceType`, `RouteComponent`, `IScore`, `HTMLLineElement`, `WindowComponent`, `TestExplorer`, `Cropper`, `RBNFCollector`, `DeferredPromise`, `SubstrateExtrinsic`, `SetLanguage`, `MarkdownDocument`, `RoomParticipantIdentity`, `DiezType`, `ConfigPath`, `NPCActorCaps`, `Feed`, `CC`, `SelectorItem`, `MutationName`, `ChainInfo`, `ModelRenderContext`, `DynamoDB.ReturnConsumedCapacity`, `StylableModuleSchema`, `ServerInfo`, `ChildProps`, `TransportParameters`, `RouteWithValidQuote`, `LinkedContracts`, `ARMRamItem`, `CachedProviders`, `MilkdownPlugin`, `GRUCellLayerArgs`, `TMeta`, `DialogProps`, `IApplicationContext`, `HandlerDomProxy`, `InsightShortId`, `HttpResponseCreated`, `ArrayNode`, `IListenerRule`, `React.StatelessComponent`, `GraphConfiguration`, `WatchEffectOptions`, `ShowProgressService`, `SymbolFormatFlags`, `FormGroup`, `VariableDeclarator`, `XIdType`, `SafeSelector`, `TypeReferenceSerializationKind`, `NodeSorter`, `Limit`, `Sanitizer`, `AllureStep`, `DescriptorProto_ExtensionRange`, `ScalarCriteriaNode`, `Octant`, `MapPool`, `ArticleOverview`, `LogCorrelationContext`, `DestroyOptions`, `TargetDetectorRecipe`, `B14`, `TrackData`, `LoadableMeta`, `CredentialRequestOptions`, `QTransition`, `CustomTemplateFindQuery`, `UIView`, `Subscribers`, `SpawnClose`, `GridStackModel`, `MDCTextFieldLabelAdapter`, `Events.preframe`, `IExpectedIdToken`, `PIXI.Point`, `AliasOrConnection`, `TLineChartPoint`, `LookupExpr`, `MongooseFilterQuery`, `IReference`, `PanelComponent`, `ParsedQuery`, `ProcessRequirement`, `RoomInfo`, `IRemoteUser`, `ScopedClusterClient`, `GetPropertiesResponse`, `ParameterInvalidReason`, `SavedObjectsClosePointInTimeOptions`, `PublicShare`, `ClientDTO`, `FloorCode`, `ProjectInput`, `SocketServer`, `RouteState`, `DOMWrapper`, `IEvmRpc`, `New`, `QueryMode`, `CompletedGatewayOptions`, `ResourcesWithAttributedChildren`, `IEmailDomain`, `FoodsFilter`, `IWrappedEntityInternal`, `Endpoints`, `DMMF.SchemaArg`, `ClassVarInfo`, `NavigationIndicator`, `OpenAPI.Schema`, `ZonedMarker`, `CheckerType`, `ForgeModMcmodInfo`, `IAppSetting`, `OclExecutionContext`, `ThyNotifyOptions`, `AnnotationSpec`, `TextInputOptionProps`, `ProcessLock`, `GitDiff`, `GraphQLGenie`, `GameVersion`, `PotentialPartnersState`, `ListAppInstanceAdminsCommandInput`, `PosAndDir`, `monaco.Position`, `MapperService`, `ESLintNode`, `EnvironmentOptions`, `KernelMessage.IMessage`, `TextDirection`, `requests.ListEventsRequest`, `enet.IDecodePackage`, `HttpParams`, `parser.PddlSyntaxNode`, `VaultTimeoutService`, `GitHubRepo`, `PlaneByOriginAndVectors4d`, `HSLA`, `CreateWorkspaceCommandInput`, `AsyncStorage`, `TicketDoc`, `AnimationInstance`, `IControllerAttributeProvider`, `IteratorOptions`, `AuthorizeOptions`, `PedAppearance`, `AuthorizationData`, `IContentSearchFilter`, `TEventHandler`, `ForwardRefRenderFunction`, `Typed`, `JQueryXHR`, `model.Range`, `ReducerHandler`, `SegmentEvent`, `DAVAccount`, `FunctionPlotOptions`, `PoiGeometry`, `VirtualNode`, `InterfaceDefinitionBlock`, `CommandLineOptions`, `PhysicalObject`, `DeserializeAggConfigParams`, `FunctionTemplate`, `CssImportData`, `ApiRevisionContract`, `TokenPosition`, `ZoomStore`, `FooBar`, `InternalStyle`, `SignalingClientObserver`, `TransitionSettings`, `IResourceItems`, `BackendSrv`, `IExtractedCode`, `MemoryView`, `BlockAttributes`, `VerificationGeneratorDependencies`, `ChromeHelpExtension`, `TVariables`, `py.Expr`, `ResolverRpCb`, `WebGLTensor`, `JTDSchemaType`, `OtherNotation`, `Id64Array`, `ChainableConnectors`, `SortStateAPI`, `SendMessagePayload`, 
`CosmosClient`, `ts.TextChangeRange`, `OperationInfo`, `PointModel`, `HitResult`, `RuleGroup`, `Drive`, `NativeClarityBinProvider`, `ShrinkStrategyMock`, `StartServicesGetter`, `ITkeyError`, `StyleFunction`, `ListPolicyVersionsCommandInput`, `DeleteSiteCommandInput`, `LRU`, `NetNode`, `TagSpec`, `JSDocSignature`, `CannonBoxColliderShape`, `NextFnType`, `WorkerMeta`, `TinyHooks`, `MenuItemConstructorOptions`, `IVue`, `requests.ListGroupsRequest`, `MappingBuilder`, `IChangeInfo`, `requests.UpdateJobRequest`, `UpdateServiceCommandInput`, `MongoClientOptions`, `HelpCenterArticleService`, `IndexTemplateMapping`, `SchemePermissions`, `BehaviorHook`, `NotificationPayload`, `EffectRef`, `PartialOptions`, `ListApmDomainWorkRequestsRequest`, `CreditCardEscrow`, `GUIDriverOptions`, `SpriteData`, `ListItemBase`, `ChartComposition`, `PluginMetadata`, `providers.WebProvider`, `ModalInitialState`, `Luna`, `MultisigConfig`, `HttpUrlGenerator`, `TemplateConfig`, `Jump`, `PrismaClientDMMF.Document`, `SourceAwareResolverContext`, `MiddlewareStack`, `NewWindowWebContentsEvent`, `AsyncPriorityQueue`, `GetDeclarationParameters`, `IExperiment`, `ptr`, `TokenAddressMap`, `ethers.utils.Deferrable`, `DCons`, `PropertyAssignments`, `TydomController`, `Necktie`, `MockClientFactory`, `ZeroXOrders`, `GeneratorCore`, `Import.Options`, `InvalidRequestException`, `Legend`, `CommittersDetails`, `ConfigYaml`, `CPUTensor`, `MockStorage`, `TemplateOptions`, `GitHubActionWorkflowRequestContent`, `ClassWeight`, `Comparer`, `IParam`, `IFilterTarget`, `BuildProps`, `ProjectedPoint`, `StatusReport`, `alt.IVector3`, `LegacyDrawing.Sprite`, `Utilities.EventWrapperObject`, `ReorderAggs`, `ApexTestGroupNode`, `HTMLHeadElement`, `ReceiverEstimatedMaxBitrate`, `VariableDefinitionContext`, `FormatResult`, `TestingRunOptions`, `SettingsService`, `PacketHandler`, `ScalingPolicy`, `StakingCall`, `TextElementsRendererOptions`, `ModelSchema`, `School`, `Html2OrgOptions`, `StateData`, `UserSettingsModel`, `MarkerNode`, `StackResult`, `EventTypeService`, `ListTagsCommand`, `ListUsersRequest`, `Charge`, `MorphTargetManager`, `IsSpecificRowFn`, `Events.pointercancel`, `Patient`, `MaxPooling3D`, `TLeft`, `DomPath`, `Ants`, `APIGatewayProxyEventV2`, `Any`, `InputSpec`, `Fn`, `Association`, `RadioOption`, `GitlabUser`, `DragHandle`, `TypeIdentifier`, `AnimeFields`, `RouterInstruction`, `EChartsCoreOption`, `ng.IDirective`, `WorkerRequestEntry`, `Shortcuts`, `AttributionData`, `Point4d`, `RowArray`, `ECH.CommandClient`, `ReadyType`, `FileItem`, `FormConfig`, `OptionsConfig`, `RoverStateReturn`, `SpriteSpin.Data`, `MockableFunctionCallCompiler`, `IContainerRuntimeOptions`, `EmailConfirmation`, `CertificateAndPrivateKeyPair`, `IExternalHooksFunctions`, `Beacon`, `NetworkVirtualAppliance`, `ATNConfig`, `FrameEntryType`, `ts.ResolvedModuleWithFailedLookupLocations`, `LoopBounds`, `MockAthena`, `MigrationStatus`, `AdvertiseByoipCidrCommandInput`, `WorkflowState`, `ResponsiveMode`, `MentionsState`, `MTDTexture`, `SIDE`, `IMapSourceProvidersConfig`, `SignKeyPair`, `DocsService`, `Shape`, `ApexVariable`, `DragDropRegistry`, `Graphics.Texture`, `DeclarationName`, `DeleteBotCommandInput`, `QueryOrdersRequest`, `EditSettingsCommand`, `fromSingleRepositoryStatisticsActions.GetRepositoryStatistics`, `RoleListContext`, `GraphQLTypeResolver`, `TransmartSubSelectionConstraint`, `V1CommandInputParameterModel`, `SatRec`, `IBlockHeader`, `CookiesOptions`, `ListComprehensionForNode`, `MDCTextFieldFoundation`, `SpaceService`, `LazyIterator`, `BitReader`, `NodeRecord`, 
`IHandlerParameters`, `CipherView`, `UploadOptions`, `SharedFunctionStub`, `GetGroupRequest`, `WebApi`, `CoreUsageStatsClient`, `IReserveUpdateValues`, `InsertEvent`, `OnLoadParams`, `SubjectService`, `HomePage`, `EntityFetcherFactory`, `BoxVo`, `sdk.PushAudioInputStream`, `CloseEvent`, `SectionState`, `TestBadgeComponent`, `NoneType`, `Principal`, `MathjaxAdaptor`, `OrganizationsClient`, `DispatchProps`, `RollupTransition`, `VisibilityType`, `IndexTree`, `Point3dArrayCarrier`, `MultiWord`, `IApiCallback`, `ComponentSingleStyleConfig`, `IResponseAction`, `ParsedValue`, `BillingActions`, `IInputType`, `SignedVerifiableClaim`, `WeaveResult`, `SyntheticKeyboardEvent`, `DeepReadonly`, `SearchResults`, `d.FsWriteResults`, `Conv2DTranspose`, `Sharp`, `GetActionTypeParams`, `ExtendedUser`, `JoinTable`, `PartyJoinRequestList`, `BeancountFileService`, `Cypress.PluginConfig`, `UITableView`, `CurrentForm`, `IResponseMessageHandler`, `HoverParams`, `RowValidatorCallback`, `Positive`, `MultilevelSwitchCCSet`, `GovernObservableGovernor`, `SecuredFeature`, `ProductState`, `AntVSpec`, `IndicatorValuesObject`, `AccountDevice_VarsEntry`, `PlannerPage`, `EquatorialCoordinates`, `IKeyBinding`, `GraphTxn`, `Query`, `IUpSetDump`, `PaymentResponse`, `TagScene`, `StateT`, `PluginDeployerResolverContext`, `GraphQLRequestConfig`, `StdioOptions`, `ValuedRivenProperty`, `VueConstructor`, `OrderBalance`, `ECA`, `FnParam`, `LunarYear`, `DialogBase`, `Survey.Base`, `BreakpointMap`, `PDFAcroField`, `BarLineChartBase`, `ExtremaOptions`, `RemoteHotspot`, `VulnerabilityAssessmentName`, `IdOrSym`, `SignatureData`, `EmptyStatement`, `FIRStorageTaskSnapshot`, `HintMetadata`, `Core.Rect`, `requests.ListVmClusterPatchHistoryEntriesRequest`, `FeeLevel`, `MockCamundaWorkflow`, `HasAttributeExpr`, `IMapping`, `FlowPostFinally`, `ListResponseModel`, `IndexBuffer3D`, `StyledIconProps`, `CredentialTypesClass`, `SimulatorDatabase`, `QueryWithHelpers`, `IBazelCommandAdapter`, `CompleteOption`, `Footnote`, `DescribeCodeReviewCommandInput`, `CardsWrapper`, `MatchExpr`, `ConditionalTransactionCommitmentJSON`, `PriceState`, `ShellOptions`, `RecommendationSummary`, `DialogRef`, `SortedSetStructure`, `Deno.Addr`, `EditTransformFlyoutState`, `ISnapshotTree`, `DaffCategoryFilterRangeNumericPairFactory`, `DeleteStatus`, `IStateMachine`, `IsDeletedEventPipe`, `ElasticsearchClient`, `IListViewCommandSetExecuteEventParameters`, `TitleTagData`, `IdMap`, `LexoRankBucket`, `RelationPattern`, `Models.GamePosition`, `SavedObjectOptionalMigrationFn`, `MenuPopperProps`, `Asserts`, `ExecuteResultLine`, `theia.Disposable`, `SessionTypes`, `CLR0_ColorData`, `Defunder`, `CalculateInput`, `LabelMap`, `ImagePreviewProps`, `AxisType`, `FullAgentPolicy`, `GetSessionCommandInput`, `GfxRenderPassP_WebGPU`, `SteemiaProvider`, `FeatureDefinition`, `CollectionConfig`, `SidePanelProps`, `ExpressionFunctionOpenSearchDashboardsContext`, `RecipientType`, `ConceptServer`, `QueryOpts`, `apid.EditManualReserveOption`, `ScheduleConfiguration`, `TranslatedValueID`, `SafeElement`, `JackettFormattedResult`, `SVGRenderer`, `DebugCallback`, `InfoWindow`, `OsmRelation`, `AdapterContainer`, `LintConfig`, `Chunk`, `NavLinkWrapper`, `CategoryList`, `AddApplicationInputCommandInput`, `OrderInfo`, `TestContext`, `protos.common.IMSPRole`, `GetLifecyclePolicyCommandInput`, `squel.Select`, `moment.Moment`, `ApiResponse`, `Initial`, `ReturnCode`, `TxGeneratingFunctionOptions`, `LightBound`, `PuzzleLoader`, `Uint32Array`, `BalmEntry`, `CardContext`, `OUTPUT_FORMAT`, `Transporter`, 
`IBuildTaskConfiguration`, `APIService`, `WorkerDOMConfiguration`, `EVENT_TYPE`, `TIntermediate1`, `RobotApiRequestOptions`, `ShotRequestOptions`, `StateNode`, `IPipeline`, `ConstantJsExpr`, `CardType`, `Types.RouteCallback`, `logging.Log`, `BufferTokenizer`, `FontType`, `ReferenceNode`, `ITccSettings`, `MockS3`, `WalkStats`, `FlexDirection`, `AddAsTypeOnly`, `number`, `ResolvedTypeReferenceDirectiveWithFailedLookupLocations`, `MouseEventInit`, `VdmFunctionImportReturnType`, `ByteString`, `ODataSegment`, `PlayerPieceLocation`, `UnidirectionalTransferAppAction`, `CreateService`, `NameAndContent`, `TraceData`, `AllowedKeyEntropyBits`, `Thrown`, `PyVar`, `HeadingCache`, `CrowbarFont`, `ConfigStruct`, `CmsStructureConfig`, `IGraphData`, `DecryptedUserMessage`, `ModeName`, `VieraTV`, `DataTypesInput`, `GetReplicationConfigurationCommandInput`, `GrowStrategyMock`, `VueQuery`, `GuiObject`, `SiteEntry`, `ListComponent`, `ResourceData`, `Uint256`, `PrepareReactRender`, `SimpleRecordInput`, `RollingFileContext`, `GalleryService`, `TokenStore`, `MessageConversation`, `ResourceObject`, `SendEmailJsonDto`, `MonitorModel`, `QuestionToken`, `xDatum`, `ApiEditorUser`, `ImageDecoder`, `ChakraComponent`, `WholeHalfUnison`, `ConfirmProps`, `MalType`, `LoadEvents`, `vscUri.URI`, `CalloutArrow`, `alt.Player`, `AWS.DynamoDB`, `ConfigSource`, `IBranding`, `UnsignedTransaction`, `IDataPoint`, `ServiceBase`, `NoteCacheItem`, `SubdomainAndZoneId`, `StripeShippingMethods`, `ReactionMenuState`, `RebootBrokerCommandInput`, `AuthOptions`, `ListApplicationsCommand`, `auth.AuthenticationDetailsProvider`, `AugmentedProvider`, `PackTypeDefinition`, `RegistryPolicyTemplate`, `Psbt`, `GfxDevice`, `IParams`, `requests.UpdateProjectRequest`, `SynthDefResultType`, `SyncEngine`, `JsonDocsComponent`, `SavedObjectsFindOptions`, `EmailConfirmationValidator`, `PkSerializer`, `AreaChartOptions`, `t.Type`, `AsyncQueue`, `ThyAbstractOverlayPosition`, `THREE.Shader`, `TooltipService`, `SingleEmitter`, `DigitalWire`, `APIHandler`, `InitializeResult`, `DaffCartFacade`, `OperationGroupDetails`, `AuthType.Sponsored`, `ExpressRouteCircuitPeering`, `ParaType`, `CacheProvider`, `JSXElementAnalysis`, `ValidationErrorItem`, `IDBIndex`, `IOwnProps`, `ITrackItem`, `ChangeVisitor`, `Git.IStatusFile`, `ITestAppInterface`, `MailTo`, `IReporter`, `IErrorsManager`, `ContactSubscription`, `Audit`, `Generics`, `IndyProof`, `FrontstageProps`, `Events.predraw`, `YCommandInput`, `ITargetFilter`, `TaskDto`, `VarAD`, `BookSavedObjectAttributes`, `ComputedAsyncValue`, `PrintOptions`, `MCommentOwnerVideo`, `CalendarFieldsOptions`, `DynamicFormControlLayout`, `bigInteger.BigInteger`, `IHealthStateChunk`, `SVGSVGElement`, `InputEventType`, `LoadmoreNode`, `ThemeSettingsBreakpointAny`, `t.Identifier`, `Identify`, `MultiChannelAssociationCCReport`, `Force`, `PortRecordType`, `ts.VariableStatement`, `DeleteApplicationCloudWatchLoggingOptionCommandInput`, `IResultSetRowKey`, `AriaProps`, `PoolSystem`, `PlanetData`, `requests.ListLogGroupsRequest`, `AppsService`, `C9`, `TableFilterDescriptor`, `DestinationConfig`, `TFields`, `SCN0_Light`, `GetBotChannelAssociationsCommandInput`, `thrift.IStructCodec`, `AnimationSampler`, `ElementEntity`, `EngineArgs.EvaluateDataLossInput`, `d.PrerenderHydrateOptions`, `ModuleRef`, `EntityConfig`, `PolymorpheusContent`, `DisabledDateFn`, `HeatmapVisualizationState`, `QueryFn`, `ParameterContext`, `FileBuffer`, `DidSaveTextDocumentParams`, `FlattenedXmlMapCommandInput`, `ComponentVariablesPrepared`, `DOMError`, `GanttViewOptions`, 
`SpectatorService`, `TwitchBadge`, `BlockchainLink`, `ModLine`, `Inhibitor`, `IRuleSpecObj`, `IPropertyGridEditor`, `WhileNode`, `LineGraphicsOptions`, `V1RoleBinding`, `DirResult`, `DescribeReservedInstancesCommandInput`, `StripeEntry`, `GitOutput`, `StatementBodyContext`, `QRProvisioningInformation`, `PureComponent`, `lf.schema.TableBuilder`, `ChipColor`, `OverlayEventDetail`, `ObjectDefinition`, `ValuesStoreParams`, `ts.ParameterDeclaration`, `DirectoryItem`, `TaskObserversUnknown`, `CanvasFontWeights`, `ChannelLeave`, `Timezone`, `Stmt`, `AccessTokenInterface`, `QR.QueryResult`, `DbPull`, `CollectionViewer`, `Body`, `PluginsClient`, `IngredientOrResult`, `Genre`, `IUIEvent`, `HandlerFn`, `FABRuntime`, `next.AppLayer`, `Follower`, `MaybeNestedArray`, `ConceptConstraint`, `ViewBaseDefinition`, `Pipe`, `YieldNode`, `DeeplinkParts`, `DefaultContentNode`, `_GlobalJSONStorage`, `IMacroBuffer`, `DecoratorFn`, `MdcSelect`, `JobValidationMessageId`, `QueryParserVisitor`, `UpdateStackCommandInput`, `GunMsg`, `TreeMate`, `ContentChangedCallbackOption`, `MemberEntity`, `CustomOracleNAVIssuanceSettings`, `SourceTypes`, `PAT0`, `ArgTypes`, `AnyState`, `AbstractColumn`, `EffectDef`, `TestComponent`, `ISchemaGenerator`, `ItemBuilder`, `ConnectionContracts.ConnectParams`, `Pin`, `IListViewCommandSetListViewUpdatedParameters`, `SecurityRequestHandlerContext`, `NumberWidget`, `Database`, `LogConfiguration`, `DatabaseVulnerabilityAssessment`, `LendingPool`, `prng`, `ListEnvironmentsCommandInput`, `UncommittedChangesStrategy`, `IThemeWeb`, `BaseFrame`, `DeploymentConfig`, `MethodNode`, `fopAcM_prm_class`, `PolygonCollider`, `PDFOptionList`, `PopoverController`, `PutMessagesResultEntry`, `EitherAsync`, `FunctionComponent`, `IDetailsProps`, `CrochetCommand`, `PageScrollInstance`, `Types`, `Lit`, `GitStatusFile`, `MonthYearDate`, `RectangleEditOptions`, `ArchDescr`, `FilterExcludingWhere`, `GetThunkAPI`, `ProductA`, `InitStoreState`, `CephPoint`, `RtcpSenderInfo`, `SpacePropValues`, `HTMLScriptElement`, `HierarchyDefinition`, `UserController`, `CdsAlert`, `Team`, `MagitRepository`, `NodeTypes`, `RadioValue`, `PuppetASTObject`, `DotenvConfigOutput`, `IdentifierListContext`, `EntitySchemaDatatype`, `IEmployeeAppointmentCreateInput`, `Keys`, `SyncDB`, `EntityDispatcherFactory`, `GX.TexMapID`, `SpriteEffect`, `Messaging`, `PatchOperation`, `HealthCheckService`, `SafetyDepositDraft`, `StateHelper`, `IBlockType`, `SectionList`, `RuleDeclaration`, `Perspective`, `BaseListParams`, `Nodes.Node`, `FocusType`, `ThemeState`, `TaskEither`, `StampinoTemplate`, `WrappedComponentType`, `DataPumpExcludeParameters`, `ListCV`, `Initializer`, `Resolver`, `RotationType`, `DataConnection`, `FileBoxInterface`, `Listener`, `StdSignature`, `IFilterProps`, `FileShare`, `RepaymentRouterContract`, `UpdateUserCommand`, `APIGatewayProxyResult`, `Type_Which`, `CircuitMetadataBuilder`, `GeoContainmentAlertParams`, `ListAssociationsCommandInput`, `FilePreviewDialogConfig`, `DisplayNode`, `CertificateAuthorityConfigType`, `SpellInfoDetails`, `AppiumClient`, `NoteSnippetEditorConfig`, `WorkingDirectoryStatus`, `IRequestContext`, `Blueprint`, `WeakStorage`, `Popup`, `mediaInfo`, `ThunkType`, `FormArray`, `Stream.Readable`, `ResourceXML`, `ResizeObserverEntry`, `ElementProperty`, `DescribeSessionsCommandInput`, `ContentType1524199022084`, `HtmlRendererOptions`, `Empty`, `To`, `ListNodePoolsRequest`, `WhereClauseContext`, `PrivateInstance`, `MorphTarget`, `ABIDecoder.DecodedLog`, `ProblemLocation`, `ThisType`, `ActivityAction`, `Roadview`, 
`ChemController`, `WebSettings`, `IBlobINode`, `Http3PrioritisedElementNode`, `IOrchestratorState`, `Occurrence`, `DiffState`, `SecurityIdentity`, `PinejsClient`, `IObservableObject`, `PropertyCategoryRendererManager`, `ComponentInfo`, `DroppableProvided`, `GrpcAuthentication`, `JustifyContent`, `ServerException`, `Htlc`, `XhrCompleteContext`, `CliInfo`, `JSXAnalysis`, `CommandLine`, `TsDocumentService`, `LifecycleEvent`, `TargetResourceType`, `BlockState`, `HeftSession`, `GridState`, `UpdateAction`, `IField`, `ITypeEntry`, `commandInterface`, `TextChangeRange`, `StepConditional`, `DeclarationBlock`, `TimelineEvent`, `PublisherDoc`, `OrderSide`, `PiInstance`, `TestDispatcher`, `eventType`, `Issuer`, `Point2D`, `GitPullRequestWithStatuses`, `UsePaginatedQueryReducerAction`, `IVariableDefinition`, `ResourceComponent`, `FollowLinkConfig`, `ActiveSpeakerPolicy`, `PotentialEdgeInfo`, `CreateWidgetDto`, `DescribeTagsCommand`, `MaterialAlertDialogBuilder`, `DatasetEntry`, `LocationDescriptor`, `BoardTheme`, `EvalParam`, `IDBCursorWithValue`, `ITopic`, `TransactOptions`, `MarkerScene`, `ArgumentTypes`, `ChanLayer`, `MetaDataCollector`, `ReactHarness`, `ApolloQueryResult`, `UInt64`, `apid.RecordedTagId`, `AcceptInviteCommand`, `LogWidget`, `ISqlCommandParameters`, `BITBOX`, `d.TypesImportData`, `Mocha.SuiteFunction`, `LinuxParameters`, `IMarker`, `Element`, `IWebsocketMessage`, `OP`, `CustomConfigurationProvider1`, `T.Position`, `USBInTransferResult`, `XLSX.WorkBook`, `CheckResult`, `SMTDestructorGenCode`, `TemplateFileInfo`, `TEConst`, `CommandExecutionContext`, `PropertyCategoryLabelFilterer`, `GfxRenderInstManager`, `VRDisplay`, `HttpRequestWithFloatLabelsCommandInput`, `UpdateGroupCommandInput`, `ListExperimentsCommandInput`, `FunctionType`, `IFileChanges`, `AbiStateObject`, `OpenNodeTracker`, `IUpSetStaticDump`, `ProjectsStore`, `ViewEvent`, `Rights`, `GroupTypeUI`, `RTCIceTransport`, `IWorkerContext`, `TreeSelectOption`, `DSVEditor.ModelChangedArgs`, `DateRangeBucketAggDependencies`, `ReportId`, `AbiRange`, `IndexLiteral`, `HapticOptions`, `UIBrewHelper`, `NgModuleProviderDef`, `messages.TestStepResultStatus`, `ACM`, `ContractMethodDescriptorClient`, `angu.Context`, `MediaRequest`, `Service`, `Before`, `NextLink`, `nodes.Node`, `ServiceQuotas`, `UrlResolver`, `DocumentOptions`, `SVGPathFn`, `CredentialStore`, `BlockWithChildren`, `FsReaddirOptions`, `vscode.DecorationRenderOptions`, `ISerializedActionCall`, `EditionsList`, `BackupPolicy`, `SpyObject`, `HID`, `IListenerOptions`, `CreatedOrder`, `NavSegment`, `IBackendApi`, `Services.Configuration`, `FetchLinks`, `CfnIntegration`, `UpdateNetworkProfileCommandInput`, `TabWatcher`, `ResponderEvent`, `AudioFormat`, `HandleActionSharedParams`, `PutEmailIdentityDkimAttributesCommandInput`, `JQueryStatic`, `StoppingCondition`, `MetadataSelector`, `DefaultRequestReturn`, `IColumnWrapper`, `IEventSource`, `ContextTransformer`, `Bias`, `Cutline`, `ObjectMap`, `UnarynotaddsubContext`, `TypeInfo`, `SagaIteration`, `Dump`, `AST.Regex`, `SavedObjectOpenSearchDashboardsServices`, `Stylable`, `ParentGroup`, `HealthChecker`, `TContent`, `PaginationResult`, `WindowManager`, `CreateUserProfileCommandInput`, `TableFormDateType`, `KeyMacroAction`, `ABLTempTable`, `PopoverInitialState`, `LeakyReLU`, `IPermissionState`, `SlashCommand`, `LogStackedLayout`, `BitSet`, `ITrackEntry`, `RangeFieldMeta`, `CursorConnectionType`, `NodeVM`, `ShurikenParticleSystem`, `NodeArray`, `BaseAuthState`, `NearSwapTransaction`, `ThyOptionComponent`, `vile.Issue`, `BillCurrencyUnit`, 
`CellRange`, `Stop`, `InheritedProperty`, `instantiation.IConstructorSignature3`, `ConnectionStatus`, `DeepMocked`, `UpdateEntrypoint`, `BanesAndBoonsInfo`, `DBContext`, `PickerColumnOption`, `IPAddressEntry`, `DrawConfig`, `FontVariant`, `NotebookSessionShapeSeries`, `AABB`, `VisTypeDefinition`, `DBArg`, `MessageWriter`, `VerifyStream`, `MessagePort`, `Modify`, `TestClass`, `PathFunction`, `Hentai`, `ITransitionPage`, `ITableAtom`, `TimeConfig`, `DamageEvent`, `CreateAndTransferTransition`, `StripeShippingMethod`, `ListChannelModeratorsCommandInput`, `FeatureFlags`, `IPerformTasksCommandArgs`, `CreateServiceCommandInput`, `FillLabelConfig`, `TreeContext`, `RepositoryChangeEvent`, `ServerTreeItemPageObject`, `BaseView`, `WebCryptoDefaultCryptographicMaterialsManager`, `BackendValues`, `TSConfig`, `reflect.Assembly`, `IPrimaryKey`, `request.SuperTest`, `IReduxStore`, `DispatchedAction`, `ApolloPersistOptions`, `VerifiedStateUpdate`, `IMechanicsQuery`, `TypeScriptEmbeddedSource`, `JsxSpreadAttribute`, `IntelliJ`, `ProblemData`, `LayoutResult`, `Token.Token`, `Vp8RtpPayload`, `NormalizedNodeType`, `SeriesOption`, `CreateTopicResponse`, `BufferColumn`, `PlugyPage`, `JPAEmitterWorkData`, `IChangedArgs`, `UrlWithStringQuery`, `ButtonType`, `IOpenRepositoryFromURLAction`, `HelloWorldContainer`, `MdcIconRegistry`, `Positioned`, `td.SMap`, `HdPublicKey`, `CanaryExecutionRequest`, `CodeFile`, `VisualizeSavedObjectAttributes`, `TokenIterator`, `UI5ClassesInXMLTagNameCompletion`, `AnimationDefinition`, `InstanceDetails`, `CommandCreatorError`, `LockType`, `ByteData`, `GltfLoadOption`, `WorkTree`, `EntityT`, `TcpConnection`, `PointerInfoPre`, `Course`, `ActionGameState`, `ResourcePendingMaintenanceActions`, `DescribeDBClusterSnapshotsCommandInput`, `EventEmitter.ListenerFn`, `R.Chain`, `DAL.DEVICE_ID_LIGHT_SENSOR`, `IUserWithGroups`, `CategoryChannel`, `IClass`, `ITypedEdge`, `ChatFlowPack`, `Province`, `DocumentMigrator`, `ReporterConfig`, `MonacoEditorModel`, `IndexableNativeElement`, `WebAudioInstance`, `ResponseModel`, `ProColumns`, `IndexPatternsFetcher`, `BarChartDataPoint`, `VideoStreamRendererView`, `ErrorStateMatcher`, `ErrorObservable`, `WidgetManager`, `SCN0`, `ResourceHolder`, `ArgValue`, `TestPlan`, `HTTP_METHODS`, `BusInstance`, `ImageTemplate`, `LocalForageWithObservablePrivateProps`, `FoundationElementDefinition`, `phase0.BeaconBlockHeader`, `Tool`, `Privacy`, `DatePickerProps`, `Vector3D`, `SubgraphPlaceholder`, `Denomination`, `VpnSite`, `GrantInterface`, `PlotCurveTypes`, `StyleDeclaration`, `CompressedImage`, `DocgeniLibrary`, `TranspileModuleResults`, `AnimationService`, `FilterGroupKey`, `DiscoverTypings`, `PubKeyEncoding`, `DocTable`, `LayerValue`, `CallExpr`, `WebSocketChannel`, `FauxClassGenerator`, `IExecuteFunctions`, `TerraformBaseCommandInitializer`, `UserDeposit`, `IDate`, `MatGridTile`, `PrimitiveTypeDescription`, `BlueprintContainer`, `DeleteCertificateResponse`, `SpriteStyle`, `ListStreamsCommandInput`, `BrowsingData.DataTypeSet`, `RequestDetailsProps`, `ProxySide`, `AxeScanResults`, `DataTypesInput.Struct2Struct`, `SsrcDescription`, `fhir.Identifier`, `JsonClassTypeOptions`, `FsWatchResults`, `DeltaChangeContext`, `List`, `MembersState`, `CannonRigidbody3D`, `SignDoc`, `TransportParameterId`, `ComputedPropertyName`, `AcceptFn`, `next.Page`, `ObjectDetails`, `Tenant`, `d.CompilerRequestResponse`, `AbstractType`, `VirtualFileInterface`, `InteractionForegroundService`, `MultiStepInput`, `SNSEvent`, `GroupsService`, `TKind`, `UnitOfMeasurement`, `FetchType`, 
`React.ClipboardEvent`, `ParameterChange`, `ClickEvent`, `PageListItemProps`, `MemberSoundEffects`, `HandlerResult`, `ChangesetProps`, `TimeFormat`, `LSTMLayerArgs`, `MBusTransaction`, `ChildNodeType`, `ITokenResponse`, `MatchingLogic`, `ChainConfig`, `QueryError`, `GetAccountInfoRequest`, `CoreTracerBase`, `DidDocumentBuilder`, `AnyChannel`, `InvalidationLevel`, `Topics`, `StepperProps`, `UnorderedStrategy`, `UpdateOpts`, `CloudPoint`, `TransactionClientContract`, `CustomPropertyDecorator`, `NodejsFunction`, `BaseRender`, `RepositionScrollStrategyConfig`, `AnimationTriggerMetadata`, `ts.Map`, `FcUuidAuth`, `FovCalculation`, `Electron.MenuItem`, `Tlistener`, `SuggestionsRequest`, `IGBInstance`, `TypeDisplayOptions`, `Vpc`, `ProviderOptions`, `CatchupToLatestShareResult`, `MeterCCReport`, `ReadonlyObject`, `NotifyPlacement`, `AnalyserNode`, `ExecException`, `Mocker`, `BuildConfig`, `OptimizeJsResult`, `ITasks`, `AppContextService`, `f32`, `AbortIncompleteMultipartUpload`, `IKubernetesManifestCommandData`, `TemplateOutput`, `GraphQLRequestContext`, `MockStoreAction`, `SDKConfiguration`, `ex.ExcaliburGraphicsContext`, `VisibilityNotifier2D`, `InterfaceWithValues`, `EntitySystem`, `FileWatcherProvider`, `DOMOutputSpec`, `CeramicClient`, `Camera_t`, `Connex.Driver`, `ClaimToken`, `selectorParser.Node`, `SUUID`, `DatabaseState`, `RuntimeTable`, `CharacteristicValue`, `StoriesDefaultExport`, `GPUBuffer`, `PackageTarget`, `DynamoToPromise`, `SourceASTBuilder`, `NetworkIndicator`, `SpeechSynthesisVoice`, `CellProps`, `L1L2`, `PubkeyResult`, `LogSampleTimestamp`, `ControlPanelState`, `CircleModel`, `Ch`, `t.Expression`, `OwnProps`, `OcsShare`, `TransactionParams`, `PluginStreamActionPayload`, `ProtocolRequest`, `firebase.database.Reference`, `RSTPreview`, `StateBlock`, `MatListOption`, `ValueService`, `ThingProto`, `ProxyNode`, `ActionProcessor`, `ExtendedCodeAction`, `FetchOptions`, `AVRIOPort`, `DataNode`, `App.services.IUriService`, `DataRow`, `SyntheticErrorLabel`, `fixtures.Fixtures`, `RNCookies`, `ConnectedComponent`, `IElementRegistry`, `StopwatchResult`, `FakeDatasetArgs`, `ArcTransactionResult`, `FetchableField`, `SVGLineElement`, `TopNavConfigParams`, `TypeArray`, `PTG`, `PairSet`, `UpgradeSchemeWrapper`, `IEmbedConfigurationBase`, `RouterExtensions`, `InitialProperties`, `AccessoryTypeExecuteResponse`, `ActionConfig`, `Chatlog`, `Register32`, `ExceptionlessClient`, `AggregatedResult`, `requests.ListDatabasesRequest`, `PointSeriesColumn`, `ScoreService`, `SceneActuatorConfigurationCCSet`, `CommandFlags`, `InputText`, `KeyChange`, `UiAtomType`, `UpdateJobCommandInput`, `ApiPipeline`, `HDOMImplementation`, `React.Key`, `EventArgs`, `Warrior`, `ContractService`, `MediaUploadForm`, `ICellStructure`, `AlertData`, `IContextProvider`, `TouchData`, `BackendContext`, `IRecordedApiModel`, `CustomQueryModel`, `InvokeCreator`, `ts.NodeArray`, `MethodSignature`, `CloudDirectorConfig`, `HierarchicalItem`, `AddressProtocol`, `NodeController`, `grpc.Metadata`, `FilterNode`, `ValidatePurchaseHuaweiRequest`, `ProjectionOptions`, `PackagePolicyInputStream`, `PositionRange`, `SyncPeriod`, `AksClusterConfig`, `NatF`, `DirectSpiral3d`, `NavigationDirection`, `TemplateGroup`, `CompassCardConfig`, `RedisInterface`, `GithubRelease`, `ExistingAccountError`, `StageName`, `WindupMember`, `TSettings`, `AbstractStatusBarLabelItem`, `FigmaPaint`, `AxeResult`, `OctoServerConnectionDetails`, `CreateProfile`, `WaterPoint`, `ServerRequest`, `IPageData`, `TestState`, `JsExport`, `UtilityInfo`, `RetryConfiguration`, 
`ClassNames`, `SelectBuilder`, `ContextMenuDirection`, `GridColumnExtension`, `QueryListsCommandInput`, `ApiToken`, `Mixin`, `CheckableElement`, `AnimatedNode`, `FoodModel`, `GetEnvironentsForProjectEnvironmentResult`, `InitState`, `AttendanceMonth`, `requests.ListCachingRulesRequest`, `Android`, `CommitInfo`, `NodeJS.EventEmitter`, `InjectedIntl`, `FilterItem`, `InstanceTargetWithMetadata`, `InstanceConfiguration`, `OpenYoloCredentialHintOptions`, `WalletRecord`, `Multicall`, `SlotId`, `KeyboardKey`, `ToolItemDef`, `ValidatorBuilder`, `RestFinishedResponse`, `requests.ListDrgRouteDistributionStatementsRequest`, `MappedTopicsMap`, `InstanceSummary`, `WaveformRegion`, `IMapPin`, `MarkMap`, `HistoryItemImpl`, `DeleteRouteCommandInput`, `FabricEnvironment`, `NavigationPublicPluginSetup`, `StrategyOptions`, `Artifact`, `PasswordHistoryData`, `PostgreSQL`, `TTypeProto`, `Recordable`, `CollectionMetadata`, `ICustomField`, `RecordingSegment`, `DAL.DEVICE_ID_SERIAL`, `CalendarRange`, `ServiceHealthStatus`, `STColumnFilterMenu`, `DateFnsInputDate`, `WalletVersion`, `PngEmbedder`, `AnchorProps`, `SkipListSet`, `ts.Scanner`, `UpdateQuery`, `UploadedFile`, `HandleProps`, `PutRequest`, `ConfigStorage`, `IObjectOf`, `ResponseStream`, `TradeablePoolsMap`, `Turmoil`, `CreateAppOptions`, `requests.ListBackupDestinationRequest`, `BindingType`, `ActiveProps`, `TransitionController`, `Seg`, `CompilerJsDoc`, `CopyDescriptor`, `ForeignKeySpec`, `SVType`, `DecoratedComponentClass`, `WebviewPanelImpl`, `PropertyDetails`, `CoreSavedObjectsRouteHandlerContext`, `TTK1`, `PrimaryFeaturePrivilege`, `IDataFilterValueInfo`, `ObjectBindingPattern`, `FlexStyleProps`, `IRadio`, `RefList`, `OpMapper`, `VolumeType`, `PublicKeyData`, `YoutubeRawData`, `PayloadAction`, `PrismaPromise`, `IPullRequestListItem`, `SessionConfig`, `SpringValue`, `CapsizeOpts`, `kuberesources.ResourceKind`, `RelayServiceConnectionEntity`, `UpdateUserDto`, `ThunkResult`, `IModalServiceInstance`, `CameraController`, `GithubUser`, `ModuleMetadata`, `NET`, `ModalsStateEntry`, `ImportCertificateCommandInput`, `FatalErrorFn`, `ToastParams`, `SegmentClient`, `Armature`, `StellarBalanceMonitor`, `NotificationTemplateEntity`, `CacheManagerGetOptions`, `AllureTest`, `DialogPosition`, `TransferItemOption`, `FB3ReaderPage.ReaderPage`, `CommonToolbarItem`, `DerivedGauge`, `ExecutionRole`, `KeylistUpdateMessage`, `AppResult`, `QuickInfo`, `messages.SourceReference`, `IHeaderProps`, `AccountSteam_VarsEntry`, `ITelemetryBaseEvent`, `SubqueryProject`, `OpenApiDecorator`, `ContentGroupProps`, `LocalStorageSources`, `Codebase`, `MetricAggParam`, `WsConnectionState`, `BillName`, `ReflectionCategory`, `AuthRequired`, `PreparedData`, `RowItem`, `AttendanceDay`, `ILoginOptions`, `DecoratorDefArg`, `GameData`, `JMap`, `FunctionCall`, `SetVaultParameter`, `Perm`, `ISeinNodeExtension`, `SavedObjectsTypeMappingDefinitions`, `RelationalOperatorConfig`, `Work`, `CardHeaderProps`, `GitHubInfo`, `ListTagsCommandInput`, `PathProxy`, `ParsedLock`, `ProtocolConformance`, `VSTS`, `ReadModelRuntimeEventHandler`, `BarcodeInfo`, `LookupItem`, `CheckSearchSessionsDeps`, `CompilerEventFileUpdate`, `TypeCase`, `IConstructor`, `SecurityRequirement`, `RedditComment`, `ControllerType`, `TNerve`, `ICluster`, `ProofNodeX`, `GraphQLHOC`, `IGraphQlSchemaContext`, `ethers.providers.BlockTag`, `IconMap`, `getSubAdapterType`, `E2EPage`, `GaugeRangeProperty`, `NormalizedEntrypointItem`, `FactoryIndex`, `TypeReference1`, `TrackQueryOpts`, `Shuriken`, `PublisherProperties`, `UserFields`, 
`RoastingMachine`, `RemoveTagsFromResourceCommandOutput`, `RootContext`, `SplitInfo`, `TreeState`, `ButtonManager`, `Insights`, `ToggleButtonProps`, `CodeGenDirective`, `OpenDialogOptions`, `FolderComponent`, `BadRequestException`, `PlotAreaOptions`, `RuleFixer`, `SnippetVisibility`, `StorefrontApiContext`, `StoreContext`, `TImageType`, `Hull`, `SavedObjectsClientCommonFindArgs`, `ButtonHTMLProps`, `IUserDetails`, `IRoot`, `CSSSnippetProperty`, `ShoutySession`, `GraphQLQueryGenerator`, `StudentBasic`, `RepositoryInfo`, `ManifestEditor`, `Tipset`, `InvalidOperationException`, `MosString128`, `SourceDescriptionChunk`, `MetricsStore`, `Fig.Generator`, `vscode.EndOfLine`, `PathReference`, `IStatistics`, `GlobalCredentials`, `DigitalComponent`, `... 7 more ...`, `DragDropIdentifier`, `authors.Table`, `TaroText`, `RemoveEvent`, `DaffCategoryFilterRangeNumericRequest`, `BuilderRuntimeEdge`, `SurveyElementEditorTabModel`, `IGESDocument`, `CentralSceneCCConfigurationSet`, `GetUsersRequest`, `BaseHub`, `FormErrorProps`, `AspidaResponse`, `PriceAxisViewRendererCommonData`, `Specification`, `FlowCondition`, `Crosshair`, `MetricsPublisherProxy`, `DeployedWithoutEmailWallet`, `AppComponentDefinition`, `CheckBuilder`, `MeasureUnit`, `IStatusWarning`, `DSL`, `Immutable.Map`, `ADTClient`, `Source`, `CreateAddLinkOptions`, `CommsRecord`, `CoreDependencies`, `d.DevClientConfig`, `CrudTestContext`, `Types.IResolver`, `FormatType`, `ClusterResource`, `IProjectWizardContext`, `LocalAccount`, `MockedLogger`, `DeleteVpcPeeringConnectionCommandInput`, `HTTPHotspotObject`, `ContentTypeProperty`, `IRating`, `SavedQueryAttributes`, `IGetTimeLimitReportInput`, `OrderGraph`, `ImageCanvas`, `Tsconfig`, `PatternOutput`, `NDframe`, `IHttpClientOptions`, `ts.TypeAliasDeclaration`, `AggName`, `SourceType`, `IHashProvider`, `Gen`, `LoadingProps`, `requests.ListTaggingWorkRequestErrorsRequest`, `RBNFSet`, `IQuestionnaire`, `ColorInformation`, `PrismaConfig`, `KBService`, `BasicDataProvider`, `Executor`, `EmailAddress`, `DashboardStart`, `SavedToken`, `ArmArrayResult`, `ObjectCriteriaNode`, `MqttOptions`, `t.SelectablePath`, `IBuildTaskPlugin`, `PosSpan`, `IonicApp`, `InfoType`, `ListPipelinesCommandInput`, `PTestNode`, `UrlDrilldown`, `Severity`, `RxSlpStream`, `XUL.contentWindow`, `requests.ListCatalogPrivateEndpointsRequest`, `MessageWithReplies`, `ZoneOptions`, `CodeBlock`, `Origin`, `PiTypeDefinition`, `StacksPublicKey`, `TimelineActivity`, `CannonColliderShape`, `ContractCallContext`, `hm.BasicCredentialHandler`, `ViewMeta`, `Monoid`, `OpenSearchError`, `DescribeClusterCommandInput`, `TabItem`, `TETemplate`, `TestRun`, `GoogleMeetSegmentationConfig`, `LoadAll`, `ListenForCb`, `Cli`, `INetworkInfoFeatureDependency`, `vscode.NotebookCell`, `ServerHost`, `TSFile`, `moment.MomentStatic`, `ImageEncoder`, `IpPort`, `IBuildStageContext`, `ProviderUserBulkRequest`, `VatLayout`, `TransformId`, `CreateAssetProps`, `BatchCheckLayerAvailabilityCommandInput`, `Uint64Id`, `RealtimeEditMode`, `PersistenceProvider`, `ContextAwareLogger`, `UnionMemberMatchTransformer`, `TTheme`, `NewMsgData`, `symbol`, `WriterResource`, `IVarSize`, `Matchers`, `DatabaseTable`, `PuzzleAction`, `pxtc.BlocksInfo`, `P3`, `SerializedGame`, `CommentService`, `ImageMetadata`, `ThemeConfiguration`, `KonstColor`, `Setter`, `PathElement`, `ControllerParameterMetadata`, `FileIconService`, `GalleryActions`, `ISeedPhraseFormat`, `CdsControlMessage`, `FragmentedHandshake`, `RadixParticle`, `VNodeTypes`, `HTLC` + +{:.btn-box} + + 
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_ner_graphcodebert_MT4TS_en_5.5.0_3.0_1725666190601.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_ner_graphcodebert_MT4TS_en_5.5.0_3.0_1725666190601.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDetector = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+tokenizer = Tokenizer() \
+    .setInputCols("sentence") \
+    .setOutputCol("token")
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_ner_graphcodebert_MT4TS","en") \
+    .setInputCols(["sentence", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline(stages=[documentAssembler, sentenceDetector, tokenizer, tokenClassifier])
+
+data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDetector = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("sentence"))
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_ner_graphcodebert_MT4TS","en")
+    .setInputCols(Array("sentence", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDetector, tokenizer, tokenClassifier))
+
+val data = Seq("PUT YOUR STRING HERE").toDF("text")
+
+val result = pipeline.fit(data).transform(data)
+```
+</div>
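+
+As a quick way to read the predictions, the token-level tags can be pulled straight from the transformed DataFrame. This is a minimal sketch and not part of the pipeline above; it assumes `result` is the DataFrame returned by `pipeline.fit(data).transform(data)` and reuses the `token` and `ner` output columns defined there.
+
+```python
+# Show each row's tokens alongside their predicted type tags.
+result.select("token.result", "ner.result").show(truncate=False)
+```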
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_ner_graphcodebert_MT4TS| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|605.7 MB| + +## References + +References + +- https://huggingface.co/kevinjesse/graphcodebert-MT4TS \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-roberta_ner_roberta_large_tweetner_random_en.md b/docs/_posts/ahmedlone127/2024-09-06-roberta_ner_roberta_large_tweetner_random_en.md new file mode 100644 index 00000000000000..ef81fc7882fb31 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-roberta_ner_roberta_large_tweetner_random_en.md @@ -0,0 +1,112 @@ +--- +layout: model +title: English RobertaForTokenClassification Large Cased model (from tner) +author: John Snow Labs +name: roberta_ner_roberta_large_tweetner_random +date: 2024-09-06 +tags: [bert, ner, open_source, en, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RobertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `roberta-large-tweetner-random` is a English model originally trained by `tner`. + +## Predicted Entities + +`group`, `creative_work`, `person`, `event`, `corporation`, `location`, `product` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_ner_roberta_large_tweetner_random_en_5.5.0_3.0_1725624909622.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_ner_roberta_large_tweetner_random_en_5.5.0_3.0_1725624909622.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDetector = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+tokenizer = Tokenizer() \
+    .setInputCols("sentence") \
+    .setOutputCol("token")
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_ner_roberta_large_tweetner_random","en") \
+    .setInputCols(["sentence", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline(stages=[documentAssembler, sentenceDetector, tokenizer, tokenClassifier])
+
+data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDetector = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("sentence"))
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_ner_roberta_large_tweetner_random","en")
+    .setInputCols(Array("sentence", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDetector, tokenizer, tokenClassifier))
+
+val data = Seq("PUT YOUR STRING HERE").toDF("text")
+
+val result = pipeline.fit(data).transform(data)
+```
+
+{:.nlu-block}
+```python
+import nlu
+nlu.load("en.ner.roberta.tweet.tweetner_random.large.by_tner").predict("""PUT YOUR STRING HERE""")
+```
+</div>
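+
+If grouped entity spans (for example whole person or location mentions) are more convenient than raw IOB tags, a `NerConverter` stage can be appended after the classifier. This is only a sketch reusing the `sentence`, `token`, and `ner` columns from the Python pipeline above; it is not required by the model.
+
+```python
+from sparknlp.annotator import NerConverter
+
+# Merge IOB-tagged tokens into whole entity chunks.
+nerConverter = NerConverter() \
+    .setInputCols(["sentence", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+pipeline = Pipeline(stages=[documentAssembler, sentenceDetector, tokenizer, tokenClassifier, nerConverter])
+pipeline.fit(data).transform(data).select("ner_chunk.result").show(truncate=False)
+```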
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_ner_roberta_large_tweetner_random| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|1.3 GB| + +## References + +References + +- https://huggingface.co/tner/roberta-large-tweetner-random \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-roberta_small_en.md b/docs/_posts/ahmedlone127/2024-09-06-roberta_small_en.md new file mode 100644 index 00000000000000..9fc9ceef0e3697 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-roberta_small_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_small RoBertaEmbeddings from smallbenchnlp +author: John Snow Labs +name: roberta_small +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_small` is a English model originally trained by smallbenchnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_small_en_5.5.0_3.0_1725660273257.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_small_en_5.5.0_3.0_1725660273257.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("roberta_small","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("roberta_small","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
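+
+To take the token vectors out of the annotation structs, an `EmbeddingsFinisher` can be appended to the same pipeline. A minimal sketch reusing the stages from the Python example above; the extra stage is illustrative and not part of the model itself.
+
+```python
+from sparknlp.base import EmbeddingsFinisher
+
+# Convert the "embeddings" annotations into plain Spark ML vectors.
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["finished_embeddings"]) \
+    .setOutputAsVector(True)
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings, finisher])
+pipeline.fit(data).transform(data) \
+    .selectExpr("explode(finished_embeddings) as embedding_vector") \
+    .show(truncate=False)
+```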
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_small| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|84.7 MB| + +## References + +https://huggingface.co/smallbenchnlp/roberta-small \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-roberta_small_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-roberta_small_pipeline_en.md new file mode 100644 index 00000000000000..f44cfc7717b5d7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-roberta_small_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_small_pipeline pipeline RoBertaEmbeddings from smallbenchnlp +author: John Snow Labs +name: roberta_small_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_small_pipeline` is a English model originally trained by smallbenchnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_small_pipeline_en_5.5.0_3.0_1725660277702.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_small_pipeline_en_5.5.0_3.0_1725660277702.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# The pipeline expects a DataFrame with a "text" column.
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("roberta_small_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+// The pipeline expects a DataFrame with a "text" column.
+val df = Seq("I love spark-nlp").toDF("text")
+
+val pipeline = new PretrainedPipeline("roberta_small_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
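+
+For quick experiments on a single string, the same pretrained pipeline can also be used with `annotate`, which returns the output columns as a plain Python dictionary rather than a DataFrame. A small illustrative sketch:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("roberta_small_pipeline", lang="en")
+# annotate() runs every stage on one string and returns {output_column: values}.
+annotations = pipeline.annotate("I love spark-nlp")
+print(annotations.keys())
+```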
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_small_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|84.8 MB| + +## References + +https://huggingface.co/smallbenchnlp/roberta-small + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-rohit_setfit_model_en.md b/docs/_posts/ahmedlone127/2024-09-06-rohit_setfit_model_en.md new file mode 100644 index 00000000000000..db19da4a972c30 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-rohit_setfit_model_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English rohit_setfit_model MPNetEmbeddings from Rohit129 +author: John Snow Labs +name: rohit_setfit_model +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rohit_setfit_model` is a English model originally trained by Rohit129. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rohit_setfit_model_en_5.5.0_3.0_1725595760043.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rohit_setfit_model_en_5.5.0_3.0_1725595760043.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("rohit_setfit_model","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("rohit_setfit_model","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
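+
+Since MPNet returns one sentence-level vector per input document, the embeddings can be collected and compared directly, for instance with a cosine similarity. This sketch assumes the Python pipeline defined above and is only meant to illustrate one way of consuming the `embeddings` column.
+
+```python
+import numpy as np
+
+data = spark.createDataFrame([["I love spark-nlp"], ["Spark NLP is great"]]).toDF("text")
+rows = pipeline.fit(data).transform(data).select("embeddings.embeddings").collect()
+
+# One sentence embedding per row; compare the two vectors with cosine similarity.
+v1 = np.array(rows[0][0][0])
+v2 = np.array(rows[1][0][0])
+print(float(np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))))
+```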
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rohit_setfit_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/Rohit129/rohit-setfit-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-rohit_setfit_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-rohit_setfit_model_pipeline_en.md new file mode 100644 index 00000000000000..180bf2d9b0289d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-rohit_setfit_model_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English rohit_setfit_model_pipeline pipeline MPNetEmbeddings from Rohit129 +author: John Snow Labs +name: rohit_setfit_model_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rohit_setfit_model_pipeline` is a English model originally trained by Rohit129. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rohit_setfit_model_pipeline_en_5.5.0_3.0_1725595779905.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rohit_setfit_model_pipeline_en_5.5.0_3.0_1725595779905.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# The pipeline expects a DataFrame with a "text" column.
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("rohit_setfit_model_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+// The pipeline expects a DataFrame with a "text" column.
+val df = Seq("I love spark-nlp").toDF("text")
+
+val pipeline = new PretrainedPipeline("rohit_setfit_model_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
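+
+The annotators bundled in the downloaded pipeline can be listed from the underlying Spark `PipelineModel`; the names should correspond to the "Included Models" section below. A small illustrative sketch:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("rohit_setfit_model_pipeline", lang="en")
+# PretrainedPipeline wraps a regular PipelineModel whose stages can be inspected.
+for stage in pipeline.model.stages:
+    print(type(stage).__name__)
+```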
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rohit_setfit_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/Rohit129/rohit-setfit-model + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-rotten_tomatoes_microsoft_deberta_v3_base_seed_2_en.md b/docs/_posts/ahmedlone127/2024-09-06-rotten_tomatoes_microsoft_deberta_v3_base_seed_2_en.md new file mode 100644 index 00000000000000..4f76655612639b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-rotten_tomatoes_microsoft_deberta_v3_base_seed_2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English rotten_tomatoes_microsoft_deberta_v3_base_seed_2 DeBertaForSequenceClassification from utahnlp +author: John Snow Labs +name: rotten_tomatoes_microsoft_deberta_v3_base_seed_2 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rotten_tomatoes_microsoft_deberta_v3_base_seed_2` is a English model originally trained by utahnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rotten_tomatoes_microsoft_deberta_v3_base_seed_2_en_5.5.0_3.0_1725588819086.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rotten_tomatoes_microsoft_deberta_v3_base_seed_2_en_5.5.0_3.0_1725588819086.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("rotten_tomatoes_microsoft_deberta_v3_base_seed_2","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("rotten_tomatoes_microsoft_deberta_v3_base_seed_2", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
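+
+Once the pipeline has run, the predicted label for each row is available in the `class` output column. A minimal sketch, assuming `pipelineDF` from the Python example above:
+
+```python
+# Show the input text next to its predicted class label.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```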
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rotten_tomatoes_microsoft_deberta_v3_base_seed_2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|577.9 MB| + +## References + +https://huggingface.co/utahnlp/rotten_tomatoes_microsoft_deberta-v3-base_seed-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-rpa_synth_08nov_en.md b/docs/_posts/ahmedlone127/2024-09-06-rpa_synth_08nov_en.md new file mode 100644 index 00000000000000..b03b0163f7a8f2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-rpa_synth_08nov_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English rpa_synth_08nov XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: rpa_synth_08nov +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rpa_synth_08nov` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rpa_synth_08nov_en_5.5.0_3.0_1725630786007.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rpa_synth_08nov_en_5.5.0_3.0_1725630786007.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("rpa_synth_08nov","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("rpa_synth_08nov", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
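+
+The extracted answer span ends up in the `answer` annotation column set above. A minimal sketch of viewing it next to the question, using the column names from the example:
+
+```python
+# `answer.result` is an array holding the predicted answer text for each row.
+pipelineDF.select("document_question.result", "answer.result").show(truncate=False)
+```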
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rpa_synth_08nov| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|796.6 MB| + +## References + +https://huggingface.co/am-infoweb/RPA_Synth_08nov \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-rpa_synth_08nov_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-rpa_synth_08nov_pipeline_en.md new file mode 100644 index 00000000000000..15ec78caf4b812 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-rpa_synth_08nov_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English rpa_synth_08nov_pipeline pipeline XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: rpa_synth_08nov_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rpa_synth_08nov_pipeline` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rpa_synth_08nov_pipeline_en_5.5.0_3.0_1725630907927.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rpa_synth_08nov_pipeline_en_5.5.0_3.0_1725630907927.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("rpa_synth_08nov_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("rpa_synth_08nov_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
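+
+Here `df` is assumed to be a Spark DataFrame carrying the question and its context. A minimal sketch; the `question`/`context` column names are an assumption carried over from the standalone model card above, not something this pipeline card specifies:
+
+```python
+# Illustrative input; the column names are assumed to match the annotator example above.
+df = spark.createDataFrame(
+    [["What framework do I use?", "I use spark-nlp."]]
+).toDF("question", "context")
+
+annotations = pipeline.transform(df)
+annotations.printSchema()  # inspect the answer column the pipeline produces
+```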
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rpa_synth_08nov_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|796.6 MB| + +## References + +https://huggingface.co/am-infoweb/RPA_Synth_08nov + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sb_temfac_en.md b/docs/_posts/ahmedlone127/2024-09-06-sb_temfac_en.md new file mode 100644 index 00000000000000..ba1a3c7a32e815 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sb_temfac_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English sb_temfac MPNetEmbeddings from stealthpy +author: John Snow Labs +name: sb_temfac +date: 2024-09-06 +tags: [mpnet, en, open_source, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sb_temfac` is a English model originally trained by stealthpy. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sb_temfac_en_5.5.0_3.0_1725629815931.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sb_temfac_en_5.5.0_3.0_1725629815931.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+embeddings = MPNetEmbeddings.pretrained("sb_temfac","en") \
+    .setInputCols(["documents"]) \
+    .setOutputCol("mpnet_embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, embeddings])
+
+# toy input used to fit and run the pipeline
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val embeddings = MPNetEmbeddings
+    .pretrained("sb_temfac", "en")
+    .setInputCols(Array("documents"))
+    .setOutputCol("mpnet_embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings))
+
+// toy input used to fit and run the pipeline
+val data = Seq("I love spark-nlp").toDF("text")
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+```
+</div>
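+
+The embeddings land in the `mpnet_embeddings` column configured above; each document annotation carries one vector in its `embeddings` field. A quick, illustrative check of the vector dimensionality:
+
+```python
+# Grab the first row's embedding vector and print its length (the embedding dimension).
+first = pipelineDF.select("mpnet_embeddings.embeddings").first()
+print(len(first[0][0]))
+```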
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sb_temfac| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.6 MB| + +## References + +References + +https://huggingface.co/stealthpy/sb-temfac \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sb_temfac_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-sb_temfac_pipeline_en.md new file mode 100644 index 00000000000000..ba24ccf291c785 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sb_temfac_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English sb_temfac_pipeline pipeline MPNetForSequenceClassification from stealthpy +author: John Snow Labs +name: sb_temfac_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sb_temfac_pipeline` is a English model originally trained by stealthpy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sb_temfac_pipeline_en_5.5.0_3.0_1725629836162.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sb_temfac_pipeline_en_5.5.0_3.0_1725629836162.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sb_temfac_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sb_temfac_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
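+
+For quick, ad-hoc checks on a single string, `PretrainedPipeline` also exposes an `annotate` helper alongside `transform`. A minimal sketch (the example text is illustrative only):
+
+```python
+# annotate() returns a plain dict keyed by the pipeline's output annotation columns.
+result = pipeline.annotate("I love spark-nlp")
+print(result.keys())
+```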
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sb_temfac_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.6 MB| + +## References + +https://huggingface.co/stealthpy/sb-temfac + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_gamma_jason_en.md b/docs/_posts/ahmedlone127/2024-09-06-scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_gamma_jason_en.md new file mode 100644 index 00000000000000..ae349ebe4d0534 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_gamma_jason_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_gamma_jason XlmRoBertaForSequenceClassification from haryoaw +author: John Snow Labs +name: scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_gamma_jason +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_gamma_jason` is a English model originally trained by haryoaw. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_gamma_jason_en_5.5.0_3.0_1725620216039.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_gamma_jason_en_5.5.0_3.0_1725620216039.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_gamma_jason","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_gamma_jason", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scenario_non_kd_scr_d2_data_amazonscience_massive_all_1_1_gamma_jason| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|884.3 MB| + +## References + +https://huggingface.co/haryoaw/scenario-NON-KD-SCR-D2_data-AmazonScience_massive_all_1_1_gamma-jason \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-secroberta_en.md b/docs/_posts/ahmedlone127/2024-09-06-secroberta_en.md new file mode 100644 index 00000000000000..54439d98f86aaa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-secroberta_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English secroberta RoBertaEmbeddings from jackaduma +author: John Snow Labs +name: secroberta +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`secroberta` is a English model originally trained by jackaduma. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/secroberta_en_5.5.0_3.0_1725660293127.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/secroberta_en_5.5.0_3.0_1725660293127.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("secroberta","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("secroberta","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
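+
+Each token receives its own vector in the `embeddings` column defined above. A minimal sketch of checking how many tokens were embedded and the size of each vector, reusing the example's column names:
+
+```python
+# One embedding per token; print the token count and the per-token vector dimension.
+row = pipelineDF.select("token.result", "embeddings.embeddings").first()
+print(len(row[0]), "tokens,", len(row[1][0]), "dimensions per token")
+```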
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|secroberta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|311.6 MB| + +## References + +https://huggingface.co/jackaduma/SecRoBERTa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-secroberta_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-secroberta_pipeline_en.md new file mode 100644 index 00000000000000..aba4cc92ccb06c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-secroberta_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English secroberta_pipeline pipeline RoBertaEmbeddings from jackaduma +author: John Snow Labs +name: secroberta_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`secroberta_pipeline` is a English model originally trained by jackaduma. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/secroberta_pipeline_en_5.5.0_3.0_1725660308451.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/secroberta_pipeline_en_5.5.0_3.0_1725660308451.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("secroberta_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("secroberta_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|secroberta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|311.6 MB| + +## References + +https://huggingface.co/jackaduma/SecRoBERTa + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-securebert_aptner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-securebert_aptner_pipeline_en.md new file mode 100644 index 00000000000000..06130ef113bd4a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-securebert_aptner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English securebert_aptner_pipeline pipeline RoBertaForTokenClassification from Cyber-ThreaD +author: John Snow Labs +name: securebert_aptner_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`securebert_aptner_pipeline` is a English model originally trained by Cyber-ThreaD. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/securebert_aptner_pipeline_en_5.5.0_3.0_1725625424568.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/securebert_aptner_pipeline_en_5.5.0_3.0_1725625424568.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("securebert_aptner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("securebert_aptner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
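+
+As with the other pipelines, `df` is assumed to be a Spark DataFrame with a text column holding the sentences to tag. A minimal, illustrative sketch (the sample sentence and the `text` column name are assumptions, not part of this card):
+
+```python
+# Illustrative cybersecurity-style sentence for the APTNER tagger.
+df = spark.createDataFrame(
+    [["The group delivered the malware through spear-phishing emails."]]
+).toDF("text")
+
+annotations = pipeline.transform(df)
+annotations.printSchema()  # the token-level NER tags appear as annotation columns
+```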
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|securebert_aptner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|466.3 MB| + +## References + +https://huggingface.co/Cyber-ThreaD/SecureBERT-APTNER + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-semantic_test_en.md b/docs/_posts/ahmedlone127/2024-09-06-semantic_test_en.md new file mode 100644 index 00000000000000..c49040aa833eaf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-semantic_test_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English semantic_test MPNetEmbeddings from Wipiii +author: John Snow Labs +name: semantic_test +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`semantic_test` is a English model originally trained by Wipiii. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/semantic_test_en_5.5.0_3.0_1725595250030.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/semantic_test_en_5.5.0_3.0_1725595250030.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("semantic_test","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("semantic_test","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
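+
+Because the model produces one vector per document, a common follow-up is comparing two texts by cosine similarity. A minimal sketch building on the fitted `pipelineModel` above (NumPy and the example sentences are assumptions, not part of the card):
+
+```python
+import numpy as np
+
+pair = spark.createDataFrame([["I love spark-nlp"], ["Spark NLP is great"]]).toDF("text")
+rows = pipelineModel.transform(pair).select("embeddings.embeddings").collect()
+
+v1, v2 = np.array(rows[0][0][0]), np.array(rows[1][0][0])
+print(float(v1 @ v2 / (np.linalg.norm(v1) * np.linalg.norm(v2))))  # cosine similarity
+```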
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|semantic_test| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/Wipiii/semantic-test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-semantic_test_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-semantic_test_pipeline_en.md new file mode 100644 index 00000000000000..72bb3877b3c407 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-semantic_test_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English semantic_test_pipeline pipeline MPNetEmbeddings from Wipiii +author: John Snow Labs +name: semantic_test_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`semantic_test_pipeline` is a English model originally trained by Wipiii. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/semantic_test_pipeline_en_5.5.0_3.0_1725595274106.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/semantic_test_pipeline_en_5.5.0_3.0_1725595274106.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("semantic_test_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("semantic_test_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|semantic_test_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/Wipiii/semantic-test + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sembr2023_bert_small_en.md b/docs/_posts/ahmedlone127/2024-09-06-sembr2023_bert_small_en.md new file mode 100644 index 00000000000000..fb004bb29c165b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sembr2023_bert_small_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sembr2023_bert_small BertForTokenClassification from admko +author: John Snow Labs +name: sembr2023_bert_small +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sembr2023_bert_small` is a English model originally trained by admko. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sembr2023_bert_small_en_5.5.0_3.0_1725600553685.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sembr2023_bert_small_en_5.5.0_3.0_1725600553685.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("sembr2023_bert_small","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("sembr2023_bert_small", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
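+
+After the pipeline runs, the tag for each token is in `ner.result`, aligned with `token.result`. A minimal sketch of pairing them up, using the column names from the example:
+
+```python
+# Collect one row and print token / predicted-tag pairs side by side.
+row = pipelineDF.select("token.result", "ner.result").first()
+for token, tag in zip(row[0], row[1]):
+    print(token, tag)
+```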
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sembr2023_bert_small| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/admko/sembr2023-bert-small \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_afro_xlmr_mini_finetuned_kintweetsc_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_afro_xlmr_mini_finetuned_kintweetsc_en.md new file mode 100644 index 00000000000000..d7a868f80d4ec8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_afro_xlmr_mini_finetuned_kintweetsc_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_afro_xlmr_mini_finetuned_kintweetsc XlmRoBertaSentenceEmbeddings from RogerB +author: John Snow Labs +name: sent_afro_xlmr_mini_finetuned_kintweetsc +date: 2024-09-06 +tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_afro_xlmr_mini_finetuned_kintweetsc` is a English model originally trained by RogerB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_afro_xlmr_mini_finetuned_kintweetsc_en_5.5.0_3.0_1725623640426.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_afro_xlmr_mini_finetuned_kintweetsc_en_5.5.0_3.0_1725623640426.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_afro_xlmr_mini_finetuned_kintweetsc","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_afro_xlmr_mini_finetuned_kintweetsc","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
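+
+Here the `embeddings` column holds one vector per detected sentence rather than per token. A quick, illustrative check of how many sentence vectors were produced and their size, reusing the example's column names:
+
+```python
+row = pipelineDF.select("sentence.result", "embeddings.embeddings").first()
+print(len(row[0]), "sentences,", len(row[1][0]), "dimensions per sentence vector")
+```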
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_afro_xlmr_mini_finetuned_kintweetsc| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|443.1 MB| + +## References + +https://huggingface.co/RogerB/afro-xlmr-mini-finetuned-kintweetsC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_afro_xlmr_mini_finetuned_kintweetsc_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_afro_xlmr_mini_finetuned_kintweetsc_pipeline_en.md new file mode 100644 index 00000000000000..440b526b87c2d7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_afro_xlmr_mini_finetuned_kintweetsc_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_afro_xlmr_mini_finetuned_kintweetsc_pipeline pipeline XlmRoBertaSentenceEmbeddings from RogerB +author: John Snow Labs +name: sent_afro_xlmr_mini_finetuned_kintweetsc_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_afro_xlmr_mini_finetuned_kintweetsc_pipeline` is a English model originally trained by RogerB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_afro_xlmr_mini_finetuned_kintweetsc_pipeline_en_5.5.0_3.0_1725623671307.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_afro_xlmr_mini_finetuned_kintweetsc_pipeline_en_5.5.0_3.0_1725623671307.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_afro_xlmr_mini_finetuned_kintweetsc_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_afro_xlmr_mini_finetuned_kintweetsc_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_afro_xlmr_mini_finetuned_kintweetsc_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|443.7 MB| + +## References + +https://huggingface.co/RogerB/afro-xlmr-mini-finetuned-kintweetsC + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_analysis_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_analysis_en.md new file mode 100644 index 00000000000000..64c89a7dfec232 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_analysis_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_analysis DistilBertForSequenceClassification from ljtaylor99 +author: John Snow Labs +name: sent_analysis +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_analysis` is a English model originally trained by ljtaylor99. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_analysis_en_5.5.0_3.0_1725607946407.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_analysis_en_5.5.0_3.0_1725607946407.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("sent_analysis","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("sent_analysis", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_analysis| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/ljtaylor99/sent-analysis \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_analysis_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_analysis_pipeline_en.md new file mode 100644 index 00000000000000..750f6065073e33 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_analysis_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English sent_analysis_pipeline pipeline DistilBertForSequenceClassification from ljtaylor99 +author: John Snow Labs +name: sent_analysis_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_analysis_pipeline` is a English model originally trained by ljtaylor99. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_analysis_pipeline_en_5.5.0_3.0_1725607964055.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_analysis_pipeline_en_5.5.0_3.0_1725607964055.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_analysis_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_analysis_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_analysis_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/ljtaylor99/sent-analysis + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_bert_base_english_chinese_cased_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_bert_base_english_chinese_cased_en.md new file mode 100644 index 00000000000000..3e064d69421786 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_bert_base_english_chinese_cased_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_bert_base_english_chinese_cased BertSentenceEmbeddings from Geotrend +author: John Snow Labs +name: sent_bert_base_english_chinese_cased +date: 2024-09-06 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_base_english_chinese_cased` is a English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_base_english_chinese_cased_en_5.5.0_3.0_1725667161499.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_base_english_chinese_cased_en_5.5.0_3.0_1725667161499.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_bert_base_english_chinese_cased","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_bert_base_english_chinese_cased","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_base_english_chinese_cased| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|421.8 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-zh-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_bert_base_english_chinese_cased_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_bert_base_english_chinese_cased_pipeline_en.md new file mode 100644 index 00000000000000..c51d470a3d0b2f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_bert_base_english_chinese_cased_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_bert_base_english_chinese_cased_pipeline pipeline BertSentenceEmbeddings from Geotrend +author: John Snow Labs +name: sent_bert_base_english_chinese_cased_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_base_english_chinese_cased_pipeline` is a English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_base_english_chinese_cased_pipeline_en_5.5.0_3.0_1725667180105.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_base_english_chinese_cased_pipeline_en_5.5.0_3.0_1725667180105.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_bert_base_english_chinese_cased_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_bert_base_english_chinese_cased_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_base_english_chinese_cased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|422.3 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-zh-cased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_bert_base_historic_multilingual_cased_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-06-sent_bert_base_historic_multilingual_cased_pipeline_xx.md new file mode 100644 index 00000000000000..76261c62e3460b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_bert_base_historic_multilingual_cased_pipeline_xx.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Multilingual sent_bert_base_historic_multilingual_cased_pipeline pipeline BertSentenceEmbeddings from dbmdz +author: John Snow Labs +name: sent_bert_base_historic_multilingual_cased_pipeline +date: 2024-09-06 +tags: [xx, open_source, pipeline, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_base_historic_multilingual_cased_pipeline` is a Multilingual model originally trained by dbmdz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_base_historic_multilingual_cased_pipeline_xx_5.5.0_3.0_1725650678099.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_base_historic_multilingual_cased_pipeline_xx_5.5.0_3.0_1725650678099.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_bert_base_historic_multilingual_cased_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_bert_base_historic_multilingual_cased_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_base_historic_multilingual_cased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|412.6 MB| + +## References + +https://huggingface.co/dbmdz/bert-base-historic-multilingual-cased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_bert_base_multilingual_cased_finetuned_yoruba_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-06-sent_bert_base_multilingual_cased_finetuned_yoruba_pipeline_xx.md new file mode 100644 index 00000000000000..4ecb7b7b7bce0a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_bert_base_multilingual_cased_finetuned_yoruba_pipeline_xx.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Multilingual sent_bert_base_multilingual_cased_finetuned_yoruba_pipeline pipeline BertSentenceEmbeddings from Davlan +author: John Snow Labs +name: sent_bert_base_multilingual_cased_finetuned_yoruba_pipeline +date: 2024-09-06 +tags: [xx, open_source, pipeline, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_base_multilingual_cased_finetuned_yoruba_pipeline` is a Multilingual model originally trained by Davlan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_base_multilingual_cased_finetuned_yoruba_pipeline_xx_5.5.0_3.0_1725666776299.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_base_multilingual_cased_finetuned_yoruba_pipeline_xx_5.5.0_3.0_1725666776299.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_bert_base_multilingual_cased_finetuned_yoruba_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_bert_base_multilingual_cased_finetuned_yoruba_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_base_multilingual_cased_finetuned_yoruba_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|665.6 MB| + +## References + +https://huggingface.co/Davlan/bert-base-multilingual-cased-finetuned-yoruba + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_bert_base_multilingual_cased_finetuned_yoruba_xx.md b/docs/_posts/ahmedlone127/2024-09-06-sent_bert_base_multilingual_cased_finetuned_yoruba_xx.md new file mode 100644 index 00000000000000..e08dbcb9eead1b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_bert_base_multilingual_cased_finetuned_yoruba_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual sent_bert_base_multilingual_cased_finetuned_yoruba BertSentenceEmbeddings from Davlan +author: John Snow Labs +name: sent_bert_base_multilingual_cased_finetuned_yoruba +date: 2024-09-06 +tags: [xx, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_base_multilingual_cased_finetuned_yoruba` is a Multilingual model originally trained by Davlan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_base_multilingual_cased_finetuned_yoruba_xx_5.5.0_3.0_1725666745859.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_base_multilingual_cased_finetuned_yoruba_xx_5.5.0_3.0_1725666745859.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_bert_base_multilingual_cased_finetuned_yoruba","xx") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_bert_base_multilingual_cased_finetuned_yoruba","xx") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_base_multilingual_cased_finetuned_yoruba| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|665.0 MB| + +## References + +https://huggingface.co/Davlan/bert-base-multilingual-cased-finetuned-yoruba \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_bert_base_romanian_uncased_v1_pipeline_ro.md b/docs/_posts/ahmedlone127/2024-09-06-sent_bert_base_romanian_uncased_v1_pipeline_ro.md new file mode 100644 index 00000000000000..c96139bd81d502 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_bert_base_romanian_uncased_v1_pipeline_ro.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Moldavian, Moldovan, Romanian sent_bert_base_romanian_uncased_v1_pipeline pipeline BertSentenceEmbeddings from dumitrescustefan +author: John Snow Labs +name: sent_bert_base_romanian_uncased_v1_pipeline +date: 2024-09-06 +tags: [ro, open_source, pipeline, onnx] +task: Embeddings +language: ro +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_base_romanian_uncased_v1_pipeline` is a Moldavian, Moldovan, Romanian model originally trained by dumitrescustefan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_base_romanian_uncased_v1_pipeline_ro_5.5.0_3.0_1725651094452.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_base_romanian_uncased_v1_pipeline_ro_5.5.0_3.0_1725651094452.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_bert_base_romanian_uncased_v1_pipeline", lang = "ro") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_bert_base_romanian_uncased_v1_pipeline", lang = "ro") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_base_romanian_uncased_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ro| +|Size:|465.0 MB| + +## References + +https://huggingface.co/dumitrescustefan/bert-base-romanian-uncased-v1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_bert_for_patents_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_bert_for_patents_en.md new file mode 100644 index 00000000000000..e6e63f43b494c9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_bert_for_patents_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_bert_for_patents BertSentenceEmbeddings from anferico +author: John Snow Labs +name: sent_bert_for_patents +date: 2024-09-06 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_for_patents` is a English model originally trained by anferico. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_for_patents_en_5.5.0_3.0_1725651521612.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_for_patents_en_5.5.0_3.0_1725651521612.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_bert_for_patents","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_bert_for_patents","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
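+
+The Python snippet above omits its imports; a minimal sketch of the setup it assumes (with `sparknlp.start()` shown only as one illustrative way to obtain the Spark session) is:
+
+```python
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, BertSentenceEmbeddings
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()  # Spark session with Spark NLP registered
+```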
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_for_patents| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/anferico/bert-for-patents \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_bert_for_patents_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_bert_for_patents_pipeline_en.md new file mode 100644 index 00000000000000..dcf77249db7141 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_bert_for_patents_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_bert_for_patents_pipeline pipeline BertSentenceEmbeddings from anferico +author: John Snow Labs +name: sent_bert_for_patents_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_for_patents_pipeline` is a English model originally trained by anferico. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_for_patents_pipeline_en_5.5.0_3.0_1725651578901.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_for_patents_pipeline_en_5.5.0_3.0_1725651578901.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_bert_for_patents_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_bert_for_patents_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
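+
+The Python snippet above assumes an existing Spark session and a DataFrame `df` holding the input text; a minimal, self-contained sketch (the `sparknlp.start()` call, the `text` column name and the sample sentence are illustrative assumptions, not part of the original example) could look like:
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+# Start (or reuse) a Spark session with Spark NLP available
+spark = sparknlp.start()
+
+# Input DataFrame with a single text column
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("sent_bert_for_patents_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+annotations.show()
+```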
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_for_patents_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/anferico/bert-for-patents + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_bert_large_uncased_semeval2014_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_bert_large_uncased_semeval2014_pipeline_en.md new file mode 100644 index 00000000000000..9a3cb9302c778f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_bert_large_uncased_semeval2014_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_bert_large_uncased_semeval2014_pipeline pipeline BertSentenceEmbeddings from StevenLimcorn +author: John Snow Labs +name: sent_bert_large_uncased_semeval2014_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_large_uncased_semeval2014_pipeline` is a English model originally trained by StevenLimcorn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_large_uncased_semeval2014_pipeline_en_5.5.0_3.0_1725666991696.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_large_uncased_semeval2014_pipeline_en_5.5.0_3.0_1725666991696.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_bert_large_uncased_semeval2014_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_bert_large_uncased_semeval2014_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
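+
+The Python snippet above assumes an existing Spark session and a DataFrame `df` holding the input text; a minimal, self-contained sketch (the `sparknlp.start()` call, the `text` column name and the sample sentence are illustrative assumptions, not part of the original example) could look like:
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+# Start (or reuse) a Spark session with Spark NLP available
+spark = sparknlp.start()
+
+# Input DataFrame with a single text column
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("sent_bert_large_uncased_semeval2014_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+annotations.show()
+```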
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_large_uncased_semeval2014_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/StevenLimcorn/bert-large-uncased-semeval2014 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_bert_medieval_multilingual_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-06-sent_bert_medieval_multilingual_pipeline_xx.md new file mode 100644 index 00000000000000..ad1aa7f90254fd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_bert_medieval_multilingual_pipeline_xx.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Multilingual sent_bert_medieval_multilingual_pipeline pipeline BertSentenceEmbeddings from magistermilitum +author: John Snow Labs +name: sent_bert_medieval_multilingual_pipeline +date: 2024-09-06 +tags: [xx, open_source, pipeline, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_medieval_multilingual_pipeline` is a Multilingual model originally trained by magistermilitum. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_medieval_multilingual_pipeline_xx_5.5.0_3.0_1725667198036.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_medieval_multilingual_pipeline_xx_5.5.0_3.0_1725667198036.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_bert_medieval_multilingual_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_bert_medieval_multilingual_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
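+
+The Python snippet above assumes an existing Spark session and a DataFrame `df` holding the input text; a minimal, self-contained sketch (the `sparknlp.start()` call, the `text` column name and the sample sentence are illustrative assumptions, not part of the original example) could look like:
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+# Start (or reuse) a Spark session with Spark NLP available
+spark = sparknlp.start()
+
+# Input DataFrame with a single text column
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("sent_bert_medieval_multilingual_pipeline", lang = "xx")
+annotations = pipeline.transform(df)
+annotations.show()
+```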
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_medieval_multilingual_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|665.5 MB| + +## References + +https://huggingface.co/magistermilitum/bert_medieval_multilingual + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_bert_medieval_multilingual_xx.md b/docs/_posts/ahmedlone127/2024-09-06-sent_bert_medieval_multilingual_xx.md new file mode 100644 index 00000000000000..5678cd0bc0e5fe --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_bert_medieval_multilingual_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual sent_bert_medieval_multilingual BertSentenceEmbeddings from magistermilitum +author: John Snow Labs +name: sent_bert_medieval_multilingual +date: 2024-09-06 +tags: [xx, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_medieval_multilingual` is a Multilingual model originally trained by magistermilitum. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_medieval_multilingual_xx_5.5.0_3.0_1725667167151.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_medieval_multilingual_xx_5.5.0_3.0_1725667167151.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_bert_medieval_multilingual","xx") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_bert_medieval_multilingual","xx") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
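+
+The Python snippet above omits its imports; a minimal sketch of the setup it assumes (with `sparknlp.start()` shown only as one illustrative way to obtain the Spark session) is:
+
+```python
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, BertSentenceEmbeddings
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()  # Spark session with Spark NLP registered
+```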
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_medieval_multilingual| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|664.9 MB| + +## References + +https://huggingface.co/magistermilitum/bert_medieval_multilingual \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_biomedvlp_cxr_bert_general_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_biomedvlp_cxr_bert_general_pipeline_en.md new file mode 100644 index 00000000000000..a862dba8694643 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_biomedvlp_cxr_bert_general_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_biomedvlp_cxr_bert_general_pipeline pipeline BertSentenceEmbeddings from microsoft +author: John Snow Labs +name: sent_biomedvlp_cxr_bert_general_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_biomedvlp_cxr_bert_general_pipeline` is a English model originally trained by microsoft. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_biomedvlp_cxr_bert_general_pipeline_en_5.5.0_3.0_1725651349081.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_biomedvlp_cxr_bert_general_pipeline_en_5.5.0_3.0_1725651349081.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_biomedvlp_cxr_bert_general_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_biomedvlp_cxr_bert_general_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
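+
+The Python snippet above assumes an existing Spark session and a DataFrame `df` holding the input text; a minimal, self-contained sketch (the `sparknlp.start()` call, the `text` column name and the sample sentence are illustrative assumptions, not part of the original example) could look like:
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+# Start (or reuse) a Spark session with Spark NLP available
+spark = sparknlp.start()
+
+# Input DataFrame with a single text column
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("sent_biomedvlp_cxr_bert_general_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+annotations.show()
+```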
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_biomedvlp_cxr_bert_general_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|260.8 MB| + +## References + +https://huggingface.co/microsoft/BiomedVLP-CXR-BERT-general + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_checkpoint_12600_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_checkpoint_12600_en.md new file mode 100644 index 00000000000000..1fd8981eb3ee17 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_checkpoint_12600_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_checkpoint_12600 XlmRoBertaSentenceEmbeddings from yemen2016 +author: John Snow Labs +name: sent_checkpoint_12600 +date: 2024-09-06 +tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_checkpoint_12600` is a English model originally trained by yemen2016. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_checkpoint_12600_en_5.5.0_3.0_1725623508648.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_checkpoint_12600_en_5.5.0_3.0_1725623508648.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_checkpoint_12600","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_checkpoint_12600","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
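+
+The Python snippet above omits its imports; a minimal sketch of the setup it assumes (with `sparknlp.start()` shown only as one illustrative way to obtain the Spark session) is:
+
+```python
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, XlmRoBertaSentenceEmbeddings
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()  # Spark session with Spark NLP registered
+```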
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_checkpoint_12600| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/yemen2016/checkpoint-12600 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_czert_b_base_cased_cs.md b/docs/_posts/ahmedlone127/2024-09-06-sent_czert_b_base_cased_cs.md new file mode 100644 index 00000000000000..c09ce2bdc67ce0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_czert_b_base_cased_cs.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Czech sent_czert_b_base_cased BertSentenceEmbeddings from UWB-AIR +author: John Snow Labs +name: sent_czert_b_base_cased +date: 2024-09-06 +tags: [cs, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: cs +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_czert_b_base_cased` is a Czech model originally trained by UWB-AIR. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_czert_b_base_cased_cs_5.5.0_3.0_1725650656270.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_czert_b_base_cased_cs_5.5.0_3.0_1725650656270.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_czert_b_base_cased","cs") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_czert_b_base_cased","cs") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
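+
+The Python snippet above omits its imports; a minimal sketch of the setup it assumes (with `sparknlp.start()` shown only as one illustrative way to obtain the Spark session) is:
+
+```python
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, BertSentenceEmbeddings
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()  # Spark session with Spark NLP registered
+```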
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_czert_b_base_cased| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|cs| +|Size:|408.3 MB| + +## References + +https://huggingface.co/UWB-AIR/Czert-B-base-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_czert_b_base_cased_pipeline_cs.md b/docs/_posts/ahmedlone127/2024-09-06-sent_czert_b_base_cased_pipeline_cs.md new file mode 100644 index 00000000000000..a396670aacfa01 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_czert_b_base_cased_pipeline_cs.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Czech sent_czert_b_base_cased_pipeline pipeline BertSentenceEmbeddings from UWB-AIR +author: John Snow Labs +name: sent_czert_b_base_cased_pipeline +date: 2024-09-06 +tags: [cs, open_source, pipeline, onnx] +task: Embeddings +language: cs +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_czert_b_base_cased_pipeline` is a Czech model originally trained by UWB-AIR. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_czert_b_base_cased_pipeline_cs_5.5.0_3.0_1725650675902.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_czert_b_base_cased_pipeline_cs_5.5.0_3.0_1725650675902.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_czert_b_base_cased_pipeline", lang = "cs") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_czert_b_base_cased_pipeline", lang = "cs") +val annotations = pipeline.transform(df) + +``` +
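+
+The Python snippet above assumes an existing Spark session and a DataFrame `df` holding the input text; a minimal, self-contained sketch (the `sparknlp.start()` call, the `text` column name and the sample sentence are illustrative assumptions, not part of the original example) could look like:
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+# Start (or reuse) a Spark session with Spark NLP available
+spark = sparknlp.start()
+
+# Input DataFrame with a single text column
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("sent_czert_b_base_cased_pipeline", lang = "cs")
+annotations = pipeline.transform(df)
+annotations.show()
+```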
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_czert_b_base_cased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|cs| +|Size:|408.8 MB| + +## References + +https://huggingface.co/UWB-AIR/Czert-B-base-cased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_entitycs_39_mlm_xlmr_base_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-06-sent_entitycs_39_mlm_xlmr_base_pipeline_xx.md new file mode 100644 index 00000000000000..3050d6658c09c6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_entitycs_39_mlm_xlmr_base_pipeline_xx.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Multilingual sent_entitycs_39_mlm_xlmr_base_pipeline pipeline XlmRoBertaSentenceEmbeddings from huawei-noah +author: John Snow Labs +name: sent_entitycs_39_mlm_xlmr_base_pipeline +date: 2024-09-06 +tags: [xx, open_source, pipeline, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_entitycs_39_mlm_xlmr_base_pipeline` is a Multilingual model originally trained by huawei-noah. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_entitycs_39_mlm_xlmr_base_pipeline_xx_5.5.0_3.0_1725623726769.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_entitycs_39_mlm_xlmr_base_pipeline_xx_5.5.0_3.0_1725623726769.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_entitycs_39_mlm_xlmr_base_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_entitycs_39_mlm_xlmr_base_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
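+
+The Python snippet above assumes an existing Spark session and a DataFrame `df` holding the input text; a minimal, self-contained sketch (the `sparknlp.start()` call, the `text` column name and the sample sentence are illustrative assumptions, not part of the original example) could look like:
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+# Start (or reuse) a Spark session with Spark NLP available
+spark = sparknlp.start()
+
+# Input DataFrame with a single text column
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("sent_entitycs_39_mlm_xlmr_base_pipeline", lang = "xx")
+annotations = pipeline.transform(df)
+annotations.show()
+```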
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_entitycs_39_mlm_xlmr_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|944.8 MB| + +## References + +https://huggingface.co/huawei-noah/EntityCS-39-MLM-xlmr-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_entitycs_39_mlm_xlmr_base_xx.md b/docs/_posts/ahmedlone127/2024-09-06-sent_entitycs_39_mlm_xlmr_base_xx.md new file mode 100644 index 00000000000000..74f4552c93dc29 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_entitycs_39_mlm_xlmr_base_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual sent_entitycs_39_mlm_xlmr_base XlmRoBertaSentenceEmbeddings from huawei-noah +author: John Snow Labs +name: sent_entitycs_39_mlm_xlmr_base +date: 2024-09-06 +tags: [xx, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_entitycs_39_mlm_xlmr_base` is a Multilingual model originally trained by huawei-noah. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_entitycs_39_mlm_xlmr_base_xx_5.5.0_3.0_1725623634709.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_entitycs_39_mlm_xlmr_base_xx_5.5.0_3.0_1725623634709.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_entitycs_39_mlm_xlmr_base","xx") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_entitycs_39_mlm_xlmr_base","xx") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
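+
+The Python snippet above omits its imports; a minimal sketch of the setup it assumes (with `sparknlp.start()` shown only as one illustrative way to obtain the Spark session) is:
+
+```python
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, XlmRoBertaSentenceEmbeddings
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()  # Spark session with Spark NLP registered
+```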
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_entitycs_39_mlm_xlmr_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|944.2 MB| + +## References + +https://huggingface.co/huawei-noah/EntityCS-39-MLM-xlmr-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_estroberta_et.md b/docs/_posts/ahmedlone127/2024-09-06-sent_estroberta_et.md new file mode 100644 index 00000000000000..6c6667b0bdd0c8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_estroberta_et.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Estonian sent_estroberta XlmRoBertaSentenceEmbeddings from tartuNLP +author: John Snow Labs +name: sent_estroberta +date: 2024-09-06 +tags: [et, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: et +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_estroberta` is a Estonian model originally trained by tartuNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_estroberta_et_5.5.0_3.0_1725623372067.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_estroberta_et_5.5.0_3.0_1725623372067.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_estroberta","et") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_estroberta","et") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
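+
+The Python snippet above omits its imports; a minimal sketch of the setup it assumes (with `sparknlp.start()` shown only as one illustrative way to obtain the Spark session) is:
+
+```python
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, XlmRoBertaSentenceEmbeddings
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()  # Spark session with Spark NLP registered
+```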
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_estroberta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|et| +|Size:|1.0 GB| + +## References + +https://huggingface.co/tartuNLP/EstRoBERTa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_estroberta_pipeline_et.md b/docs/_posts/ahmedlone127/2024-09-06-sent_estroberta_pipeline_et.md new file mode 100644 index 00000000000000..26add5017f7eca --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_estroberta_pipeline_et.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Estonian sent_estroberta_pipeline pipeline XlmRoBertaSentenceEmbeddings from tartuNLP +author: John Snow Labs +name: sent_estroberta_pipeline +date: 2024-09-06 +tags: [et, open_source, pipeline, onnx] +task: Embeddings +language: et +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_estroberta_pipeline` is a Estonian model originally trained by tartuNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_estroberta_pipeline_et_5.5.0_3.0_1725623424253.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_estroberta_pipeline_et_5.5.0_3.0_1725623424253.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_estroberta_pipeline", lang = "et") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_estroberta_pipeline", lang = "et") +val annotations = pipeline.transform(df) + +``` +
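+
+The Python snippet above assumes an existing Spark session and a DataFrame `df` holding the input text; a minimal, self-contained sketch (the `sparknlp.start()` call, the `text` column name and the sample sentence are illustrative assumptions, not part of the original example) could look like:
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+# Start (or reuse) a Spark session with Spark NLP available
+spark = sparknlp.start()
+
+# Input DataFrame with a single text column
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("sent_estroberta_pipeline", lang = "et")
+annotations = pipeline.transform(df)
+annotations.show()
+```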
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_estroberta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|et| +|Size:|1.0 GB| + +## References + +https://huggingface.co/tartuNLP/EstRoBERTa + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_fbert_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_fbert_en.md new file mode 100644 index 00000000000000..2b9fb6c151aa80 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_fbert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_fbert BertSentenceEmbeddings from diptanu +author: John Snow Labs +name: sent_fbert +date: 2024-09-06 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_fbert` is a English model originally trained by diptanu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_fbert_en_5.5.0_3.0_1725666723963.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_fbert_en_5.5.0_3.0_1725666723963.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_fbert","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_fbert","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
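+
+The Python snippet above omits its imports; a minimal sketch of the setup it assumes (with `sparknlp.start()` shown only as one illustrative way to obtain the Spark session) is:
+
+```python
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, BertSentenceEmbeddings
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()  # Spark session with Spark NLP registered
+```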
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_fbert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.2 MB| + +## References + +https://huggingface.co/diptanu/fBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_fbert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_fbert_pipeline_en.md new file mode 100644 index 00000000000000..6498e86c7f2733 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_fbert_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_fbert_pipeline pipeline BertSentenceEmbeddings from diptanu +author: John Snow Labs +name: sent_fbert_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_fbert_pipeline` is a English model originally trained by diptanu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_fbert_pipeline_en_5.5.0_3.0_1725666742988.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_fbert_pipeline_en_5.5.0_3.0_1725666742988.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_fbert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_fbert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
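+
+The Python snippet above assumes an existing Spark session and a DataFrame `df` holding the input text; a minimal, self-contained sketch (the `sparknlp.start()` call, the `text` column name and the sample sentence are illustrative assumptions, not part of the original example) could look like:
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+# Start (or reuse) a Spark session with Spark NLP available
+spark = sparknlp.start()
+
+# Input DataFrame with a single text column
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("sent_fbert_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+annotations.show()
+```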
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_fbert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/diptanu/fBERT + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_gbert_large_finetuned_cust_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_gbert_large_finetuned_cust_en.md new file mode 100644 index 00000000000000..c26aedf9769c09 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_gbert_large_finetuned_cust_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_gbert_large_finetuned_cust BertSentenceEmbeddings from shafin +author: John Snow Labs +name: sent_gbert_large_finetuned_cust +date: 2024-09-06 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_gbert_large_finetuned_cust` is a English model originally trained by shafin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_gbert_large_finetuned_cust_en_5.5.0_3.0_1725666803418.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_gbert_large_finetuned_cust_en_5.5.0_3.0_1725666803418.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_gbert_large_finetuned_cust","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_gbert_large_finetuned_cust","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
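+
+The Python snippet above omits its imports; a minimal sketch of the setup it assumes (with `sparknlp.start()` shown only as one illustrative way to obtain the Spark session) is:
+
+```python
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, BertSentenceEmbeddings
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()  # Spark session with Spark NLP registered
+```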
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_gbert_large_finetuned_cust| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/shafin/gbert-large-finetuned-cust \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_gbert_large_finetuned_cust_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_gbert_large_finetuned_cust_pipeline_en.md new file mode 100644 index 00000000000000..d4c19eafb355c0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_gbert_large_finetuned_cust_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_gbert_large_finetuned_cust_pipeline pipeline BertSentenceEmbeddings from shafin +author: John Snow Labs +name: sent_gbert_large_finetuned_cust_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_gbert_large_finetuned_cust_pipeline` is a English model originally trained by shafin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_gbert_large_finetuned_cust_pipeline_en_5.5.0_3.0_1725666867715.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_gbert_large_finetuned_cust_pipeline_en_5.5.0_3.0_1725666867715.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_gbert_large_finetuned_cust_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_gbert_large_finetuned_cust_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
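+
+The Python snippet above assumes an existing Spark session and a DataFrame `df` holding the input text; a minimal, self-contained sketch (the `sparknlp.start()` call, the `text` column name and the sample sentence are illustrative assumptions, not part of the original example) could look like:
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+# Start (or reuse) a Spark session with Spark NLP available
+spark = sparknlp.start()
+
+# Input DataFrame with a single text column
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("sent_gbert_large_finetuned_cust_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+annotations.show()
+```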
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_gbert_large_finetuned_cust_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/shafin/gbert-large-finetuned-cust + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_hing_mbert_hi.md b/docs/_posts/ahmedlone127/2024-09-06-sent_hing_mbert_hi.md new file mode 100644 index 00000000000000..04739fc6313306 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_hing_mbert_hi.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Hindi sent_hing_mbert BertSentenceEmbeddings from l3cube-pune +author: John Snow Labs +name: sent_hing_mbert +date: 2024-09-06 +tags: [hi, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: hi +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_hing_mbert` is a Hindi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_hing_mbert_hi_5.5.0_3.0_1725650810137.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_hing_mbert_hi_5.5.0_3.0_1725650810137.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_hing_mbert","hi") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_hing_mbert","hi") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
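+
+The Python snippet above omits its imports; a minimal sketch of the setup it assumes (with `sparknlp.start()` shown only as one illustrative way to obtain the Spark session) is:
+
+```python
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, BertSentenceEmbeddings
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()  # Spark session with Spark NLP registered
+```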
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_hing_mbert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|hi| +|Size:|664.9 MB| + +## References + +https://huggingface.co/l3cube-pune/hing-mbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_indicbertv2_mlm_sam_tlm_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-06-sent_indicbertv2_mlm_sam_tlm_pipeline_xx.md new file mode 100644 index 00000000000000..7b4843c5a846d4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_indicbertv2_mlm_sam_tlm_pipeline_xx.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Multilingual sent_indicbertv2_mlm_sam_tlm_pipeline pipeline BertSentenceEmbeddings from ai4bharat +author: John Snow Labs +name: sent_indicbertv2_mlm_sam_tlm_pipeline +date: 2024-09-06 +tags: [xx, open_source, pipeline, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_indicbertv2_mlm_sam_tlm_pipeline` is a Multilingual model originally trained by ai4bharat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_indicbertv2_mlm_sam_tlm_pipeline_xx_5.5.0_3.0_1725651368722.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_indicbertv2_mlm_sam_tlm_pipeline_xx_5.5.0_3.0_1725651368722.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_indicbertv2_mlm_sam_tlm_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_indicbertv2_mlm_sam_tlm_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
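+
+The Python snippet above assumes an existing Spark session and a DataFrame `df` holding the input text; a minimal, self-contained sketch (the `sparknlp.start()` call, the `text` column name and the sample sentence are illustrative assumptions, not part of the original example) could look like:
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+# Start (or reuse) a Spark session with Spark NLP available
+spark = sparknlp.start()
+
+# Input DataFrame with a single text column
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("sent_indicbertv2_mlm_sam_tlm_pipeline", lang = "xx")
+annotations = pipeline.transform(df)
+annotations.show()
+```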
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_indicbertv2_mlm_sam_tlm_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|1.0 GB| + +## References + +https://huggingface.co/ai4bharat/IndicBERTv2-MLM-Sam-TLM + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_indicbertv2_mlm_sam_tlm_xx.md b/docs/_posts/ahmedlone127/2024-09-06-sent_indicbertv2_mlm_sam_tlm_xx.md new file mode 100644 index 00000000000000..4605f76f2e8f4d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_indicbertv2_mlm_sam_tlm_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual sent_indicbertv2_mlm_sam_tlm BertSentenceEmbeddings from ai4bharat +author: John Snow Labs +name: sent_indicbertv2_mlm_sam_tlm +date: 2024-09-06 +tags: [xx, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_indicbertv2_mlm_sam_tlm` is a Multilingual model originally trained by ai4bharat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_indicbertv2_mlm_sam_tlm_xx_5.5.0_3.0_1725651318623.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_indicbertv2_mlm_sam_tlm_xx_5.5.0_3.0_1725651318623.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_indicbertv2_mlm_sam_tlm","xx") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_indicbertv2_mlm_sam_tlm","xx") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
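+
+The Python snippet above omits its imports; a minimal sketch of the setup it assumes (with `sparknlp.start()` shown only as one illustrative way to obtain the Spark session) is:
+
+```python
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, BertSentenceEmbeddings
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()  # Spark session with Spark NLP registered
+```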
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_indicbertv2_mlm_sam_tlm| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|1.0 GB| + +## References + +https://huggingface.co/ai4bharat/IndicBERTv2-MLM-Sam-TLM \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_koobert_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-06-sent_koobert_pipeline_xx.md new file mode 100644 index 00000000000000..4c52a3a79e64c4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_koobert_pipeline_xx.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Multilingual sent_koobert_pipeline pipeline BertSentenceEmbeddings from KooAI +author: John Snow Labs +name: sent_koobert_pipeline +date: 2024-09-06 +tags: [xx, open_source, pipeline, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_koobert_pipeline` is a Multilingual model originally trained by KooAI. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_koobert_pipeline_xx_5.5.0_3.0_1725651161757.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_koobert_pipeline_xx_5.5.0_3.0_1725651161757.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_koobert_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_koobert_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
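+
+The Python snippet above assumes an existing Spark session and a DataFrame `df` holding the input text; a minimal, self-contained sketch (the `sparknlp.start()` call, the `text` column name and the sample sentence are illustrative assumptions, not part of the original example) could look like:
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+# Start (or reuse) a Spark session with Spark NLP available
+spark = sparknlp.start()
+
+# Input DataFrame with a single text column
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("sent_koobert_pipeline", lang = "xx")
+annotations = pipeline.transform(df)
+annotations.show()
+```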
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_koobert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|689.6 MB| + +## References + +https://huggingface.co/KooAI/KooBERT + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_legal_bert_base_cased_ptbr_pt.md b/docs/_posts/ahmedlone127/2024-09-06-sent_legal_bert_base_cased_ptbr_pt.md new file mode 100644 index 00000000000000..f33e98fe01b90f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_legal_bert_base_cased_ptbr_pt.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Portuguese sent_legal_bert_base_cased_ptbr BertSentenceEmbeddings from dominguesm +author: John Snow Labs +name: sent_legal_bert_base_cased_ptbr +date: 2024-09-06 +tags: [pt, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: pt +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_legal_bert_base_cased_ptbr` is a Portuguese model originally trained by dominguesm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_legal_bert_base_cased_ptbr_pt_5.5.0_3.0_1725651117698.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_legal_bert_base_cased_ptbr_pt_5.5.0_3.0_1725651117698.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_legal_bert_base_cased_ptbr","pt") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_legal_bert_base_cased_ptbr","pt") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
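+
+The Python example above relies on the standard Spark NLP imports being in scope. A self-contained sketch with those imports and one way to pull the raw sentence vectors out of the resulting `embeddings` column is shown below (the Portuguese sample text and the vector-extraction step are illustrative additions, not taken from the card):
+
+```python
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, BertSentenceEmbeddings
+from pyspark.ml import Pipeline
+from pyspark.sql import functions as F
+
+spark = sparknlp.start()
+
+document_assembler = DocumentAssembler().setInputCol("text").setOutputCol("document")
+sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]).setOutputCol("sentence")
+embeddings = BertSentenceEmbeddings.pretrained("sent_legal_bert_base_cased_ptbr", "pt") \
+    .setInputCols(["sentence"]).setOutputCol("embeddings")
+
+data = spark.createDataFrame([["Eu amo o Spark NLP"]]).toDF("text")
+result = Pipeline(stages=[document_assembler, sentence_detector, embeddings]).fit(data).transform(data)
+
+# Each "embeddings" annotation stores its numeric vector in a nested "embeddings" field.
+result.select(F.explode("embeddings.embeddings").alias("sentence_vector")).show(1, truncate=80)
+```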
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_legal_bert_base_cased_ptbr| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|470.0 MB| + +## References + +https://huggingface.co/dominguesm/legal-bert-base-cased-ptbr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_neural_cherche_sparse_embed_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_neural_cherche_sparse_embed_pipeline_en.md new file mode 100644 index 00000000000000..52340c995b29e7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_neural_cherche_sparse_embed_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_neural_cherche_sparse_embed_pipeline pipeline BertSentenceEmbeddings from raphaelsty +author: John Snow Labs +name: sent_neural_cherche_sparse_embed_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_neural_cherche_sparse_embed_pipeline` is a English model originally trained by raphaelsty. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_neural_cherche_sparse_embed_pipeline_en_5.5.0_3.0_1725650941595.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_neural_cherche_sparse_embed_pipeline_en_5.5.0_3.0_1725650941595.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_neural_cherche_sparse_embed_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_neural_cherche_sparse_embed_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
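+
+For a quick check on a single string, the same pretrained pipeline can also be driven without building a DataFrame. This is a sketch using `PretrainedPipeline.annotate`, which returns a dictionary keyed by the output column names of the included stages (the sample sentence is illustrative):
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+pipeline = PretrainedPipeline("sent_neural_cherche_sparse_embed_pipeline", lang="en")
+
+# annotate() runs every stage on a plain string; inspect the keys to see
+# which annotation columns this particular pipeline produces.
+result = pipeline.annotate("Sparse embeddings are useful for retrieval.")
+print(result.keys())
+```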
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_neural_cherche_sparse_embed_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|408.0 MB| + +## References + +https://huggingface.co/raphaelsty/neural-cherche-sparse-embed + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_odia_bert_pipeline_or.md b/docs/_posts/ahmedlone127/2024-09-06-sent_odia_bert_pipeline_or.md new file mode 100644 index 00000000000000..749c9f779b4b38 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_odia_bert_pipeline_or.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Oriya (macrolanguage) sent_odia_bert_pipeline pipeline BertSentenceEmbeddings from l3cube-pune +author: John Snow Labs +name: sent_odia_bert_pipeline +date: 2024-09-06 +tags: [or, open_source, pipeline, onnx] +task: Embeddings +language: or +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_odia_bert_pipeline` is a Oriya (macrolanguage) model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_odia_bert_pipeline_or_5.5.0_3.0_1725651007547.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_odia_bert_pipeline_or_5.5.0_3.0_1725651007547.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_odia_bert_pipeline", lang = "or") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_odia_bert_pipeline", lang = "or") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_odia_bert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|or| +|Size:|890.9 MB| + +## References + +https://huggingface.co/l3cube-pune/odia-bert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_ofa_multi_400_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_ofa_multi_400_en.md new file mode 100644 index 00000000000000..3d7f51cefaf9bc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_ofa_multi_400_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_ofa_multi_400 XlmRoBertaSentenceEmbeddings from yihongLiu +author: John Snow Labs +name: sent_ofa_multi_400 +date: 2024-09-06 +tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_ofa_multi_400` is a English model originally trained by yihongLiu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_ofa_multi_400_en_5.5.0_3.0_1725622679910.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_ofa_multi_400_en_5.5.0_3.0_1725622679910.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_ofa_multi_400","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_ofa_multi_400","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_ofa_multi_400| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/yihongLiu/ofa-multi-400 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_ofa_multi_400_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_ofa_multi_400_pipeline_en.md new file mode 100644 index 00000000000000..323fefaaa78d67 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_ofa_multi_400_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_ofa_multi_400_pipeline pipeline XlmRoBertaSentenceEmbeddings from yihongLiu +author: John Snow Labs +name: sent_ofa_multi_400_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_ofa_multi_400_pipeline` is a English model originally trained by yihongLiu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_ofa_multi_400_pipeline_en_5.5.0_3.0_1725622762276.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_ofa_multi_400_pipeline_en_5.5.0_3.0_1725622762276.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_ofa_multi_400_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_ofa_multi_400_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_ofa_multi_400_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/yihongLiu/ofa-multi-400 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_ope_bert_v2_1_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_ope_bert_v2_1_en.md new file mode 100644 index 00000000000000..e66216386bb8fb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_ope_bert_v2_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_ope_bert_v2_1 BertSentenceEmbeddings from RyotaroOKabe +author: John Snow Labs +name: sent_ope_bert_v2_1 +date: 2024-09-06 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_ope_bert_v2_1` is a English model originally trained by RyotaroOKabe. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_ope_bert_v2_1_en_5.5.0_3.0_1725666909904.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_ope_bert_v2_1_en_5.5.0_3.0_1725666909904.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_ope_bert_v2_1","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_ope_bert_v2_1","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
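+
+A common downstream use of these sentence vectors is a similarity score. The sketch below embeds two sentences with the model from the example above, collects the vectors to the driver, and compares them with NumPy; the comparison step is a generic illustration and not part of the card:
+
+```python
+import numpy as np
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, BertSentenceEmbeddings
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()
+
+document_assembler = DocumentAssembler().setInputCol("text").setOutputCol("document")
+sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]).setOutputCol("sentence")
+embeddings = BertSentenceEmbeddings.pretrained("sent_ope_bert_v2_1", "en") \
+    .setInputCols(["sentence"]).setOutputCol("embeddings")
+
+data = spark.createDataFrame([["I love Spark NLP"], ["Spark NLP is great"]]).toDF("text")
+result = Pipeline(stages=[document_assembler, sentence_detector, embeddings]).fit(data).transform(data)
+
+# Take the first sentence vector of each row and compute cosine similarity.
+rows = result.select("embeddings.embeddings").collect()
+v1, v2 = np.array(rows[0][0][0]), np.array(rows[1][0][0])
+cosine = float(v1 @ v2 / (np.linalg.norm(v1) * np.linalg.norm(v2)))
+print(f"cosine similarity: {cosine:.4f}")
+```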
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_ope_bert_v2_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/RyotaroOKabe/ope_bert_v2.1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_ope_bert_v2_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_ope_bert_v2_1_pipeline_en.md new file mode 100644 index 00000000000000..ff88bb7229a20d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_ope_bert_v2_1_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_ope_bert_v2_1_pipeline pipeline BertSentenceEmbeddings from RyotaroOKabe +author: John Snow Labs +name: sent_ope_bert_v2_1_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_ope_bert_v2_1_pipeline` is a English model originally trained by RyotaroOKabe. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_ope_bert_v2_1_pipeline_en_5.5.0_3.0_1725666929722.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_ope_bert_v2_1_pipeline_en_5.5.0_3.0_1725666929722.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_ope_bert_v2_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_ope_bert_v2_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_ope_bert_v2_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.8 MB| + +## References + +https://huggingface.co/RyotaroOKabe/ope_bert_v2.1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_panx_xlmr_base_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_panx_xlmr_base_en.md new file mode 100644 index 00000000000000..a4ed7728960904 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_panx_xlmr_base_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_panx_xlmr_base XlmRoBertaSentenceEmbeddings from oceanpty +author: John Snow Labs +name: sent_panx_xlmr_base +date: 2024-09-06 +tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_panx_xlmr_base` is a English model originally trained by oceanpty. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_panx_xlmr_base_en_5.5.0_3.0_1725622905786.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_panx_xlmr_base_en_5.5.0_3.0_1725622905786.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_panx_xlmr_base","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_panx_xlmr_base","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_panx_xlmr_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|797.5 MB| + +## References + +https://huggingface.co/oceanpty/panx-xlmr-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_panx_xlmr_base_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_panx_xlmr_base_pipeline_en.md new file mode 100644 index 00000000000000..9550970ccea9eb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_panx_xlmr_base_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_panx_xlmr_base_pipeline pipeline XlmRoBertaSentenceEmbeddings from oceanpty +author: John Snow Labs +name: sent_panx_xlmr_base_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_panx_xlmr_base_pipeline` is a English model originally trained by oceanpty. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_panx_xlmr_base_pipeline_en_5.5.0_3.0_1725623044956.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_panx_xlmr_base_pipeline_en_5.5.0_3.0_1725623044956.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_panx_xlmr_base_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_panx_xlmr_base_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_panx_xlmr_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|798.0 MB| + +## References + +https://huggingface.co/oceanpty/panx-xlmr-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_psych_search_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_psych_search_en.md new file mode 100644 index 00000000000000..29c512e90d4def --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_psych_search_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_psych_search BertSentenceEmbeddings from nlp4good +author: John Snow Labs +name: sent_psych_search +date: 2024-09-06 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_psych_search` is a English model originally trained by nlp4good. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_psych_search_en_5.5.0_3.0_1725651306927.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_psych_search_en_5.5.0_3.0_1725651306927.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_psych_search","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_psych_search","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_psych_search| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/nlp4good/psych-search \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_psych_search_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_psych_search_pipeline_en.md new file mode 100644 index 00000000000000..44dda6f716b864 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_psych_search_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_psych_search_pipeline pipeline BertSentenceEmbeddings from nlp4good +author: John Snow Labs +name: sent_psych_search_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_psych_search_pipeline` is a English model originally trained by nlp4good. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_psych_search_pipeline_en_5.5.0_3.0_1725651325259.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_psych_search_pipeline_en_5.5.0_3.0_1725651325259.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_psych_search_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_psych_search_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_psych_search_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|410.5 MB| + +## References + +https://huggingface.co/nlp4good/psych-search + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_roberta_base_exp_32_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-06-sent_roberta_base_exp_32_pipeline_xx.md new file mode 100644 index 00000000000000..b946393f5ec414 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_roberta_base_exp_32_pipeline_xx.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Multilingual sent_roberta_base_exp_32_pipeline pipeline XlmRoBertaSentenceEmbeddings from pere +author: John Snow Labs +name: sent_roberta_base_exp_32_pipeline +date: 2024-09-06 +tags: [xx, open_source, pipeline, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_roberta_base_exp_32_pipeline` is a Multilingual model originally trained by pere. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_roberta_base_exp_32_pipeline_xx_5.5.0_3.0_1725623916581.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_roberta_base_exp_32_pipeline_xx_5.5.0_3.0_1725623916581.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_roberta_base_exp_32_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_roberta_base_exp_32_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_roberta_base_exp_32_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|1.0 GB| + +## References + +https://huggingface.co/pere/roberta-base-exp-32 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_roberta_base_exp_32_xx.md b/docs/_posts/ahmedlone127/2024-09-06-sent_roberta_base_exp_32_xx.md new file mode 100644 index 00000000000000..3789af416316c9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_roberta_base_exp_32_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual sent_roberta_base_exp_32 XlmRoBertaSentenceEmbeddings from pere +author: John Snow Labs +name: sent_roberta_base_exp_32 +date: 2024-09-06 +tags: [xx, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_roberta_base_exp_32` is a Multilingual model originally trained by pere. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_roberta_base_exp_32_xx_5.5.0_3.0_1725623866417.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_roberta_base_exp_32_xx_5.5.0_3.0_1725623866417.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_roberta_base_exp_32","xx") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_roberta_base_exp_32","xx") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_roberta_base_exp_32| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|1.0 GB| + +## References + +https://huggingface.co/pere/roberta-base-exp-32 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_splade_sparse_vector_pinecone_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_splade_sparse_vector_pinecone_en.md new file mode 100644 index 00000000000000..a4352af937dd07 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_splade_sparse_vector_pinecone_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_splade_sparse_vector_pinecone BertSentenceEmbeddings from joaojanini +author: John Snow Labs +name: sent_splade_sparse_vector_pinecone +date: 2024-09-06 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_splade_sparse_vector_pinecone` is a English model originally trained by joaojanini. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_splade_sparse_vector_pinecone_en_5.5.0_3.0_1725651441121.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_splade_sparse_vector_pinecone_en_5.5.0_3.0_1725651441121.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_splade_sparse_vector_pinecone","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_splade_sparse_vector_pinecone","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
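+
+The Download and Copy S3 URI buttons above point to an offline copy of the model. As an alternative to `pretrained()`, an unpacked archive can be loaded from a local path; this is a hedged sketch and the directory name is a placeholder:
+
+```python
+from sparknlp.annotator import BertSentenceEmbeddings
+
+# Assumes the downloaded zip has been extracted to this (placeholder) directory.
+embeddings = BertSentenceEmbeddings.load("/models/sent_splade_sparse_vector_pinecone_en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("embeddings")
+```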
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_splade_sparse_vector_pinecone| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.4 MB| + +## References + +https://huggingface.co/joaojanini/splade_sparse_vector_pinecone \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_dholuo_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_dholuo_en.md new file mode 100644 index 00000000000000..080d824f14c8f3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_dholuo_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_xlm_roberta_base_finetuned_dholuo XlmRoBertaSentenceEmbeddings from Davlan +author: John Snow Labs +name: sent_xlm_roberta_base_finetuned_dholuo +date: 2024-09-06 +tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_xlm_roberta_base_finetuned_dholuo` is a English model originally trained by Davlan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_dholuo_en_5.5.0_3.0_1725623738282.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_dholuo_en_5.5.0_3.0_1725623738282.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_roberta_base_finetuned_dholuo","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_roberta_base_finetuned_dholuo","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_xlm_roberta_base_finetuned_dholuo| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Davlan/xlm-roberta-base-finetuned-luo \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_dholuo_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_dholuo_pipeline_en.md new file mode 100644 index 00000000000000..d3d30fce1313c7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_dholuo_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_xlm_roberta_base_finetuned_dholuo_pipeline pipeline XlmRoBertaSentenceEmbeddings from Davlan +author: John Snow Labs +name: sent_xlm_roberta_base_finetuned_dholuo_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_xlm_roberta_base_finetuned_dholuo_pipeline` is a English model originally trained by Davlan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_dholuo_pipeline_en_5.5.0_3.0_1725623793465.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_dholuo_pipeline_en_5.5.0_3.0_1725623793465.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_xlm_roberta_base_finetuned_dholuo_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_xlm_roberta_base_finetuned_dholuo_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_xlm_roberta_base_finetuned_dholuo_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Davlan/xlm-roberta-base-finetuned-luo + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_digikala_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_digikala_en.md new file mode 100644 index 00000000000000..bfb5eedfe16417 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_digikala_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_xlm_roberta_base_finetuned_digikala XlmRoBertaSentenceEmbeddings from ShahlaDnshi96 +author: John Snow Labs +name: sent_xlm_roberta_base_finetuned_digikala +date: 2024-09-06 +tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_xlm_roberta_base_finetuned_digikala` is a English model originally trained by ShahlaDnshi96. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_digikala_en_5.5.0_3.0_1725623199480.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_digikala_en_5.5.0_3.0_1725623199480.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_roberta_base_finetuned_digikala","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_roberta_base_finetuned_digikala","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_xlm_roberta_base_finetuned_digikala| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|652.9 MB| + +## References + +https://huggingface.co/ShahlaDnshi96/xlm-roberta-base-finetuned-digikala \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_digikala_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_digikala_pipeline_en.md new file mode 100644 index 00000000000000..40fbb112734c5d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_digikala_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_xlm_roberta_base_finetuned_digikala_pipeline pipeline XlmRoBertaSentenceEmbeddings from ShahlaDnshi96 +author: John Snow Labs +name: sent_xlm_roberta_base_finetuned_digikala_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_xlm_roberta_base_finetuned_digikala_pipeline` is a English model originally trained by ShahlaDnshi96. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_digikala_pipeline_en_5.5.0_3.0_1725623393058.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_digikala_pipeline_en_5.5.0_3.0_1725623393058.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_xlm_roberta_base_finetuned_digikala_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_xlm_roberta_base_finetuned_digikala_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_xlm_roberta_base_finetuned_digikala_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|653.5 MB| + +## References + +https://huggingface.co/ShahlaDnshi96/xlm-roberta-base-finetuned-digikala + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_on_runaways_english_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_on_runaways_english_pipeline_en.md new file mode 100644 index 00000000000000..98b3835477a871 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_on_runaways_english_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_xlm_roberta_base_finetuned_on_runaways_english_pipeline pipeline XlmRoBertaSentenceEmbeddings from Nadav +author: John Snow Labs +name: sent_xlm_roberta_base_finetuned_on_runaways_english_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_xlm_roberta_base_finetuned_on_runaways_english_pipeline` is a English model originally trained by Nadav. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_on_runaways_english_pipeline_en_5.5.0_3.0_1725622608540.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_on_runaways_english_pipeline_en_5.5.0_3.0_1725622608540.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_xlm_roberta_base_finetuned_on_runaways_english_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_xlm_roberta_base_finetuned_on_runaways_english_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_xlm_roberta_base_finetuned_on_runaways_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Nadav/xlm-roberta-base-finetuned-on-runaways-en + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_shona_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_shona_en.md new file mode 100644 index 00000000000000..23891510afc62b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_shona_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_xlm_roberta_base_finetuned_shona XlmRoBertaSentenceEmbeddings from Davlan +author: John Snow Labs +name: sent_xlm_roberta_base_finetuned_shona +date: 2024-09-06 +tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_xlm_roberta_base_finetuned_shona` is a English model originally trained by Davlan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_shona_en_5.5.0_3.0_1725622955687.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_shona_en_5.5.0_3.0_1725622955687.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_roberta_base_finetuned_shona","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_roberta_base_finetuned_shona","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_xlm_roberta_base_finetuned_shona| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Davlan/xlm-roberta-base-finetuned-shona \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_shona_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_shona_pipeline_en.md new file mode 100644 index 00000000000000..1ad6ea38027054 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_shona_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_xlm_roberta_base_finetuned_shona_pipeline pipeline XlmRoBertaSentenceEmbeddings from Davlan +author: John Snow Labs +name: sent_xlm_roberta_base_finetuned_shona_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_xlm_roberta_base_finetuned_shona_pipeline` is a English model originally trained by Davlan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_shona_pipeline_en_5.5.0_3.0_1725623014346.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_shona_pipeline_en_5.5.0_3.0_1725623014346.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_xlm_roberta_base_finetuned_shona_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_xlm_roberta_base_finetuned_shona_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_xlm_roberta_base_finetuned_shona_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Davlan/xlm-roberta-base-finetuned-shona + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_xhosa_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_xhosa_pipeline_en.md new file mode 100644 index 00000000000000..49943e0dff64e7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_xhosa_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_xlm_roberta_base_finetuned_xhosa_pipeline pipeline XlmRoBertaSentenceEmbeddings from Davlan +author: John Snow Labs +name: sent_xlm_roberta_base_finetuned_xhosa_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_xlm_roberta_base_finetuned_xhosa_pipeline` is a English model originally trained by Davlan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_xhosa_pipeline_en_5.5.0_3.0_1725622960685.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_xhosa_pipeline_en_5.5.0_3.0_1725622960685.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_xlm_roberta_base_finetuned_xhosa_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_xlm_roberta_base_finetuned_xhosa_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_xlm_roberta_base_finetuned_xhosa_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Davlan/xlm-roberta-base-finetuned-xhosa + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_yoruba_en.md b/docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_yoruba_en.md new file mode 100644 index 00000000000000..710221738cd40c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sent_xlm_roberta_base_finetuned_yoruba_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_xlm_roberta_base_finetuned_yoruba XlmRoBertaSentenceEmbeddings from Davlan +author: John Snow Labs +name: sent_xlm_roberta_base_finetuned_yoruba +date: 2024-09-06 +tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_xlm_roberta_base_finetuned_yoruba` is a English model originally trained by Davlan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_yoruba_en_5.5.0_3.0_1725623460078.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_yoruba_en_5.5.0_3.0_1725623460078.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_roberta_base_finetuned_yoruba","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_roberta_base_finetuned_yoruba","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
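+
+If plain vectors are needed downstream, an `EmbeddingsFinisher` can be appended after the pipeline sketched above. A minimal sketch, assuming the column names used in that snippet:
+
+```python
+from sparknlp.base import EmbeddingsFinisher
+
+# convert Spark NLP annotation structs into plain Spark ML vectors
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["sentence_vector"]) \
+    .setOutputAsVector(True)
+
+finisher.transform(pipelineDF).select("sentence_vector").show(truncate=False)
+```
+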
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_xlm_roberta_base_finetuned_yoruba| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Davlan/xlm-roberta-base-finetuned-yoruba \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sentiment_analyser_joyo1_en.md b/docs/_posts/ahmedlone127/2024-09-06-sentiment_analyser_joyo1_en.md new file mode 100644 index 00000000000000..321ffb583b7f46 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sentiment_analyser_joyo1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sentiment_analyser_joyo1 DistilBertForSequenceClassification from Joyo1 +author: John Snow Labs +name: sentiment_analyser_joyo1 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sentiment_analyser_joyo1` is a English model originally trained by Joyo1. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentiment_analyser_joyo1_en_5.5.0_3.0_1725607955233.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentiment_analyser_joyo1_en_5.5.0_3.0_1725607955233.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("sentiment_analyser_joyo1","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("sentiment_analyser_joyo1", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
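+
+After the pipeline above has run, the predicted label for each row can be read from the `class` output column. A short sketch, assuming the column names used above:
+
+```python
+# each annotation in "class" carries the predicted label in its result field
+pipelineDF.select("text", "class.result").show(truncate=False)
+```
+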
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentiment_analyser_joyo1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Joyo1/Sentiment_Analyser \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sentiment_analyser_joyo1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-sentiment_analyser_joyo1_pipeline_en.md new file mode 100644 index 00000000000000..d36a857a556088 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sentiment_analyser_joyo1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English sentiment_analyser_joyo1_pipeline pipeline DistilBertForSequenceClassification from Joyo1 +author: John Snow Labs +name: sentiment_analyser_joyo1_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sentiment_analyser_joyo1_pipeline` is a English model originally trained by Joyo1. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentiment_analyser_joyo1_pipeline_en_5.5.0_3.0_1725607967303.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentiment_analyser_joyo1_pipeline_en_5.5.0_3.0_1725607967303.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sentiment_analyser_joyo1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sentiment_analyser_joyo1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
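+
+For quick single-text checks the same pretrained pipeline can be used without building a DataFrame; `annotate` returns a plain Python dict whose keys depend on the annotators included in the pipeline. A minimal sketch:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("sentiment_analyser_joyo1_pipeline", lang = "en")
+# dict of stage outputs; the exact keys depend on the included annotators
+result = pipeline.annotate("I love spark-nlp")
+print(result)
+```
+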
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentiment_analyser_joyo1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Joyo1/Sentiment_Analyser + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sentiment_analysis_sayula_popoluca_neg1_en.md b/docs/_posts/ahmedlone127/2024-09-06-sentiment_analysis_sayula_popoluca_neg1_en.md new file mode 100644 index 00000000000000..5918f8c156b1ba --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sentiment_analysis_sayula_popoluca_neg1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sentiment_analysis_sayula_popoluca_neg1 MPNetForSequenceClassification from abhiramd22 +author: John Snow Labs +name: sentiment_analysis_sayula_popoluca_neg1 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, mpnet] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sentiment_analysis_sayula_popoluca_neg1` is a English model originally trained by abhiramd22. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentiment_analysis_sayula_popoluca_neg1_en_5.5.0_3.0_1725629835970.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentiment_analysis_sayula_popoluca_neg1_en_5.5.0_3.0_1725629835970.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = MPNetForSequenceClassification.pretrained("sentiment_analysis_sayula_popoluca_neg1","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = MPNetForSequenceClassification.pretrained("sentiment_analysis_sayula_popoluca_neg1", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentiment_analysis_sayula_popoluca_neg1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/abhiramd22/sentiment_analysis_pos_neg1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sentiment_analysis_sayula_popoluca_neg1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-sentiment_analysis_sayula_popoluca_neg1_pipeline_en.md new file mode 100644 index 00000000000000..e1ed0ef47e9297 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sentiment_analysis_sayula_popoluca_neg1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English sentiment_analysis_sayula_popoluca_neg1_pipeline pipeline MPNetForSequenceClassification from abhiramd22 +author: John Snow Labs +name: sentiment_analysis_sayula_popoluca_neg1_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sentiment_analysis_sayula_popoluca_neg1_pipeline` is a English model originally trained by abhiramd22. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentiment_analysis_sayula_popoluca_neg1_pipeline_en_5.5.0_3.0_1725629859989.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentiment_analysis_sayula_popoluca_neg1_pipeline_en_5.5.0_3.0_1725629859989.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sentiment_analysis_sayula_popoluca_neg1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sentiment_analysis_sayula_popoluca_neg1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentiment_analysis_sayula_popoluca_neg1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/abhiramd22/sentiment_analysis_pos_neg1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sentiment_ohb3_xlm_roberta_hungarian_hu.md b/docs/_posts/ahmedlone127/2024-09-06-sentiment_ohb3_xlm_roberta_hungarian_hu.md new file mode 100644 index 00000000000000..c595b91f5f8171 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sentiment_ohb3_xlm_roberta_hungarian_hu.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Hungarian sentiment_ohb3_xlm_roberta_hungarian XlmRoBertaForSequenceClassification from NYTK +author: John Snow Labs +name: sentiment_ohb3_xlm_roberta_hungarian +date: 2024-09-06 +tags: [hu, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: hu +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sentiment_ohb3_xlm_roberta_hungarian` is a Hungarian model originally trained by NYTK. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentiment_ohb3_xlm_roberta_hungarian_hu_5.5.0_3.0_1725616129174.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentiment_ohb3_xlm_roberta_hungarian_hu_5.5.0_3.0_1725616129174.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("sentiment_ohb3_xlm_roberta_hungarian","hu") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("sentiment_ohb3_xlm_roberta_hungarian", "hu")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
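+
+The sample sentence above is English; for this Hungarian model a Hungarian input is more representative. A small sketch reusing the fitted pipeline from above, with an illustrative (assumed) example sentence:
+
+```python
+# "Nagyon tetszett ez a film." - "I really liked this movie." (illustrative example)
+hu_data = spark.createDataFrame([["Nagyon tetszett ez a film."]]).toDF("text")
+pipelineModel.transform(hu_data).select("text", "class.result").show(truncate=False)
+```
+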
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentiment_ohb3_xlm_roberta_hungarian| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|hu| +|Size:|799.9 MB| + +## References + +https://huggingface.co/NYTK/sentiment-ohb3-xlm-roberta-hungarian \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sentiment_ohb3_xlm_roberta_hungarian_pipeline_hu.md b/docs/_posts/ahmedlone127/2024-09-06-sentiment_ohb3_xlm_roberta_hungarian_pipeline_hu.md new file mode 100644 index 00000000000000..b5eaaba752330e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sentiment_ohb3_xlm_roberta_hungarian_pipeline_hu.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Hungarian sentiment_ohb3_xlm_roberta_hungarian_pipeline pipeline XlmRoBertaForSequenceClassification from NYTK +author: John Snow Labs +name: sentiment_ohb3_xlm_roberta_hungarian_pipeline +date: 2024-09-06 +tags: [hu, open_source, pipeline, onnx] +task: Text Classification +language: hu +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sentiment_ohb3_xlm_roberta_hungarian_pipeline` is a Hungarian model originally trained by NYTK. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentiment_ohb3_xlm_roberta_hungarian_pipeline_hu_5.5.0_3.0_1725616265529.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentiment_ohb3_xlm_roberta_hungarian_pipeline_hu_5.5.0_3.0_1725616265529.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sentiment_ohb3_xlm_roberta_hungarian_pipeline", lang = "hu") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sentiment_ohb3_xlm_roberta_hungarian_pipeline", lang = "hu") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentiment_ohb3_xlm_roberta_hungarian_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|hu| +|Size:|799.9 MB| + +## References + +https://huggingface.co/NYTK/sentiment-ohb3-xlm-roberta-hungarian + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sentiment_sentiment_small_temporal_bernice_en.md b/docs/_posts/ahmedlone127/2024-09-06-sentiment_sentiment_small_temporal_bernice_en.md new file mode 100644 index 00000000000000..51384a33878803 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sentiment_sentiment_small_temporal_bernice_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sentiment_sentiment_small_temporal_bernice XlmRoBertaForSequenceClassification from tweettemposhift +author: John Snow Labs +name: sentiment_sentiment_small_temporal_bernice +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sentiment_sentiment_small_temporal_bernice` is a English model originally trained by tweettemposhift. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentiment_sentiment_small_temporal_bernice_en_5.5.0_3.0_1725620675488.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentiment_sentiment_small_temporal_bernice_en_5.5.0_3.0_1725620675488.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("sentiment_sentiment_small_temporal_bernice","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("sentiment_sentiment_small_temporal_bernice", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentiment_sentiment_small_temporal_bernice| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|790.0 MB| + +## References + +https://huggingface.co/tweettemposhift/sentiment-sentiment_small_temporal-bernice \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sentiment_sentiment_small_temporal_bernice_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-sentiment_sentiment_small_temporal_bernice_pipeline_en.md new file mode 100644 index 00000000000000..3e3d0d6a34a5d6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sentiment_sentiment_small_temporal_bernice_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English sentiment_sentiment_small_temporal_bernice_pipeline pipeline XlmRoBertaForSequenceClassification from tweettemposhift +author: John Snow Labs +name: sentiment_sentiment_small_temporal_bernice_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sentiment_sentiment_small_temporal_bernice_pipeline` is a English model originally trained by tweettemposhift. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentiment_sentiment_small_temporal_bernice_pipeline_en_5.5.0_3.0_1725620820986.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentiment_sentiment_small_temporal_bernice_pipeline_en_5.5.0_3.0_1725620820986.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sentiment_sentiment_small_temporal_bernice_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sentiment_sentiment_small_temporal_bernice_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentiment_sentiment_small_temporal_bernice_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|790.0 MB| + +## References + +https://huggingface.co/tweettemposhift/sentiment-sentiment_small_temporal-bernice + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-setfit_finetuned_financial_text_classification_en.md b/docs/_posts/ahmedlone127/2024-09-06-setfit_finetuned_financial_text_classification_en.md new file mode 100644 index 00000000000000..10328c1b00f260 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-setfit_finetuned_financial_text_classification_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English setfit_finetuned_financial_text_classification MPNetEmbeddings from nickmuchi +author: John Snow Labs +name: setfit_finetuned_financial_text_classification +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`setfit_finetuned_financial_text_classification` is a English model originally trained by nickmuchi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/setfit_finetuned_financial_text_classification_en_5.5.0_3.0_1725595023811.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/setfit_finetuned_financial_text_classification_en_5.5.0_3.0_1725595023811.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("setfit_finetuned_financial_text_classification","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("setfit_finetuned_financial_text_classification","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
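+
+Since this SetFit checkpoint is imported here as a sentence-embedding model, a common use is semantic comparison. A rough sketch, assuming the `pipeline` and `spark` objects from the snippet above and using illustrative example sentences:
+
+```python
+import numpy as np
+
+docs = spark.createDataFrame([["Revenue grew strongly"], ["Sales increased sharply"]]).toDF("text")
+rows = pipeline.fit(docs).transform(docs) \
+    .selectExpr("embeddings[0].embeddings as vec") \
+    .collect()
+
+v1, v2 = np.array(rows[0].vec), np.array(rows[1].vec)
+# cosine similarity between the two sentence embeddings
+print(float(v1 @ v2 / (np.linalg.norm(v1) * np.linalg.norm(v2))))
+```
+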
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|setfit_finetuned_financial_text_classification| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/nickmuchi/setfit-finetuned-financial-text-classification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-setfit_finetuned_financial_text_classification_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-setfit_finetuned_financial_text_classification_pipeline_en.md new file mode 100644 index 00000000000000..aad1b8374cbbdf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-setfit_finetuned_financial_text_classification_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English setfit_finetuned_financial_text_classification_pipeline pipeline MPNetEmbeddings from nickmuchi +author: John Snow Labs +name: setfit_finetuned_financial_text_classification_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`setfit_finetuned_financial_text_classification_pipeline` is a English model originally trained by nickmuchi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/setfit_finetuned_financial_text_classification_pipeline_en_5.5.0_3.0_1725595047099.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/setfit_finetuned_financial_text_classification_pipeline_en_5.5.0_3.0_1725595047099.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("setfit_finetuned_financial_text_classification_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("setfit_finetuned_financial_text_classification_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|setfit_finetuned_financial_text_classification_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/nickmuchi/setfit-finetuned-financial-text-classification + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-setfit_mbti_multiclass_w266_nov29_en.md b/docs/_posts/ahmedlone127/2024-09-06-setfit_mbti_multiclass_w266_nov29_en.md new file mode 100644 index 00000000000000..21b95fa75dc198 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-setfit_mbti_multiclass_w266_nov29_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English setfit_mbti_multiclass_w266_nov29 MPNetEmbeddings from shrinivasbjoshi +author: John Snow Labs +name: setfit_mbti_multiclass_w266_nov29 +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`setfit_mbti_multiclass_w266_nov29` is a English model originally trained by shrinivasbjoshi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/setfit_mbti_multiclass_w266_nov29_en_5.5.0_3.0_1725595696748.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/setfit_mbti_multiclass_w266_nov29_en_5.5.0_3.0_1725595696748.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("setfit_mbti_multiclass_w266_nov29","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("setfit_mbti_multiclass_w266_nov29","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|setfit_mbti_multiclass_w266_nov29| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/shrinivasbjoshi/setfit-mbti-multiclass-w266_Nov29 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-setfit_mbti_multiclass_w266_nov29_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-setfit_mbti_multiclass_w266_nov29_pipeline_en.md new file mode 100644 index 00000000000000..5851bab308f0ed --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-setfit_mbti_multiclass_w266_nov29_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English setfit_mbti_multiclass_w266_nov29_pipeline pipeline MPNetEmbeddings from shrinivasbjoshi +author: John Snow Labs +name: setfit_mbti_multiclass_w266_nov29_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`setfit_mbti_multiclass_w266_nov29_pipeline` is a English model originally trained by shrinivasbjoshi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/setfit_mbti_multiclass_w266_nov29_pipeline_en_5.5.0_3.0_1725595717233.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/setfit_mbti_multiclass_w266_nov29_pipeline_en_5.5.0_3.0_1725595717233.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("setfit_mbti_multiclass_w266_nov29_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("setfit_mbti_multiclass_w266_nov29_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|setfit_mbti_multiclass_w266_nov29_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/shrinivasbjoshi/setfit-mbti-multiclass-w266_Nov29 + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-setfit_model_feb11_misinformation_on_global_support_en.md b/docs/_posts/ahmedlone127/2024-09-06-setfit_model_feb11_misinformation_on_global_support_en.md new file mode 100644 index 00000000000000..2ee6687d743462 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-setfit_model_feb11_misinformation_on_global_support_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English setfit_model_feb11_misinformation_on_global_support MPNetEmbeddings from mitra-mir +author: John Snow Labs +name: setfit_model_feb11_misinformation_on_global_support +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`setfit_model_feb11_misinformation_on_global_support` is a English model originally trained by mitra-mir. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/setfit_model_feb11_misinformation_on_global_support_en_5.5.0_3.0_1725595384886.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/setfit_model_feb11_misinformation_on_global_support_en_5.5.0_3.0_1725595384886.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("setfit_model_feb11_misinformation_on_global_support","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("setfit_model_feb11_misinformation_on_global_support","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|setfit_model_feb11_misinformation_on_global_support| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/mitra-mir/setfit-model-Feb11-Misinformation-on-Global-Support \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-setfit_model_feb11_misinformation_on_global_support_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-setfit_model_feb11_misinformation_on_global_support_pipeline_en.md new file mode 100644 index 00000000000000..69fe72baacba5c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-setfit_model_feb11_misinformation_on_global_support_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English setfit_model_feb11_misinformation_on_global_support_pipeline pipeline MPNetEmbeddings from mitra-mir +author: John Snow Labs +name: setfit_model_feb11_misinformation_on_global_support_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`setfit_model_feb11_misinformation_on_global_support_pipeline` is a English model originally trained by mitra-mir. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/setfit_model_feb11_misinformation_on_global_support_pipeline_en_5.5.0_3.0_1725595404842.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/setfit_model_feb11_misinformation_on_global_support_pipeline_en_5.5.0_3.0_1725595404842.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("setfit_model_feb11_misinformation_on_global_support_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("setfit_model_feb11_misinformation_on_global_support_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|setfit_model_feb11_misinformation_on_global_support_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/mitra-mir/setfit-model-Feb11-Misinformation-on-Global-Support + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-setfit_model_test_2_en.md b/docs/_posts/ahmedlone127/2024-09-06-setfit_model_test_2_en.md new file mode 100644 index 00000000000000..b7cbd085e782cf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-setfit_model_test_2_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English setfit_model_test_2 MPNetEmbeddings from Adipta +author: John Snow Labs +name: setfit_model_test_2 +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`setfit_model_test_2` is a English model originally trained by Adipta. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/setfit_model_test_2_en_5.5.0_3.0_1725595078272.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/setfit_model_test_2_en_5.5.0_3.0_1725595078272.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("setfit_model_test_2","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("setfit_model_test_2","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|setfit_model_test_2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/Adipta/setfit-model-test-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-snli_microsoft_deberta_v3_base_seed_2_en.md b/docs/_posts/ahmedlone127/2024-09-06-snli_microsoft_deberta_v3_base_seed_2_en.md new file mode 100644 index 00000000000000..047eb4f3efd43e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-snli_microsoft_deberta_v3_base_seed_2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English snli_microsoft_deberta_v3_base_seed_2 DeBertaForSequenceClassification from utahnlp +author: John Snow Labs +name: snli_microsoft_deberta_v3_base_seed_2 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`snli_microsoft_deberta_v3_base_seed_2` is a English model originally trained by utahnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/snli_microsoft_deberta_v3_base_seed_2_en_5.5.0_3.0_1725589756902.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/snli_microsoft_deberta_v3_base_seed_2_en_5.5.0_3.0_1725589756902.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("snli_microsoft_deberta_v3_base_seed_2","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("snli_microsoft_deberta_v3_base_seed_2", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|snli_microsoft_deberta_v3_base_seed_2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|600.0 MB| + +## References + +https://huggingface.co/utahnlp/snli_microsoft_deberta-v3-base_seed-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-snli_microsoft_deberta_v3_base_seed_2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-snli_microsoft_deberta_v3_base_seed_2_pipeline_en.md new file mode 100644 index 00000000000000..819d26aa32f599 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-snli_microsoft_deberta_v3_base_seed_2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English snli_microsoft_deberta_v3_base_seed_2_pipeline pipeline DeBertaForSequenceClassification from utahnlp +author: John Snow Labs +name: snli_microsoft_deberta_v3_base_seed_2_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`snli_microsoft_deberta_v3_base_seed_2_pipeline` is a English model originally trained by utahnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/snli_microsoft_deberta_v3_base_seed_2_pipeline_en_5.5.0_3.0_1725589825306.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/snli_microsoft_deberta_v3_base_seed_2_pipeline_en_5.5.0_3.0_1725589825306.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("snli_microsoft_deberta_v3_base_seed_2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("snli_microsoft_deberta_v3_base_seed_2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|snli_microsoft_deberta_v3_base_seed_2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|600.0 MB| + +## References + +https://huggingface.co/utahnlp/snli_microsoft_deberta-v3-base_seed-2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-snli_microsoft_deberta_v3_large_seed_1_en.md b/docs/_posts/ahmedlone127/2024-09-06-snli_microsoft_deberta_v3_large_seed_1_en.md new file mode 100644 index 00000000000000..19b1f8bb2baab3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-snli_microsoft_deberta_v3_large_seed_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English snli_microsoft_deberta_v3_large_seed_1 DeBertaForSequenceClassification from utahnlp +author: John Snow Labs +name: snli_microsoft_deberta_v3_large_seed_1 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`snli_microsoft_deberta_v3_large_seed_1` is a English model originally trained by utahnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/snli_microsoft_deberta_v3_large_seed_1_en_5.5.0_3.0_1725591034240.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/snli_microsoft_deberta_v3_large_seed_1_en_5.5.0_3.0_1725591034240.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("snli_microsoft_deberta_v3_large_seed_1","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("snli_microsoft_deberta_v3_large_seed_1", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|snli_microsoft_deberta_v3_large_seed_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/utahnlp/snli_microsoft_deberta-v3-large_seed-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-snli_microsoft_deberta_v3_large_seed_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-snli_microsoft_deberta_v3_large_seed_1_pipeline_en.md new file mode 100644 index 00000000000000..7866f5a73125cc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-snli_microsoft_deberta_v3_large_seed_1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English snli_microsoft_deberta_v3_large_seed_1_pipeline pipeline DeBertaForSequenceClassification from utahnlp +author: John Snow Labs +name: snli_microsoft_deberta_v3_large_seed_1_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`snli_microsoft_deberta_v3_large_seed_1_pipeline` is a English model originally trained by utahnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/snli_microsoft_deberta_v3_large_seed_1_pipeline_en_5.5.0_3.0_1725591152090.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/snli_microsoft_deberta_v3_large_seed_1_pipeline_en_5.5.0_3.0_1725591152090.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("snli_microsoft_deberta_v3_large_seed_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("snli_microsoft_deberta_v3_large_seed_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
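+
+As a minimal sketch (not part of the original card), `df` above can be any Spark DataFrame with a `text` column; the predicted label is produced in a `class` column, matching the standalone model card:
+
+```python
+# Hypothetical input; the pipeline only needs a "text" column.
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+annotations.select("class.result").show(truncate=False)
+```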
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|snli_microsoft_deberta_v3_large_seed_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/utahnlp/snli_microsoft_deberta-v3-large_seed-1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-song_artist_classifier_v7_alberta_en.md b/docs/_posts/ahmedlone127/2024-09-06-song_artist_classifier_v7_alberta_en.md new file mode 100644 index 00000000000000..b2b50ebecb8081 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-song_artist_classifier_v7_alberta_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English song_artist_classifier_v7_alberta AlbertForSequenceClassification from tjl223 +author: John Snow Labs +name: song_artist_classifier_v7_alberta +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`song_artist_classifier_v7_alberta` is a English model originally trained by tjl223. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/song_artist_classifier_v7_alberta_en_5.5.0_3.0_1725628125132.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/song_artist_classifier_v7_alberta_en_5.5.0_3.0_1725628125132.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = AlbertForSequenceClassification.pretrained("song_artist_classifier_v7_alberta","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = AlbertForSequenceClassification.pretrained("song_artist_classifier_v7_alberta", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|song_artist_classifier_v7_alberta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.3 MB| + +## References + +https://huggingface.co/tjl223/song-artist-classifier-v7-alberta \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-song_artist_classifier_v7_alberta_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-song_artist_classifier_v7_alberta_pipeline_en.md new file mode 100644 index 00000000000000..0b0433c71b55f4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-song_artist_classifier_v7_alberta_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English song_artist_classifier_v7_alberta_pipeline pipeline AlbertForSequenceClassification from tjl223 +author: John Snow Labs +name: song_artist_classifier_v7_alberta_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`song_artist_classifier_v7_alberta_pipeline` is a English model originally trained by tjl223. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/song_artist_classifier_v7_alberta_pipeline_en_5.5.0_3.0_1725628127685.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/song_artist_classifier_v7_alberta_pipeline_en_5.5.0_3.0_1725628127685.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("song_artist_classifier_v7_alberta_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("song_artist_classifier_v7_alberta_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|song_artist_classifier_v7_alberta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|44.3 MB| + +## References + +https://huggingface.co/tjl223/song-artist-classifier-v7-alberta + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-southern_sotho_all_mpnet_finetuned_arabic_1500_en.md b/docs/_posts/ahmedlone127/2024-09-06-southern_sotho_all_mpnet_finetuned_arabic_1500_en.md new file mode 100644 index 00000000000000..21bc0d76330155 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-southern_sotho_all_mpnet_finetuned_arabic_1500_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English southern_sotho_all_mpnet_finetuned_arabic_1500 MPNetEmbeddings from danfeg +author: John Snow Labs +name: southern_sotho_all_mpnet_finetuned_arabic_1500 +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`southern_sotho_all_mpnet_finetuned_arabic_1500` is a English model originally trained by danfeg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/southern_sotho_all_mpnet_finetuned_arabic_1500_en_5.5.0_3.0_1725595710612.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/southern_sotho_all_mpnet_finetuned_arabic_1500_en_5.5.0_3.0_1725595710612.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("southern_sotho_all_mpnet_finetuned_arabic_1500","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("southern_sotho_all_mpnet_finetuned_arabic_1500","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
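+
+To inspect the resulting sentence vectors (a usage sketch, assuming the column names from the snippet above), the float arrays can be read out of the annotation structs:
+
+```python
+# Each row of "embeddings" is an array of annotations; ".embeddings" holds the vectors.
+pipelineDF.select("embeddings.embeddings").show(truncate=False)
+```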
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|southern_sotho_all_mpnet_finetuned_arabic_1500| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/danfeg/ST-ALL-MPNET_Finetuned-AR-1500 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-spanish_english_en.md b/docs/_posts/ahmedlone127/2024-09-06-spanish_english_en.md new file mode 100644 index 00000000000000..c3bcc9857f0c1a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-spanish_english_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English spanish_english MarianTransformer from adeebkm +author: John Snow Labs +name: spanish_english +date: 2024-09-06 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`spanish_english` is a English model originally trained by adeebkm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/spanish_english_en_5.5.0_3.0_1725635736086.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/spanish_english_en_5.5.0_3.0_1725635736086.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("spanish_english","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("spanish_english","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
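+
+Once the pipeline has run, the translated sentences can be read from the `translation` column (a sketch assuming the column names used above):
+
+```python
+# "result" holds the translated text for each detected sentence.
+pipelineDF.select("translation.result").show(truncate=False)
+```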
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|spanish_english| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|539.4 MB| + +## References + +https://huggingface.co/adeebkm/es-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-spanish_english_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-spanish_english_pipeline_en.md new file mode 100644 index 00000000000000..808f8cadd759e2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-spanish_english_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English spanish_english_pipeline pipeline MarianTransformer from adeebkm +author: John Snow Labs +name: spanish_english_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`spanish_english_pipeline` is a English model originally trained by adeebkm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/spanish_english_pipeline_en_5.5.0_3.0_1725635763344.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/spanish_english_pipeline_en_5.5.0_3.0_1725635763344.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("spanish_english_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("spanish_english_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
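+
+Since the underlying model translates Spanish into English, `df` is assumed to carry the Spanish source text in a `text` column; the output column name (`translation`) is taken from the standalone model card and may differ in the packaged pipeline:
+
+```python
+# Hypothetical Spanish input sentence.
+df = spark.createDataFrame([["Me encanta Spark NLP"]]).toDF("text")
+annotations = pipeline.transform(df)
+annotations.select("translation.result").show(truncate=False)
+```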
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|spanish_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|539.9 MB| + +## References + +https://huggingface.co/adeebkm/es-en + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-splade_sparse_vector_pinecone_en.md b/docs/_posts/ahmedlone127/2024-09-06-splade_sparse_vector_pinecone_en.md new file mode 100644 index 00000000000000..92e48e36291e7c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-splade_sparse_vector_pinecone_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English splade_sparse_vector_pinecone BertEmbeddings from joaojanini +author: John Snow Labs +name: splade_sparse_vector_pinecone +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`splade_sparse_vector_pinecone` is a English model originally trained by joaojanini. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/splade_sparse_vector_pinecone_en_5.5.0_3.0_1725614444909.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/splade_sparse_vector_pinecone_en_5.5.0_3.0_1725614444909.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("splade_sparse_vector_pinecone","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("splade_sparse_vector_pinecone","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|splade_sparse_vector_pinecone| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|en| +|Size:|407.4 MB| + +## References + +https://huggingface.co/joaojanini/splade_sparse_vector_pinecone \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-splade_sparse_vector_pinecone_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-splade_sparse_vector_pinecone_pipeline_en.md new file mode 100644 index 00000000000000..6b63275545c18a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-splade_sparse_vector_pinecone_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English splade_sparse_vector_pinecone_pipeline pipeline BertEmbeddings from joaojanini +author: John Snow Labs +name: splade_sparse_vector_pinecone_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`splade_sparse_vector_pinecone_pipeline` is a English model originally trained by joaojanini. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/splade_sparse_vector_pinecone_pipeline_en_5.5.0_3.0_1725614466340.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/splade_sparse_vector_pinecone_pipeline_en_5.5.0_3.0_1725614466340.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("splade_sparse_vector_pinecone_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("splade_sparse_vector_pinecone_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|splade_sparse_vector_pinecone_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.4 MB| + +## References + +https://huggingface.co/joaojanini/splade_sparse_vector_pinecone + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sst2_microsoft_deberta_v3_base_seed_2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-sst2_microsoft_deberta_v3_base_seed_2_pipeline_en.md new file mode 100644 index 00000000000000..c4a8cdf361d8ec --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sst2_microsoft_deberta_v3_base_seed_2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English sst2_microsoft_deberta_v3_base_seed_2_pipeline pipeline DeBertaForSequenceClassification from utahnlp +author: John Snow Labs +name: sst2_microsoft_deberta_v3_base_seed_2_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sst2_microsoft_deberta_v3_base_seed_2_pipeline` is a English model originally trained by utahnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sst2_microsoft_deberta_v3_base_seed_2_pipeline_en_5.5.0_3.0_1725609806863.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sst2_microsoft_deberta_v3_base_seed_2_pipeline_en_5.5.0_3.0_1725609806863.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sst2_microsoft_deberta_v3_base_seed_2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sst2_microsoft_deberta_v3_base_seed_2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
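+
+For a quick single-sentence check, the same pretrained pipeline can also be used with `annotate` instead of `transform` (a sketch; the example sentence is hypothetical):
+
+```python
+pipeline = PretrainedPipeline("sst2_microsoft_deberta_v3_base_seed_2_pipeline", lang = "en")
+print(pipeline.annotate("This movie was absolutely wonderful!"))
+```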
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sst2_microsoft_deberta_v3_base_seed_2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|574.1 MB| + +## References + +https://huggingface.co/utahnlp/sst2_microsoft_deberta-v3-base_seed-2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sst2_microsoft_deberta_v3_base_seed_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-sst2_microsoft_deberta_v3_base_seed_3_pipeline_en.md new file mode 100644 index 00000000000000..cd671e1a758c5c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sst2_microsoft_deberta_v3_base_seed_3_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English sst2_microsoft_deberta_v3_base_seed_3_pipeline pipeline DeBertaForSequenceClassification from utahnlp +author: John Snow Labs +name: sst2_microsoft_deberta_v3_base_seed_3_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sst2_microsoft_deberta_v3_base_seed_3_pipeline` is a English model originally trained by utahnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sst2_microsoft_deberta_v3_base_seed_3_pipeline_en_5.5.0_3.0_1725590016817.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sst2_microsoft_deberta_v3_base_seed_3_pipeline_en_5.5.0_3.0_1725590016817.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sst2_microsoft_deberta_v3_base_seed_3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sst2_microsoft_deberta_v3_base_seed_3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sst2_microsoft_deberta_v3_base_seed_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|574.1 MB| + +## References + +https://huggingface.co/utahnlp/sst2_microsoft_deberta-v3-base_seed-3 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-stanford_deidentifier_with_radiology_reports_and_i2b2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-stanford_deidentifier_with_radiology_reports_and_i2b2_pipeline_en.md new file mode 100644 index 00000000000000..6bc4c68ccd3cde --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-stanford_deidentifier_with_radiology_reports_and_i2b2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English stanford_deidentifier_with_radiology_reports_and_i2b2_pipeline pipeline BertForTokenClassification from StanfordAIMI +author: John Snow Labs +name: stanford_deidentifier_with_radiology_reports_and_i2b2_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`stanford_deidentifier_with_radiology_reports_and_i2b2_pipeline` is a English model originally trained by StanfordAIMI. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/stanford_deidentifier_with_radiology_reports_and_i2b2_pipeline_en_5.5.0_3.0_1725634084665.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/stanford_deidentifier_with_radiology_reports_and_i2b2_pipeline_en_5.5.0_3.0_1725634084665.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("stanford_deidentifier_with_radiology_reports_and_i2b2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("stanford_deidentifier_with_radiology_reports_and_i2b2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
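+
+A minimal end-to-end sketch (the input text and the `ner` output column name are assumptions; the entity tags come from the BertForTokenClassification stage):
+
+```python
+df = spark.createDataFrame([["Patient John Doe was admitted on 2020-01-01."]]).toDF("text")
+annotations = pipeline.transform(df)
+annotations.select("ner.result").show(truncate=False)
+```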
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|stanford_deidentifier_with_radiology_reports_and_i2b2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|408.2 MB| + +## References + +https://huggingface.co/StanfordAIMI/stanford-deidentifier-with-radiology-reports-and-i2b2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sungbeom_whisper_small_korean_set9_ko.md b/docs/_posts/ahmedlone127/2024-09-06-sungbeom_whisper_small_korean_set9_ko.md new file mode 100644 index 00000000000000..0bd4099f2e84b8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sungbeom_whisper_small_korean_set9_ko.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Korean sungbeom_whisper_small_korean_set9 WhisperForCTC from maxseats +author: John Snow Labs +name: sungbeom_whisper_small_korean_set9 +date: 2024-09-06 +tags: [ko, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: ko +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sungbeom_whisper_small_korean_set9` is a Korean model originally trained by maxseats. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sungbeom_whisper_small_korean_set9_ko_5.5.0_3.0_1725645807194.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sungbeom_whisper_small_korean_set9_ko_5.5.0_3.0_1725645807194.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("sungbeom_whisper_small_korean_set9","ko") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("sungbeom_whisper_small_korean_set9", "ko")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
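+
+The snippet above assumes `data` already exists; one way to build it (a sketch that assumes `librosa` is installed and a local 16 kHz WAV file named `sample_korean.wav`) is:
+
+```python
+import librosa
+
+# Load the waveform as mono 16 kHz floats, the sampling rate Whisper models expect.
+audio, _ = librosa.load("sample_korean.wav", sr=16000)
+data = spark.createDataFrame([[audio.tolist()]]).toDF("audio_content")
+```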
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sungbeom_whisper_small_korean_set9| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|ko| +|Size:|1.7 GB| + +## References + +https://huggingface.co/maxseats/SungBeom-whisper-small-ko-set9 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-sungbeom_whisper_small_korean_set9_pipeline_ko.md b/docs/_posts/ahmedlone127/2024-09-06-sungbeom_whisper_small_korean_set9_pipeline_ko.md new file mode 100644 index 00000000000000..3b6d015b9b8507 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-sungbeom_whisper_small_korean_set9_pipeline_ko.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Korean sungbeom_whisper_small_korean_set9_pipeline pipeline WhisperForCTC from maxseats +author: John Snow Labs +name: sungbeom_whisper_small_korean_set9_pipeline +date: 2024-09-06 +tags: [ko, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: ko +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sungbeom_whisper_small_korean_set9_pipeline` is a Korean model originally trained by maxseats. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sungbeom_whisper_small_korean_set9_pipeline_ko_5.5.0_3.0_1725645908953.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sungbeom_whisper_small_korean_set9_pipeline_ko_5.5.0_3.0_1725645908953.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sungbeom_whisper_small_korean_set9_pipeline", lang = "ko") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sungbeom_whisper_small_korean_set9_pipeline", lang = "ko") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sungbeom_whisper_small_korean_set9_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ko| +|Size:|1.7 GB| + +## References + +https://huggingface.co/maxseats/SungBeom-whisper-small-ko-set9 + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-t2t_gun_nlth_from_base_warmup_en.md b/docs/_posts/ahmedlone127/2024-09-06-t2t_gun_nlth_from_base_warmup_en.md new file mode 100644 index 00000000000000..7fa327bc323800 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-t2t_gun_nlth_from_base_warmup_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English t2t_gun_nlth_from_base_warmup MarianTransformer from tiagoblima +author: John Snow Labs +name: t2t_gun_nlth_from_base_warmup +date: 2024-09-06 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`t2t_gun_nlth_from_base_warmup` is a English model originally trained by tiagoblima. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/t2t_gun_nlth_from_base_warmup_en_5.5.0_3.0_1725636157431.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/t2t_gun_nlth_from_base_warmup_en_5.5.0_3.0_1725636157431.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("t2t_gun_nlth_from_base_warmup","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("t2t_gun_nlth_from_base_warmup","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|t2t_gun_nlth_from_base_warmup| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|220.3 MB| + +## References + +https://huggingface.co/tiagoblima/t2t-gun-nlth-from-base-warmup \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-task_implicit_task__model_deberta__aug_method_ri_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-task_implicit_task__model_deberta__aug_method_ri_pipeline_en.md new file mode 100644 index 00000000000000..0d8e3f347dc8f9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-task_implicit_task__model_deberta__aug_method_ri_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English task_implicit_task__model_deberta__aug_method_ri_pipeline pipeline DeBertaForSequenceClassification from BenjaminOcampo +author: John Snow Labs +name: task_implicit_task__model_deberta__aug_method_ri_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`task_implicit_task__model_deberta__aug_method_ri_pipeline` is a English model originally trained by BenjaminOcampo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/task_implicit_task__model_deberta__aug_method_ri_pipeline_en_5.5.0_3.0_1725588819272.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/task_implicit_task__model_deberta__aug_method_ri_pipeline_en_5.5.0_3.0_1725588819272.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("task_implicit_task__model_deberta__aug_method_ri_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("task_implicit_task__model_deberta__aug_method_ri_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|task_implicit_task__model_deberta__aug_method_ri_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|607.4 MB| + +## References + +https://huggingface.co/BenjaminOcampo/task-implicit_task__model-deberta__aug_method-ri + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-task_subtle_task__model_deberta__aug_method_eda_en.md b/docs/_posts/ahmedlone127/2024-09-06-task_subtle_task__model_deberta__aug_method_eda_en.md new file mode 100644 index 00000000000000..490b6c0ffbe8ef --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-task_subtle_task__model_deberta__aug_method_eda_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English task_subtle_task__model_deberta__aug_method_eda DeBertaForSequenceClassification from BenjaminOcampo +author: John Snow Labs +name: task_subtle_task__model_deberta__aug_method_eda +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, deberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DeBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`task_subtle_task__model_deberta__aug_method_eda` is a English model originally trained by BenjaminOcampo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/task_subtle_task__model_deberta__aug_method_eda_en_5.5.0_3.0_1725590089982.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/task_subtle_task__model_deberta__aug_method_eda_en_5.5.0_3.0_1725590089982.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DeBertaForSequenceClassification.pretrained("task_subtle_task__model_deberta__aug_method_eda","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DeBertaForSequenceClassification.pretrained("task_subtle_task__model_deberta__aug_method_eda", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
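+
+To read both the predicted label and the per-class scores from the `class` column (a usage sketch based on the snippet above; classifier scores are kept in the annotation metadata):
+
+```python
+from pyspark.sql import functions as F
+
+pipelineDF.select(F.explode("class").alias("prediction")) \
+    .select(F.col("prediction.result").alias("label"),
+            F.col("prediction.metadata").alias("scores")) \
+    .show(truncate=False)
+```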
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|task_subtle_task__model_deberta__aug_method_eda| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|608.2 MB| + +## References + +https://huggingface.co/BenjaminOcampo/task-subtle_task__model-deberta__aug_method-eda \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-task_subtle_task__model_deberta__aug_method_eda_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-task_subtle_task__model_deberta__aug_method_eda_pipeline_en.md new file mode 100644 index 00000000000000..f2b195978c027b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-task_subtle_task__model_deberta__aug_method_eda_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English task_subtle_task__model_deberta__aug_method_eda_pipeline pipeline DeBertaForSequenceClassification from BenjaminOcampo +author: John Snow Labs +name: task_subtle_task__model_deberta__aug_method_eda_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DeBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`task_subtle_task__model_deberta__aug_method_eda_pipeline` is a English model originally trained by BenjaminOcampo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/task_subtle_task__model_deberta__aug_method_eda_pipeline_en_5.5.0_3.0_1725590127105.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/task_subtle_task__model_deberta__aug_method_eda_pipeline_en_5.5.0_3.0_1725590127105.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("task_subtle_task__model_deberta__aug_method_eda_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("task_subtle_task__model_deberta__aug_method_eda_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|task_subtle_task__model_deberta__aug_method_eda_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|608.2 MB| + +## References + +https://huggingface.co/BenjaminOcampo/task-subtle_task__model-deberta__aug_method-eda + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DeBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-task_text_classification_distilbert_imdb_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-task_text_classification_distilbert_imdb_pipeline_en.md new file mode 100644 index 00000000000000..842ea8dc62ecce --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-task_text_classification_distilbert_imdb_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English task_text_classification_distilbert_imdb_pipeline pipeline DistilBertForSequenceClassification from tunggad +author: John Snow Labs +name: task_text_classification_distilbert_imdb_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`task_text_classification_distilbert_imdb_pipeline` is a English model originally trained by tunggad. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/task_text_classification_distilbert_imdb_pipeline_en_5.5.0_3.0_1725608547905.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/task_text_classification_distilbert_imdb_pipeline_en_5.5.0_3.0_1725608547905.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("task_text_classification_distilbert_imdb_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("task_text_classification_distilbert_imdb_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|task_text_classification_distilbert_imdb_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/tunggad/task-text-classification-DistilBERT-IMDb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-tb_xlm_r_fpt_en.md b/docs/_posts/ahmedlone127/2024-09-06-tb_xlm_r_fpt_en.md new file mode 100644 index 00000000000000..32d5916b3b867e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-tb_xlm_r_fpt_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English tb_xlm_r_fpt XlmRoBertaEmbeddings from aplycaebous +author: John Snow Labs +name: tb_xlm_r_fpt +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tb_xlm_r_fpt` is a English model originally trained by aplycaebous. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tb_xlm_r_fpt_en_5.5.0_3.0_1725596522169.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tb_xlm_r_fpt_en_5.5.0_3.0_1725596522169.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("tb_xlm_r_fpt","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("tb_xlm_r_fpt","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tb_xlm_r_fpt| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[xlm_roberta]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/aplycaebous/tb-XLM-R-fpt \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-tcfd_recommendation_classifier_v1_en.md b/docs/_posts/ahmedlone127/2024-09-06-tcfd_recommendation_classifier_v1_en.md new file mode 100644 index 00000000000000..efc306b1c46fa7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-tcfd_recommendation_classifier_v1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English tcfd_recommendation_classifier_v1 DistilBertForSequenceClassification from SonnyB +author: John Snow Labs +name: tcfd_recommendation_classifier_v1 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tcfd_recommendation_classifier_v1` is a English model originally trained by SonnyB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tcfd_recommendation_classifier_v1_en_5.5.0_3.0_1725608623554.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tcfd_recommendation_classifier_v1_en_5.5.0_3.0_1725608623554.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("tcfd_recommendation_classifier_v1","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("tcfd_recommendation_classifier_v1", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tcfd_recommendation_classifier_v1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/SonnyB/tcfd_recommendation_classifier_v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-teamim_tiny_weightdecay_0_05_augmented_combined_data_date_10_07_2024_14_33_he.md b/docs/_posts/ahmedlone127/2024-09-06-teamim_tiny_weightdecay_0_05_augmented_combined_data_date_10_07_2024_14_33_he.md new file mode 100644 index 00000000000000..4ff321f97bb7b7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-teamim_tiny_weightdecay_0_05_augmented_combined_data_date_10_07_2024_14_33_he.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Hebrew teamim_tiny_weightdecay_0_05_augmented_combined_data_date_10_07_2024_14_33 WhisperForCTC from cantillation +author: John Snow Labs +name: teamim_tiny_weightdecay_0_05_augmented_combined_data_date_10_07_2024_14_33 +date: 2024-09-06 +tags: [he, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: he +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`teamim_tiny_weightdecay_0_05_augmented_combined_data_date_10_07_2024_14_33` is a Hebrew model originally trained by cantillation. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/teamim_tiny_weightdecay_0_05_augmented_combined_data_date_10_07_2024_14_33_he_5.5.0_3.0_1725582737192.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/teamim_tiny_weightdecay_0_05_augmented_combined_data_date_10_07_2024_14_33_he_5.5.0_3.0_1725582737192.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# `data` is assumed to be a DataFrame holding raw audio floats in an "audio_content" column.
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("teamim_tiny_weightdecay_0_05_augmented_combined_data_date_10_07_2024_14_33","he") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("teamim_tiny_weightdecay_0_05_augmented_combined_data_date_10_07_2024_14_33", "he")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
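+
+After `transform`, the recognized text can be read from the `text` output column (a usage sketch assuming the column names above):
+
+```python
+pipelineDF.select("text.result").show(truncate=False)
+```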
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|teamim_tiny_weightdecay_0_05_augmented_combined_data_date_10_07_2024_14_33| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|he| +|Size:|388.9 MB| + +## References + +https://huggingface.co/cantillation/Teamim-tiny_WeightDecay-0.05_Augmented_Combined-Data_date-10-07-2024_14-33 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-teamim_tiny_weightdecay_0_05_augmented_combined_data_date_10_07_2024_14_33_pipeline_he.md b/docs/_posts/ahmedlone127/2024-09-06-teamim_tiny_weightdecay_0_05_augmented_combined_data_date_10_07_2024_14_33_pipeline_he.md new file mode 100644 index 00000000000000..762afbdf06e05e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-teamim_tiny_weightdecay_0_05_augmented_combined_data_date_10_07_2024_14_33_pipeline_he.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Hebrew teamim_tiny_weightdecay_0_05_augmented_combined_data_date_10_07_2024_14_33_pipeline pipeline WhisperForCTC from cantillation +author: John Snow Labs +name: teamim_tiny_weightdecay_0_05_augmented_combined_data_date_10_07_2024_14_33_pipeline +date: 2024-09-06 +tags: [he, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: he +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`teamim_tiny_weightdecay_0_05_augmented_combined_data_date_10_07_2024_14_33_pipeline` is a Hebrew model originally trained by cantillation. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/teamim_tiny_weightdecay_0_05_augmented_combined_data_date_10_07_2024_14_33_pipeline_he_5.5.0_3.0_1725582757562.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/teamim_tiny_weightdecay_0_05_augmented_combined_data_date_10_07_2024_14_33_pipeline_he_5.5.0_3.0_1725582757562.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("teamim_tiny_weightdecay_0_05_augmented_combined_data_date_10_07_2024_14_33_pipeline", lang = "he") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("teamim_tiny_weightdecay_0_05_augmented_combined_data_date_10_07_2024_14_33_pipeline", lang = "he") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|teamim_tiny_weightdecay_0_05_augmented_combined_data_date_10_07_2024_14_33_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|he| +|Size:|388.9 MB| + +## References + +https://huggingface.co/cantillation/Teamim-tiny_WeightDecay-0.05_Augmented_Combined-Data_date-10-07-2024_14-33 + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-telugu_bertu_ner_en.md b/docs/_posts/ahmedlone127/2024-09-06-telugu_bertu_ner_en.md new file mode 100644 index 00000000000000..cda914658e9453 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-telugu_bertu_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English telugu_bertu_ner BertForTokenClassification from kuppuluri +author: John Snow Labs +name: telugu_bertu_ner +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`telugu_bertu_ner` is a English model originally trained by kuppuluri. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/telugu_bertu_ner_en_5.5.0_3.0_1725663613122.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/telugu_bertu_ner_en_5.5.0_3.0_1725663613122.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("telugu_bertu_ner","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("telugu_bertu_ner", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
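+
+After `transform`, the predicted tags are stored in the `ner` output column as Spark NLP annotations; the tag itself sits in each annotation's `result` field. A short, optional way to inspect them (column names follow the snippet above):
+
+```python
+# Explode the per-token predictions into one row per tag.
+pipelineDF.selectExpr("explode(ner.result) as predicted_tag").show(truncate=False)
+```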
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|telugu_bertu_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|412.5 MB| + +## References + +https://huggingface.co/kuppuluri/telugu_bertu_ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-test_model_tambeo_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-test_model_tambeo_pipeline_en.md new file mode 100644 index 00000000000000..a7a90623537843 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-test_model_tambeo_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English test_model_tambeo_pipeline pipeline DistilBertEmbeddings from TamBeo +author: John Snow Labs +name: test_model_tambeo_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`test_model_tambeo_pipeline` is a English model originally trained by TamBeo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_model_tambeo_pipeline_en_5.5.0_3.0_1725639794718.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_model_tambeo_pipeline_en_5.5.0_3.0_1725639794718.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("test_model_tambeo_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("test_model_tambeo_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test_model_tambeo_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/TamBeo/test_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-test_ner_en.md b/docs/_posts/ahmedlone127/2024-09-06-test_ner_en.md new file mode 100644 index 00000000000000..3a0b74e4dcb2a2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-test_ner_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English test_ner DistilBertForTokenClassification from Falah +author: John Snow Labs +name: test_ner +date: 2024-09-06 +tags: [bert, en, open_source, token_classification, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`test_ner` is a English model originally trained by Falah. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_ner_en_5.5.0_3.0_1725599695128.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_ner_en_5.5.0_3.0_1725599695128.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("test_ner","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification
+    .pretrained("test_ner", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|243.8 MB| + +## References + +References + +https://huggingface.co/Falah/test-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-test_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-test_ner_pipeline_en.md new file mode 100644 index 00000000000000..0bc99c51f69b91 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-test_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English test_ner_pipeline pipeline DistilBertForTokenClassification from farrukhrasool112 +author: John Snow Labs +name: test_ner_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`test_ner_pipeline` is a English model originally trained by farrukhrasool112. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_ner_pipeline_en_5.5.0_3.0_1725599706958.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_ner_pipeline_en_5.5.0_3.0_1725599706958.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("test_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("test_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
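+
+The `df` passed to `transform` above is assumed to be a Spark DataFrame with a `text` column, which is what the pipeline's DocumentAssembler reads. A minimal, illustrative way to build one (the sample sentence is a placeholder):
+
+```python
+df = spark.createDataFrame([["John lives in New York."]]).toDF("text")
+annotations = pipeline.transform(df)
+```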
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/farrukhrasool112/test_NER + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-test_nhoxxkienn_en.md b/docs/_posts/ahmedlone127/2024-09-06-test_nhoxxkienn_en.md new file mode 100644 index 00000000000000..cfd2380de87ec2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-test_nhoxxkienn_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English test_nhoxxkienn DistilBertForQuestionAnswering from NhoxxKienn +author: John Snow Labs +name: test_nhoxxkienn +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`test_nhoxxkienn` is a English model originally trained by NhoxxKienn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_nhoxxkienn_en_5.5.0_3.0_1725621694489.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_nhoxxkienn_en_5.5.0_3.0_1725621694489.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("test_nhoxxkienn","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("test_nhoxxkienn", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test_nhoxxkienn| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/NhoxxKienn/test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-test_qa_model_1_en.md b/docs/_posts/ahmedlone127/2024-09-06-test_qa_model_1_en.md new file mode 100644 index 00000000000000..ad3dcc73051348 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-test_qa_model_1_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English test_qa_model_1 DistilBertForQuestionAnswering from KasunRajitha +author: John Snow Labs +name: test_qa_model_1 +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`test_qa_model_1` is a English model originally trained by KasunRajitha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_qa_model_1_en_5.5.0_3.0_1725622148687.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_qa_model_1_en_5.5.0_3.0_1725622148687.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("test_qa_model_1","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("test_qa_model_1", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test_qa_model_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/KasunRajitha/test_qa_model_1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-test_test_en.md b/docs/_posts/ahmedlone127/2024-09-06-test_test_en.md new file mode 100644 index 00000000000000..86dbcf2cc19473 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-test_test_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English test_test MPNetEmbeddings from marco-gancitano +author: John Snow Labs +name: test_test +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`test_test` is a English model originally trained by marco-gancitano. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_test_en_5.5.0_3.0_1725595693425.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_test_en_5.5.0_3.0_1725595693425.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("test_test","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("test_test","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
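+
+If you need the raw sentence vectors rather than the annotation structs, they can be pulled out of the `embeddings` column produced above; an optional follow-up:
+
+```python
+# Each annotation exposes its float vector in its `embeddings` field.
+pipelineDF.selectExpr("explode(embeddings) as emb") \
+    .selectExpr("emb.embeddings as vector") \
+    .show(1, truncate=80)
+```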
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test_test| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/marco-gancitano/test-test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-test_with_web_interface_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-test_with_web_interface_pipeline_en.md new file mode 100644 index 00000000000000..f9bf891676ecab --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-test_with_web_interface_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English test_with_web_interface_pipeline pipeline CamemBertEmbeddings from Hasanmurad +author: John Snow Labs +name: test_with_web_interface_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`test_with_web_interface_pipeline` is a English model originally trained by Hasanmurad. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_with_web_interface_pipeline_en_5.5.0_3.0_1725637159340.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_with_web_interface_pipeline_en_5.5.0_3.0_1725637159340.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("test_with_web_interface_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("test_with_web_interface_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test_with_web_interface_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/Hasanmurad/test_with_web_interface + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-text_classification_yaqinlah_en.md b/docs/_posts/ahmedlone127/2024-09-06-text_classification_yaqinlah_en.md new file mode 100644 index 00000000000000..1dabdc453fa328 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-text_classification_yaqinlah_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English text_classification_yaqinlah DistilBertForSequenceClassification from yaqinlah +author: John Snow Labs +name: text_classification_yaqinlah +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`text_classification_yaqinlah` is a English model originally trained by yaqinlah. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/text_classification_yaqinlah_en_5.5.0_3.0_1725607834939.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/text_classification_yaqinlah_en_5.5.0_3.0_1725607834939.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("text_classification_yaqinlah","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("text_classification_yaqinlah", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|text_classification_yaqinlah| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/yaqinlah/text_classification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-text_classification_yaqinlah_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-text_classification_yaqinlah_pipeline_en.md new file mode 100644 index 00000000000000..f13a63125b2475 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-text_classification_yaqinlah_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English text_classification_yaqinlah_pipeline pipeline DistilBertForSequenceClassification from yaqinlah +author: John Snow Labs +name: text_classification_yaqinlah_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`text_classification_yaqinlah_pipeline` is a English model originally trained by yaqinlah. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/text_classification_yaqinlah_pipeline_en_5.5.0_3.0_1725607846635.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/text_classification_yaqinlah_pipeline_en_5.5.0_3.0_1725607846635.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("text_classification_yaqinlah_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("text_classification_yaqinlah_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|text_classification_yaqinlah_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/yaqinlah/text_classification + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-text_complexity_roberta_en.md b/docs/_posts/ahmedlone127/2024-09-06-text_complexity_roberta_en.md new file mode 100644 index 00000000000000..16ff0b3e9c8937 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-text_complexity_roberta_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English text_complexity_roberta XlmRoBertaForTokenClassification from k0nv1ct +author: John Snow Labs +name: text_complexity_roberta +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`text_complexity_roberta` is a English model originally trained by k0nv1ct. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/text_complexity_roberta_en_5.5.0_3.0_1725591806832.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/text_complexity_roberta_en_5.5.0_3.0_1725591806832.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("text_complexity_roberta","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("text_complexity_roberta", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|text_complexity_roberta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|805.9 MB| + +## References + +https://huggingface.co/k0nv1ct/text-complexity-roberta \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-tiny_bert_0102_5000_en.md b/docs/_posts/ahmedlone127/2024-09-06-tiny_bert_0102_5000_en.md new file mode 100644 index 00000000000000..ba2a9a4024e9ff --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-tiny_bert_0102_5000_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English tiny_bert_0102_5000 AlbertForSequenceClassification from gg-ai +author: John Snow Labs +name: tiny_bert_0102_5000 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tiny_bert_0102_5000` is a English model originally trained by gg-ai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_bert_0102_5000_en_5.5.0_3.0_1725662286333.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_bert_0102_5000_en_5.5.0_3.0_1725662286333.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = AlbertForSequenceClassification.pretrained("tiny_bert_0102_5000","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = AlbertForSequenceClassification.pretrained("tiny_bert_0102_5000", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_bert_0102_5000| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|20.4 MB| + +## References + +https://huggingface.co/gg-ai/tiny-bert-0102-5000 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-tiny_bert_0102_last_iter_en.md b/docs/_posts/ahmedlone127/2024-09-06-tiny_bert_0102_last_iter_en.md new file mode 100644 index 00000000000000..dbbc4cfa303fd8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-tiny_bert_0102_last_iter_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English tiny_bert_0102_last_iter AlbertForSequenceClassification from gg-ai +author: John Snow Labs +name: tiny_bert_0102_last_iter +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tiny_bert_0102_last_iter` is a English model originally trained by gg-ai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_bert_0102_last_iter_en_5.5.0_3.0_1725628375270.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_bert_0102_last_iter_en_5.5.0_3.0_1725628375270.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = AlbertForSequenceClassification.pretrained("tiny_bert_0102_last_iter","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = AlbertForSequenceClassification.pretrained("tiny_bert_0102_last_iter", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_bert_0102_last_iter| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|20.4 MB| + +## References + +https://huggingface.co/gg-ai/tiny-bert-0102-last-iter \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-tiny_random_mpnetforsequenceclassification_en.md b/docs/_posts/ahmedlone127/2024-09-06-tiny_random_mpnetforsequenceclassification_en.md new file mode 100644 index 00000000000000..55e14c0adf1dbb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-tiny_random_mpnetforsequenceclassification_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English tiny_random_mpnetforsequenceclassification MPNetForSequenceClassification from hf-tiny-model-private +author: John Snow Labs +name: tiny_random_mpnetforsequenceclassification +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, mpnet] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tiny_random_mpnetforsequenceclassification` is a English model originally trained by hf-tiny-model-private. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_random_mpnetforsequenceclassification_en_5.5.0_3.0_1725629680628.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_random_mpnetforsequenceclassification_en_5.5.0_3.0_1725629680628.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = MPNetForSequenceClassification.pretrained("tiny_random_mpnetforsequenceclassification","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = MPNetForSequenceClassification.pretrained("tiny_random_mpnetforsequenceclassification", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_random_mpnetforsequenceclassification| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|905.1 KB| + +## References + +https://huggingface.co/hf-tiny-model-private/tiny-random-MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-tntoeng_by_ibrahim_v1_en.md b/docs/_posts/ahmedlone127/2024-09-06-tntoeng_by_ibrahim_v1_en.md new file mode 100644 index 00000000000000..30c19a9d6bda22 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-tntoeng_by_ibrahim_v1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English tntoeng_by_ibrahim_v1 MarianTransformer from IbrahimAL24 +author: John Snow Labs +name: tntoeng_by_ibrahim_v1 +date: 2024-09-06 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tntoeng_by_ibrahim_v1` is a English model originally trained by IbrahimAL24. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tntoeng_by_ibrahim_v1_en_5.5.0_3.0_1725635491497.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tntoeng_by_ibrahim_v1_en_5.5.0_3.0_1725635491497.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("tntoeng_by_ibrahim_v1","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("tntoeng_by_ibrahim_v1","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
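+
+The translated sentences end up in the `translation` output column; each annotation's `result` field holds the translated text. A short, optional follow-up for reading them out (column names follow the snippet above):
+
+```python
+pipelineDF.selectExpr("explode(translation.result) as translated_text").show(truncate=False)
+```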
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tntoeng_by_ibrahim_v1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|527.8 MB| + +## References + +https://huggingface.co/IbrahimAL24/TNtoEng-By-Ibrahim-V1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-tntoeng_by_ibrahim_v1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-tntoeng_by_ibrahim_v1_pipeline_en.md new file mode 100644 index 00000000000000..854696ec118b99 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-tntoeng_by_ibrahim_v1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English tntoeng_by_ibrahim_v1_pipeline pipeline MarianTransformer from IbrahimAL24 +author: John Snow Labs +name: tntoeng_by_ibrahim_v1_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tntoeng_by_ibrahim_v1_pipeline` is a English model originally trained by IbrahimAL24. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tntoeng_by_ibrahim_v1_pipeline_en_5.5.0_3.0_1725635517886.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tntoeng_by_ibrahim_v1_pipeline_en_5.5.0_3.0_1725635517886.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("tntoeng_by_ibrahim_v1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("tntoeng_by_ibrahim_v1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tntoeng_by_ibrahim_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|528.3 MB| + +## References + +https://huggingface.co/IbrahimAL24/TNtoEng-By-Ibrahim-V1 + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-topic_topic_random0_seed2_bernice_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-topic_topic_random0_seed2_bernice_pipeline_en.md new file mode 100644 index 00000000000000..ac928582f2b8c8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-topic_topic_random0_seed2_bernice_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English topic_topic_random0_seed2_bernice_pipeline pipeline XlmRoBertaForSequenceClassification from tweettemposhift +author: John Snow Labs +name: topic_topic_random0_seed2_bernice_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`topic_topic_random0_seed2_bernice_pipeline` is a English model originally trained by tweettemposhift. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/topic_topic_random0_seed2_bernice_pipeline_en_5.5.0_3.0_1725620599589.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/topic_topic_random0_seed2_bernice_pipeline_en_5.5.0_3.0_1725620599589.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("topic_topic_random0_seed2_bernice_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("topic_topic_random0_seed2_bernice_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|topic_topic_random0_seed2_bernice_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|805.5 MB| + +## References + +https://huggingface.co/tweettemposhift/topic-topic_random0_seed2-bernice + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-torch_distilbert_policies_comparison_en.md b/docs/_posts/ahmedlone127/2024-09-06-torch_distilbert_policies_comparison_en.md new file mode 100644 index 00000000000000..d5bd9203a4143f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-torch_distilbert_policies_comparison_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English torch_distilbert_policies_comparison DistilBertForSequenceClassification from rubivivi +author: John Snow Labs +name: torch_distilbert_policies_comparison +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`torch_distilbert_policies_comparison` is a English model originally trained by rubivivi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/torch_distilbert_policies_comparison_en_5.5.0_3.0_1725607834866.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/torch_distilbert_policies_comparison_en_5.5.0_3.0_1725607834866.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("torch_distilbert_policies_comparison","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("torch_distilbert_policies_comparison", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|torch_distilbert_policies_comparison| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/rubivivi/torch_distilbert_policies_comparison \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-toxicity_model_es.md b/docs/_posts/ahmedlone127/2024-09-06-toxicity_model_es.md new file mode 100644 index 00000000000000..fa342421b1212e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-toxicity_model_es.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Castilian, Spanish toxicity_model RoBertaForSequenceClassification from rsepulvedat +author: John Snow Labs +name: toxicity_model +date: 2024-09-06 +tags: [es, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`toxicity_model` is a Castilian, Spanish model originally trained by rsepulvedat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/toxicity_model_es_5.5.0_3.0_1725613888570.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/toxicity_model_es_5.5.0_3.0_1725613888570.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("toxicity_model","es") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("toxicity_model", "es")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|toxicity_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|es| +|Size:|431.8 MB| + +## References + +https://huggingface.co/rsepulvedat/Toxicity_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-toxicity_model_pipeline_es.md b/docs/_posts/ahmedlone127/2024-09-06-toxicity_model_pipeline_es.md new file mode 100644 index 00000000000000..ea7216350a04cf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-toxicity_model_pipeline_es.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Castilian, Spanish toxicity_model_pipeline pipeline RoBertaForSequenceClassification from rsepulvedat +author: John Snow Labs +name: toxicity_model_pipeline +date: 2024-09-06 +tags: [es, open_source, pipeline, onnx] +task: Text Classification +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`toxicity_model_pipeline` is a Castilian, Spanish model originally trained by rsepulvedat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/toxicity_model_pipeline_es_5.5.0_3.0_1725613922203.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/toxicity_model_pipeline_es_5.5.0_3.0_1725613922203.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("toxicity_model_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("toxicity_model_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|toxicity_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|431.9 MB| + +## References + +https://huggingface.co/rsepulvedat/Toxicity_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-translation_english_lug_v3_en.md b/docs/_posts/ahmedlone127/2024-09-06-translation_english_lug_v3_en.md new file mode 100644 index 00000000000000..b85cc4b1663103 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-translation_english_lug_v3_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English translation_english_lug_v3 MarianTransformer from atwine +author: John Snow Labs +name: translation_english_lug_v3 +date: 2024-09-06 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`translation_english_lug_v3` is a English model originally trained by atwine. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/translation_english_lug_v3_en_5.5.0_3.0_1725634958805.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/translation_english_lug_v3_en_5.5.0_3.0_1725634958805.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("translation_english_lug_v3","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+  .setInputCols(Array("document"))
+  .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("translation_english_lug_v3","en")
+  .setInputCols(Array("sentence"))
+  .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div><div class="h3-box" markdown="1">
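+
+Once the pipeline above has run, the translated text can be read from the "translation" output column listed in the Model Information table. A minimal sketch, assuming `pipelineDF` was produced as shown:
+
+```python
+# Each translated sentence is returned in the `result` field of the "translation" annotations.
+pipelineDF.selectExpr("explode(translation.result) as translated").show(truncate = False)
+```
+
+</div><div class="h3-box" markdown="1">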
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|translation_english_lug_v3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|513.3 MB| + +## References + +https://huggingface.co/atwine/translation-en-lug-v3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-translation_english_lug_v3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-translation_english_lug_v3_pipeline_en.md new file mode 100644 index 00000000000000..9600db2f731701 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-translation_english_lug_v3_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English translation_english_lug_v3_pipeline pipeline MarianTransformer from atwine +author: John Snow Labs +name: translation_english_lug_v3_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`translation_english_lug_v3_pipeline` is a English model originally trained by atwine. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/translation_english_lug_v3_pipeline_en_5.5.0_3.0_1725634993489.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/translation_english_lug_v3_pipeline_en_5.5.0_3.0_1725634993489.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("translation_english_lug_v3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("translation_english_lug_v3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|translation_english_lug_v3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|513.8 MB| + +## References + +https://huggingface.co/atwine/translation-en-lug-v3 + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-tuf_fewshot_en.md b/docs/_posts/ahmedlone127/2024-09-06-tuf_fewshot_en.md new file mode 100644 index 00000000000000..505a05c8b176c0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-tuf_fewshot_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English tuf_fewshot MPNetEmbeddings from pig4431 +author: John Snow Labs +name: tuf_fewshot +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tuf_fewshot` is a English model originally trained by pig4431. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tuf_fewshot_en_5.5.0_3.0_1725595251276.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tuf_fewshot_en_5.5.0_3.0_1725595251276.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("tuf_fewshot","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("tuf_fewshot","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
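+
+To work with the raw sentence vectors rather than the annotation structs, the float array can be pulled out of the "embeddings" output column. A minimal sketch, assuming `pipelineDF` was produced as shown above:
+
+```python
+# Each MPNet annotation carries its float vector in the `embeddings` field.
+vectors = pipelineDF.selectExpr("explode(embeddings.embeddings) as vector")
+vectors.show(1, truncate = 80)
+```
+
+</div><div class="h3-box" markdown="1">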
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tuf_fewshot| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/pig4431/TUF_fewshot \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-tuf_fewshot_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-tuf_fewshot_pipeline_en.md new file mode 100644 index 00000000000000..f3d36051e75a3d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-tuf_fewshot_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English tuf_fewshot_pipeline pipeline MPNetEmbeddings from pig4431 +author: John Snow Labs +name: tuf_fewshot_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tuf_fewshot_pipeline` is a English model originally trained by pig4431. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tuf_fewshot_pipeline_en_5.5.0_3.0_1725595272981.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tuf_fewshot_pipeline_en_5.5.0_3.0_1725595272981.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("tuf_fewshot_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("tuf_fewshot_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tuf_fewshot_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/pig4431/TUF_fewshot + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-useless_model_try_1_en.md b/docs/_posts/ahmedlone127/2024-09-06-useless_model_try_1_en.md new file mode 100644 index 00000000000000..0d627a98816fc6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-useless_model_try_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English useless_model_try_1 RoBertaForSequenceClassification from timoneda +author: John Snow Labs +name: useless_model_try_1 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`useless_model_try_1` is a English model originally trained by timoneda. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/useless_model_try_1_en_5.5.0_3.0_1725612846293.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/useless_model_try_1_en_5.5.0_3.0_1725612846293.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("useless_model_try_1","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("useless_model_try_1", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div><div class="h3-box" markdown="1">
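+
+After the pipeline above has been applied, the predicted label for each row is available in the "class" output column. A minimal sketch, assuming `pipelineDF` was produced as shown:
+
+```python
+# `class.result` holds the predicted label; per-class scores sit in the annotation metadata.
+pipelineDF.selectExpr("text", "class.result as prediction").show(truncate = False)
+```
+
+</div><div class="h3-box" markdown="1">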
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|useless_model_try_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/timoneda/useless_model_try_1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-village_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-village_model_pipeline_en.md new file mode 100644 index 00000000000000..96f7e3222d2a40 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-village_model_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English village_model_pipeline pipeline DistilBertForQuestionAnswering from rugvedabodke +author: John Snow Labs +name: village_model_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`village_model_pipeline` is a English model originally trained by rugvedabodke. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/village_model_pipeline_en_5.5.0_3.0_1725652603615.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/village_model_pipeline_en_5.5.0_3.0_1725652603615.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("village_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("village_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|village_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/rugvedabodke/village_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_age_estimator_hi.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_age_estimator_hi.md new file mode 100644 index 00000000000000..916551dc002807 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_age_estimator_hi.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Hindi whisper_age_estimator WhisperForCTC from codelion +author: John Snow Labs +name: whisper_age_estimator +date: 2024-09-06 +tags: [hi, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: hi +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_age_estimator` is a Hindi model originally trained by codelion. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_age_estimator_hi_5.5.0_3.0_1725641632608.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_age_estimator_hi_5.5.0_3.0_1725641632608.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_age_estimator","hi") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+  .setInputCol("audio_content")
+  .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_age_estimator", "hi")
+  .setInputCols(Array("audio_assembler"))
+  .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div><div class="h3-box" markdown="1">
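+
+The `data` DataFrame referenced above is not defined in the snippet. One way to build it is to decode an audio file into a float array and place it in the "audio_content" column the AudioAssembler reads from. A minimal sketch, assuming a local file name (`sample.wav` is a placeholder) and the `librosa` library for decoding; neither ships with Spark NLP:
+
+```python
+import librosa
+
+# Whisper checkpoints expect 16 kHz mono audio.
+raw_floats, _ = librosa.load("sample.wav", sr = 16000)
+
+data = spark.createDataFrame([[raw_floats.tolist()]]).toDF("audio_content")
+
+# After fitting and transforming as shown above, the transcription lands in the "text" column:
+# pipelineDF.selectExpr("text.result").show(truncate = False)
+```
+
+</div><div class="h3-box" markdown="1">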
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_age_estimator| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|hi| +|Size:|643.5 MB| + +## References + +https://huggingface.co/codelion/whisper-age-estimator \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_age_estimator_pipeline_hi.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_age_estimator_pipeline_hi.md new file mode 100644 index 00000000000000..c5ece1bfbc80b2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_age_estimator_pipeline_hi.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Hindi whisper_age_estimator_pipeline pipeline WhisperForCTC from codelion +author: John Snow Labs +name: whisper_age_estimator_pipeline +date: 2024-09-06 +tags: [hi, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: hi +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_age_estimator_pipeline` is a Hindi model originally trained by codelion. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_age_estimator_pipeline_hi_5.5.0_3.0_1725641668668.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_age_estimator_pipeline_hi_5.5.0_3.0_1725641668668.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_age_estimator_pipeline", lang = "hi") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_age_estimator_pipeline", lang = "hi") +val annotations = pipeline.transform(df) + +``` +
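+
+For speech pipelines the input DataFrame `df` needs an "audio_content" column of float arrays rather than text. A minimal sketch, assuming the same librosa-based decoding used for the standalone model above (`sample.wav` is a placeholder), that the bundled AudioAssembler reads from "audio_content", and that the transcription is written to a "text" column:
+
+```python
+import librosa
+
+raw_floats, _ = librosa.load("sample.wav", sr = 16000)
+df = spark.createDataFrame([[raw_floats.tolist()]]).toDF("audio_content")
+
+pipeline = PretrainedPipeline("whisper_age_estimator_pipeline", lang = "hi")
+annotations = pipeline.transform(df)
+annotations.selectExpr("text.result").show(truncate = False)
+```
+
+</div><div class="h3-box" markdown="1">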
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_age_estimator_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|hi| +|Size:|643.5 MB| + +## References + +https://huggingface.co/codelion/whisper-age-estimator + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_base_fine_tuned_russian_pipeline_ru.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_base_fine_tuned_russian_pipeline_ru.md new file mode 100644 index 00000000000000..4595d848cdb3fa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_base_fine_tuned_russian_pipeline_ru.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Russian whisper_base_fine_tuned_russian_pipeline pipeline WhisperForCTC from artyomboyko +author: John Snow Labs +name: whisper_base_fine_tuned_russian_pipeline +date: 2024-09-06 +tags: [ru, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: ru +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_base_fine_tuned_russian_pipeline` is a Russian model originally trained by artyomboyko. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_base_fine_tuned_russian_pipeline_ru_5.5.0_3.0_1725601678629.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_base_fine_tuned_russian_pipeline_ru_5.5.0_3.0_1725601678629.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_base_fine_tuned_russian_pipeline", lang = "ru") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_base_fine_tuned_russian_pipeline", lang = "ru") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_base_fine_tuned_russian_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ru| +|Size:|642.9 MB| + +## References + +https://huggingface.co/artyomboyko/whisper-base-fine_tuned-ru + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_base_fine_tuned_russian_ru.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_base_fine_tuned_russian_ru.md new file mode 100644 index 00000000000000..d57fee983a4e78 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_base_fine_tuned_russian_ru.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Russian whisper_base_fine_tuned_russian WhisperForCTC from artyomboyko +author: John Snow Labs +name: whisper_base_fine_tuned_russian +date: 2024-09-06 +tags: [ru, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: ru +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_base_fine_tuned_russian` is a Russian model originally trained by artyomboyko. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_base_fine_tuned_russian_ru_5.5.0_3.0_1725601637537.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_base_fine_tuned_russian_ru_5.5.0_3.0_1725601637537.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_base_fine_tuned_russian","ru") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+  .setInputCol("audio_content")
+  .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_base_fine_tuned_russian", "ru")
+  .setInputCols(Array("audio_assembler"))
+  .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div><div class="h3-box" markdown="1">
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_base_fine_tuned_russian| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|ru| +|Size:|642.9 MB| + +## References + +https://huggingface.co/artyomboyko/whisper-base-fine_tuned-ru \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_base_wolof_cifope_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_base_wolof_cifope_en.md new file mode 100644 index 00000000000000..9263a1422058a2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_base_wolof_cifope_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_base_wolof_cifope WhisperForCTC from cifope +author: John Snow Labs +name: whisper_base_wolof_cifope +date: 2024-09-06 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_base_wolof_cifope` is a English model originally trained by cifope. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_base_wolof_cifope_en_5.5.0_3.0_1725603382972.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_base_wolof_cifope_en_5.5.0_3.0_1725603382972.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_base_wolof_cifope","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+  .setInputCol("audio_content")
+  .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_base_wolof_cifope", "en")
+  .setInputCols(Array("audio_assembler"))
+  .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div><div class="h3-box" markdown="1">
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_base_wolof_cifope| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|641.5 MB| + +## References + +https://huggingface.co/cifope/whisper-base-wolof \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_base_wolof_cifope_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_base_wolof_cifope_pipeline_en.md new file mode 100644 index 00000000000000..956f3d5047edd7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_base_wolof_cifope_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_base_wolof_cifope_pipeline pipeline WhisperForCTC from cifope +author: John Snow Labs +name: whisper_base_wolof_cifope_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_base_wolof_cifope_pipeline` is a English model originally trained by cifope. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_base_wolof_cifope_pipeline_en_5.5.0_3.0_1725603416084.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_base_wolof_cifope_pipeline_en_5.5.0_3.0_1725603416084.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_base_wolof_cifope_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_base_wolof_cifope_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_base_wolof_cifope_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|641.6 MB| + +## References + +https://huggingface.co/cifope/whisper-base-wolof + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_finetuning_phoneme_transcription_g2p_example_3_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_finetuning_phoneme_transcription_g2p_example_3_en.md new file mode 100644 index 00000000000000..7565fc4bd83785 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_finetuning_phoneme_transcription_g2p_example_3_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_finetuning_phoneme_transcription_g2p_example_3 WhisperForCTC from dg96 +author: John Snow Labs +name: whisper_finetuning_phoneme_transcription_g2p_example_3 +date: 2024-09-06 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_finetuning_phoneme_transcription_g2p_example_3` is a English model originally trained by dg96. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_finetuning_phoneme_transcription_g2p_example_3_en_5.5.0_3.0_1725604181281.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_finetuning_phoneme_transcription_g2p_example_3_en_5.5.0_3.0_1725604181281.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_finetuning_phoneme_transcription_g2p_example_3","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+  .setInputCol("audio_content")
+  .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_finetuning_phoneme_transcription_g2p_example_3", "en")
+  .setInputCols(Array("audio_assembler"))
+  .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div><div class="h3-box" markdown="1">
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_finetuning_phoneme_transcription_g2p_example_3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|643.6 MB| + +## References + +https://huggingface.co/dg96/whisper-finetuning-phoneme-transcription-g2p-example-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_finetuning_phoneme_transcription_g2p_example_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_finetuning_phoneme_transcription_g2p_example_3_pipeline_en.md new file mode 100644 index 00000000000000..f48ce78c9c2302 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_finetuning_phoneme_transcription_g2p_example_3_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_finetuning_phoneme_transcription_g2p_example_3_pipeline pipeline WhisperForCTC from dg96 +author: John Snow Labs +name: whisper_finetuning_phoneme_transcription_g2p_example_3_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_finetuning_phoneme_transcription_g2p_example_3_pipeline` is a English model originally trained by dg96. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_finetuning_phoneme_transcription_g2p_example_3_pipeline_en_5.5.0_3.0_1725604214446.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_finetuning_phoneme_transcription_g2p_example_3_pipeline_en_5.5.0_3.0_1725604214446.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_finetuning_phoneme_transcription_g2p_example_3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_finetuning_phoneme_transcription_g2p_example_3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_finetuning_phoneme_transcription_g2p_example_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|643.6 MB| + +## References + +https://huggingface.co/dg96/whisper-finetuning-phoneme-transcription-g2p-example-3 + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_italian_small_it.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_italian_small_it.md new file mode 100644 index 00000000000000..bf1b4e6633ae4e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_italian_small_it.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Italian whisper_italian_small WhisperForCTC from ALM +author: John Snow Labs +name: whisper_italian_small +date: 2024-09-06 +tags: [it, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: it +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_italian_small` is a Italian model originally trained by ALM. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_italian_small_it_5.5.0_3.0_1725642103775.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_italian_small_it_5.5.0_3.0_1725642103775.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_italian_small","it") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+  .setInputCol("audio_content")
+  .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_italian_small", "it")
+  .setInputCols(Array("audio_assembler"))
+  .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div><div class="h3-box" markdown="1">
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_italian_small| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|it| +|Size:|1.7 GB| + +## References + +https://huggingface.co/ALM/whisper-it-small \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_italian_small_pipeline_it.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_italian_small_pipeline_it.md new file mode 100644 index 00000000000000..26ff70a02d6cf3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_italian_small_pipeline_it.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Italian whisper_italian_small_pipeline pipeline WhisperForCTC from ALM +author: John Snow Labs +name: whisper_italian_small_pipeline +date: 2024-09-06 +tags: [it, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: it +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_italian_small_pipeline` is a Italian model originally trained by ALM. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_italian_small_pipeline_it_5.5.0_3.0_1725642196600.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_italian_small_pipeline_it_5.5.0_3.0_1725642196600.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_italian_small_pipeline", lang = "it") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_italian_small_pipeline", lang = "it") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_italian_small_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|it| +|Size:|1.7 GB| + +## References + +https://huggingface.co/ALM/whisper-it-small + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_medium_arabic_arbml_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_medium_arabic_arbml_en.md new file mode 100644 index 00000000000000..7b13a0cf197cff --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_medium_arabic_arbml_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_medium_arabic_arbml WhisperForCTC from arbml +author: John Snow Labs +name: whisper_medium_arabic_arbml +date: 2024-09-06 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_medium_arabic_arbml` is a English model originally trained by arbml. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_medium_arabic_arbml_en_5.5.0_3.0_1725582249766.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_medium_arabic_arbml_en_5.5.0_3.0_1725582249766.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_medium_arabic_arbml","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+  .setInputCol("audio_content")
+  .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_medium_arabic_arbml", "en")
+  .setInputCols(Array("audio_assembler"))
+  .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div><div class="h3-box" markdown="1">
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_medium_arabic_arbml| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|4.8 GB| + +## References + +https://huggingface.co/arbml/whisper-medium-ar \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_medium_eg_ar.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_medium_eg_ar.md new file mode 100644 index 00000000000000..4c086c58d88ed6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_medium_eg_ar.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Arabic whisper_medium_eg WhisperForCTC from abuelnasr +author: John Snow Labs +name: whisper_medium_eg +date: 2024-09-06 +tags: [ar, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: ar +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_medium_eg` is a Arabic model originally trained by abuelnasr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_medium_eg_ar_5.5.0_3.0_1725602858602.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_medium_eg_ar_5.5.0_3.0_1725602858602.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_medium_eg","ar") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+  .setInputCol("audio_content")
+  .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_medium_eg", "ar")
+  .setInputCols(Array("audio_assembler"))
+  .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div><div class="h3-box" markdown="1">
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_medium_eg| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|ar| +|Size:|4.8 GB| + +## References + +https://huggingface.co/abuelnasr/whisper-medium-eg \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_medium_english_santhosh643_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_medium_english_santhosh643_en.md new file mode 100644 index 00000000000000..80d1d5b2e7cbaf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_medium_english_santhosh643_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_medium_english_santhosh643 WhisperForCTC from santhosh643 +author: John Snow Labs +name: whisper_medium_english_santhosh643 +date: 2024-09-06 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_medium_english_santhosh643` is a English model originally trained by santhosh643. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_medium_english_santhosh643_en_5.5.0_3.0_1725645040807.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_medium_english_santhosh643_en_5.5.0_3.0_1725645040807.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_medium_english_santhosh643","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+  .setInputCol("audio_content")
+  .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_medium_english_santhosh643", "en")
+  .setInputCols(Array("audio_assembler"))
+  .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div><div class="h3-box" markdown="1">
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_medium_english_santhosh643| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|4.8 GB| + +## References + +https://huggingface.co/santhosh643/whisper-medium-english \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_medium_medical_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_medium_medical_pipeline_en.md new file mode 100644 index 00000000000000..2b5cee349510c4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_medium_medical_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_medium_medical_pipeline pipeline WhisperForCTC from mahendra0203 +author: John Snow Labs +name: whisper_medium_medical_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_medium_medical_pipeline` is a English model originally trained by mahendra0203. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_medium_medical_pipeline_en_5.5.0_3.0_1725648599077.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_medium_medical_pipeline_en_5.5.0_3.0_1725648599077.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_medium_medical_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_medium_medical_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_medium_medical_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|4.8 GB| + +## References + +https://huggingface.co/mahendra0203/whisper-medium-medical + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_medium_portuguese_3000h_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_medium_portuguese_3000h_en.md new file mode 100644 index 00000000000000..d62dcea9972399 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_medium_portuguese_3000h_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_medium_portuguese_3000h WhisperForCTC from fsicoli +author: John Snow Labs +name: whisper_medium_portuguese_3000h +date: 2024-09-06 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_medium_portuguese_3000h` is a English model originally trained by fsicoli. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_medium_portuguese_3000h_en_5.5.0_3.0_1725587505029.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_medium_portuguese_3000h_en_5.5.0_3.0_1725587505029.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_medium_portuguese_3000h","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+  .setInputCol("audio_content")
+  .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_medium_portuguese_3000h", "en")
+  .setInputCols(Array("audio_assembler"))
+  .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div><div class="h3-box" markdown="1">
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_medium_portuguese_3000h| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|4.7 GB| + +## References + +https://huggingface.co/fsicoli/whisper-medium-pt-3000h \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_samll_hassanya_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_samll_hassanya_en.md new file mode 100644 index 00000000000000..74fd15ca0ece7b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_samll_hassanya_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_samll_hassanya WhisperForCTC from abscheik +author: John Snow Labs +name: whisper_samll_hassanya +date: 2024-09-06 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_samll_hassanya` is a English model originally trained by abscheik. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_samll_hassanya_en_5.5.0_3.0_1725584447447.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_samll_hassanya_en_5.5.0_3.0_1725584447447.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_samll_hassanya","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+  .setInputCol("audio_content")
+  .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_samll_hassanya", "en")
+  .setInputCols(Array("audio_assembler"))
+  .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div><div class="h3-box" markdown="1">
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_samll_hassanya| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/abscheik/whisper-samll-hassanya \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_samll_hassanya_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_samll_hassanya_pipeline_en.md new file mode 100644 index 00000000000000..41a8a6d0275500 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_samll_hassanya_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_samll_hassanya_pipeline pipeline WhisperForCTC from abscheik +author: John Snow Labs +name: whisper_samll_hassanya_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_samll_hassanya_pipeline` is a English model originally trained by abscheik. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_samll_hassanya_pipeline_en_5.5.0_3.0_1725584537817.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_samll_hassanya_pipeline_en_5.5.0_3.0_1725584537817.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_samll_hassanya_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_samll_hassanya_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_samll_hassanya_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/abscheik/whisper-samll-hassanya + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_arabic_gnsepili_ar.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_arabic_gnsepili_ar.md new file mode 100644 index 00000000000000..d58d853d0569bd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_arabic_gnsepili_ar.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Arabic whisper_small_arabic_gnsepili WhisperForCTC from gnsepili +author: John Snow Labs +name: whisper_small_arabic_gnsepili +date: 2024-09-06 +tags: [ar, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: ar +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_arabic_gnsepili` is a Arabic model originally trained by gnsepili. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_arabic_gnsepili_ar_5.5.0_3.0_1725646406585.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_arabic_gnsepili_ar_5.5.0_3.0_1725646406585.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_arabic_gnsepili","ar") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_arabic_gnsepili", "ar")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_arabic_gnsepili| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|ar| +|Size:|1.1 GB| + +## References + +https://huggingface.co/gnsepili/whisper-small-ar \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_arabic_gnsepili_pipeline_ar.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_arabic_gnsepili_pipeline_ar.md new file mode 100644 index 00000000000000..8d7f75f252f66f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_arabic_gnsepili_pipeline_ar.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Arabic whisper_small_arabic_gnsepili_pipeline pipeline WhisperForCTC from gnsepili +author: John Snow Labs +name: whisper_small_arabic_gnsepili_pipeline +date: 2024-09-06 +tags: [ar, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: ar +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_arabic_gnsepili_pipeline` is a Arabic model originally trained by gnsepili. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_arabic_gnsepili_pipeline_ar_5.5.0_3.0_1725646703521.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_arabic_gnsepili_pipeline_ar_5.5.0_3.0_1725646703521.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_arabic_gnsepili_pipeline", lang = "ar") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_arabic_gnsepili_pipeline", lang = "ar") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_arabic_gnsepili_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ar| +|Size:|1.1 GB| + +## References + +https://huggingface.co/gnsepili/whisper-small-ar + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_bb_vahn98_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_bb_vahn98_en.md new file mode 100644 index 00000000000000..95204a6e42defb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_bb_vahn98_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_small_bb_vahn98 WhisperForCTC from vahn98 +author: John Snow Labs +name: whisper_small_bb_vahn98 +date: 2024-09-06 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_bb_vahn98` is a English model originally trained by vahn98. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_bb_vahn98_en_5.5.0_3.0_1725583666781.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_bb_vahn98_en_5.5.0_3.0_1725583666781.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_bb_vahn98","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_bb_vahn98", "en")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_bb_vahn98| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/vahn98/whisper-small-bb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_bb_vahn98_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_bb_vahn98_pipeline_en.md new file mode 100644 index 00000000000000..cc76c22fb45dbd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_bb_vahn98_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_small_bb_vahn98_pipeline pipeline WhisperForCTC from vahn98 +author: John Snow Labs +name: whisper_small_bb_vahn98_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_bb_vahn98_pipeline` is a English model originally trained by vahn98. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_bb_vahn98_pipeline_en_5.5.0_3.0_1725583753031.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_bb_vahn98_pipeline_en_5.5.0_3.0_1725583753031.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_bb_vahn98_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_bb_vahn98_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_bb_vahn98_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/vahn98/whisper-small-bb + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_belarusian_be.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_belarusian_be.md new file mode 100644 index 00000000000000..f5e448e6639206 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_belarusian_be.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Belarusian whisper_small_belarusian WhisperForCTC from ales +author: John Snow Labs +name: whisper_small_belarusian +date: 2024-09-06 +tags: [be, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: be +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_belarusian` is a Belarusian model originally trained by ales. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_belarusian_be_5.5.0_3.0_1725602124841.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_belarusian_be_5.5.0_3.0_1725602124841.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_belarusian","be") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_belarusian", "be")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_belarusian| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|be| +|Size:|1.7 GB| + +## References + +https://huggingface.co/ales/whisper-small-belarusian \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_belarusian_pipeline_be.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_belarusian_pipeline_be.md new file mode 100644 index 00000000000000..7c116549dab7ff --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_belarusian_pipeline_be.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Belarusian whisper_small_belarusian_pipeline pipeline WhisperForCTC from ales +author: John Snow Labs +name: whisper_small_belarusian_pipeline +date: 2024-09-06 +tags: [be, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: be +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_belarusian_pipeline` is a Belarusian model originally trained by ales. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_belarusian_pipeline_be_5.5.0_3.0_1725602215990.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_belarusian_pipeline_be_5.5.0_3.0_1725602215990.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_belarusian_pipeline", lang = "be") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_belarusian_pipeline", lang = "be") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_belarusian_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|be| +|Size:|1.7 GB| + +## References + +https://huggingface.co/ales/whisper-small-belarusian + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_breton_arzhurkoadek_br.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_breton_arzhurkoadek_br.md new file mode 100644 index 00000000000000..05ccaa6cf39413 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_breton_arzhurkoadek_br.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Breton whisper_small_breton_arzhurkoadek WhisperForCTC from ArzhurKoadek +author: John Snow Labs +name: whisper_small_breton_arzhurkoadek +date: 2024-09-06 +tags: [br, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: br +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_breton_arzhurkoadek` is a Breton model originally trained by ArzhurKoadek. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_breton_arzhurkoadek_br_5.5.0_3.0_1725583156607.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_breton_arzhurkoadek_br_5.5.0_3.0_1725583156607.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_breton_arzhurkoadek","br") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_breton_arzhurkoadek", "br")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_breton_arzhurkoadek| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|br| +|Size:|1.7 GB| + +## References + +https://huggingface.co/ArzhurKoadek/whisper-small-br \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_breton_arzhurkoadek_pipeline_br.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_breton_arzhurkoadek_pipeline_br.md new file mode 100644 index 00000000000000..41d2f1179c4f00 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_breton_arzhurkoadek_pipeline_br.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Breton whisper_small_breton_arzhurkoadek_pipeline pipeline WhisperForCTC from ArzhurKoadek +author: John Snow Labs +name: whisper_small_breton_arzhurkoadek_pipeline +date: 2024-09-06 +tags: [br, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: br +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_breton_arzhurkoadek_pipeline` is a Breton model originally trained by ArzhurKoadek. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_breton_arzhurkoadek_pipeline_br_5.5.0_3.0_1725583248877.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_breton_arzhurkoadek_pipeline_br_5.5.0_3.0_1725583248877.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_breton_arzhurkoadek_pipeline", lang = "br") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_breton_arzhurkoadek_pipeline", lang = "br") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_breton_arzhurkoadek_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|br| +|Size:|1.7 GB| + +## References + +https://huggingface.co/ArzhurKoadek/whisper-small-br + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_chinese_desktop_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_chinese_desktop_en.md new file mode 100644 index 00000000000000..ccd5cf0966d309 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_chinese_desktop_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_small_chinese_desktop WhisperForCTC from FritzYC +author: John Snow Labs +name: whisper_small_chinese_desktop +date: 2024-09-06 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_chinese_desktop` is a English model originally trained by FritzYC. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_chinese_desktop_en_5.5.0_3.0_1725644949153.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_chinese_desktop_en_5.5.0_3.0_1725644949153.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_chinese_desktop","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_chinese_desktop", "en")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_chinese_desktop| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/FritzYC/whisper-small-zh-desktop \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_chinese_desktop_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_chinese_desktop_pipeline_en.md new file mode 100644 index 00000000000000..5411f472bddd2a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_chinese_desktop_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_small_chinese_desktop_pipeline pipeline WhisperForCTC from FritzYC +author: John Snow Labs +name: whisper_small_chinese_desktop_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_chinese_desktop_pipeline` is a English model originally trained by FritzYC. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_chinese_desktop_pipeline_en_5.5.0_3.0_1725645032932.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_chinese_desktop_pipeline_en_5.5.0_3.0_1725645032932.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_chinese_desktop_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_chinese_desktop_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_chinese_desktop_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/FritzYC/whisper-small-zh-desktop + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_chinese_twi_kimbochen_pipeline_zh.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_chinese_twi_kimbochen_pipeline_zh.md new file mode 100644 index 00000000000000..2fff44d1453d95 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_chinese_twi_kimbochen_pipeline_zh.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Chinese whisper_small_chinese_twi_kimbochen_pipeline pipeline WhisperForCTC from kimbochen +author: John Snow Labs +name: whisper_small_chinese_twi_kimbochen_pipeline +date: 2024-09-06 +tags: [zh, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: zh +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_chinese_twi_kimbochen_pipeline` is a Chinese model originally trained by kimbochen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_chinese_twi_kimbochen_pipeline_zh_5.5.0_3.0_1725645526441.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_chinese_twi_kimbochen_pipeline_zh_5.5.0_3.0_1725645526441.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_chinese_twi_kimbochen_pipeline", lang = "zh") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_chinese_twi_kimbochen_pipeline", lang = "zh") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_chinese_twi_kimbochen_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|zh| +|Size:|1.7 GB| + +## References + +https://huggingface.co/kimbochen/whisper-small-zh-tw + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_chinese_twi_kimbochen_zh.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_chinese_twi_kimbochen_zh.md new file mode 100644 index 00000000000000..1dc5ee42b0b531 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_chinese_twi_kimbochen_zh.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Chinese whisper_small_chinese_twi_kimbochen WhisperForCTC from kimbochen +author: John Snow Labs +name: whisper_small_chinese_twi_kimbochen +date: 2024-09-06 +tags: [zh, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: zh +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_chinese_twi_kimbochen` is a Chinese model originally trained by kimbochen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_chinese_twi_kimbochen_zh_5.5.0_3.0_1725645443561.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_chinese_twi_kimbochen_zh_5.5.0_3.0_1725645443561.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_chinese_twi_kimbochen","zh") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_chinese_twi_kimbochen", "zh")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_chinese_twi_kimbochen| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|zh| +|Size:|1.7 GB| + +## References + +https://huggingface.co/kimbochen/whisper-small-zh-tw \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_chuvash_arabic_ar.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_chuvash_arabic_ar.md new file mode 100644 index 00000000000000..4f939e11897eb6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_chuvash_arabic_ar.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Arabic whisper_small_chuvash_arabic WhisperForCTC from arbml +author: John Snow Labs +name: whisper_small_chuvash_arabic +date: 2024-09-06 +tags: [ar, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: ar +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_chuvash_arabic` is a Arabic model originally trained by arbml. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_chuvash_arabic_ar_5.5.0_3.0_1725606659840.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_chuvash_arabic_ar_5.5.0_3.0_1725606659840.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_chuvash_arabic","ar") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_chuvash_arabic", "ar")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_chuvash_arabic| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|ar| +|Size:|1.7 GB| + +## References + +https://huggingface.co/arbml/whisper-small-cv-ar \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_chuvash_arabic_pipeline_ar.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_chuvash_arabic_pipeline_ar.md new file mode 100644 index 00000000000000..f277b7c074e7c3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_chuvash_arabic_pipeline_ar.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Arabic whisper_small_chuvash_arabic_pipeline pipeline WhisperForCTC from arbml +author: John Snow Labs +name: whisper_small_chuvash_arabic_pipeline +date: 2024-09-06 +tags: [ar, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: ar +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_chuvash_arabic_pipeline` is a Arabic model originally trained by arbml. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_chuvash_arabic_pipeline_ar_5.5.0_3.0_1725606740908.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_chuvash_arabic_pipeline_ar_5.5.0_3.0_1725606740908.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_chuvash_arabic_pipeline", lang = "ar") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_chuvash_arabic_pipeline", lang = "ar") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_chuvash_arabic_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ar| +|Size:|1.7 GB| + +## References + +https://huggingface.co/arbml/whisper-small-cv-ar + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_common_voice_fa.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_common_voice_fa.md new file mode 100644 index 00000000000000..a2a7f0b4d9bd15 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_common_voice_fa.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Persian whisper_small_common_voice WhisperForCTC from makhataei +author: John Snow Labs +name: whisper_small_common_voice +date: 2024-09-06 +tags: [fa, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: fa +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_common_voice` is a Persian model originally trained by makhataei. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_common_voice_fa_5.5.0_3.0_1725643181117.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_common_voice_fa_5.5.0_3.0_1725643181117.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_common_voice","fa") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_common_voice", "fa")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_common_voice| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|fa| +|Size:|1.7 GB| + +## References + +https://huggingface.co/makhataei/Whisper-Small-Common-Voice \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_common_voice_pipeline_fa.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_common_voice_pipeline_fa.md new file mode 100644 index 00000000000000..4c33bc91bc171f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_common_voice_pipeline_fa.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Persian whisper_small_common_voice_pipeline pipeline WhisperForCTC from makhataei +author: John Snow Labs +name: whisper_small_common_voice_pipeline +date: 2024-09-06 +tags: [fa, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: fa +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_common_voice_pipeline` is a Persian model originally trained by makhataei. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_common_voice_pipeline_fa_5.5.0_3.0_1725643264990.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_common_voice_pipeline_fa_5.5.0_3.0_1725643264990.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_common_voice_pipeline", lang = "fa") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_common_voice_pipeline", lang = "fa") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_common_voice_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fa| +|Size:|1.7 GB| + +## References + +https://huggingface.co/makhataei/Whisper-Small-Common-Voice + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_cv11_german_de.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_cv11_german_de.md new file mode 100644 index 00000000000000..35649ed86bc270 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_cv11_german_de.md @@ -0,0 +1,84 @@ +--- +layout: model +title: German whisper_small_cv11_german WhisperForCTC from bofenghuang +author: John Snow Labs +name: whisper_small_cv11_german +date: 2024-09-06 +tags: [de, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_cv11_german` is a German model originally trained by bofenghuang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_cv11_german_de_5.5.0_3.0_1725606585997.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_cv11_german_de_5.5.0_3.0_1725606585997.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_cv11_german","de") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_cv11_german", "de")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_cv11_german| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|de| +|Size:|1.7 GB| + +## References + +https://huggingface.co/bofenghuang/whisper-small-cv11-german \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_cv11_german_pipeline_de.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_cv11_german_pipeline_de.md new file mode 100644 index 00000000000000..7aa1b409719e3b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_cv11_german_pipeline_de.md @@ -0,0 +1,69 @@ +--- +layout: model +title: German whisper_small_cv11_german_pipeline pipeline WhisperForCTC from bofenghuang +author: John Snow Labs +name: whisper_small_cv11_german_pipeline +date: 2024-09-06 +tags: [de, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_cv11_german_pipeline` is a German model originally trained by bofenghuang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_cv11_german_pipeline_de_5.5.0_3.0_1725606670245.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_cv11_german_pipeline_de_5.5.0_3.0_1725606670245.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_cv11_german_pipeline", lang = "de") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_cv11_german_pipeline", lang = "de") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_cv11_german_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|de| +|Size:|1.7 GB| + +## References + +https://huggingface.co/bofenghuang/whisper-small-cv11-german + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_darija_mryasu_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_darija_mryasu_en.md new file mode 100644 index 00000000000000..4c9a33b3f292a3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_darija_mryasu_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_small_darija_mryasu WhisperForCTC from MrYASU +author: John Snow Labs +name: whisper_small_darija_mryasu +date: 2024-09-06 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_darija_mryasu` is a English model originally trained by MrYASU. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_darija_mryasu_en_5.5.0_3.0_1725602910237.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_darija_mryasu_en_5.5.0_3.0_1725602910237.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_darija_mryasu","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_darija_mryasu", "en")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_darija_mryasu| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/MrYASU/whisper-small-darija \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_darija_mryasu_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_darija_mryasu_pipeline_en.md new file mode 100644 index 00000000000000..9a9e93440ec0aa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_darija_mryasu_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_small_darija_mryasu_pipeline pipeline WhisperForCTC from MrYASU +author: John Snow Labs +name: whisper_small_darija_mryasu_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_darija_mryasu_pipeline` is a English model originally trained by MrYASU. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_darija_mryasu_pipeline_en_5.5.0_3.0_1725602998976.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_darija_mryasu_pipeline_en_5.5.0_3.0_1725602998976.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_darija_mryasu_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_darija_mryasu_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_darija_mryasu_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/MrYASU/whisper-small-darija + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_english_accented_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_english_accented_en.md new file mode 100644 index 00000000000000..657507bbe1b68e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_english_accented_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_small_english_accented WhisperForCTC from Abdo96 +author: John Snow Labs +name: whisper_small_english_accented +date: 2024-09-06 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_english_accented` is a English model originally trained by Abdo96. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_english_accented_en_5.5.0_3.0_1725643594872.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_english_accented_en_5.5.0_3.0_1725643594872.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_english_accented","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_english_accented", "en")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_english_accented| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/Abdo96/whisper-small-en-accented \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_english_hindi_venkatesh4342_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_english_hindi_venkatesh4342_en.md new file mode 100644 index 00000000000000..8f8a1263c05063 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_english_hindi_venkatesh4342_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_small_english_hindi_venkatesh4342 WhisperForCTC from Venkatesh4342 +author: John Snow Labs +name: whisper_small_english_hindi_venkatesh4342 +date: 2024-09-06 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_english_hindi_venkatesh4342` is a English model originally trained by Venkatesh4342. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_english_hindi_venkatesh4342_en_5.5.0_3.0_1725642608630.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_english_hindi_venkatesh4342_en_5.5.0_3.0_1725642608630.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_english_hindi_venkatesh4342","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_english_hindi_venkatesh4342", "en")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_english_hindi_venkatesh4342| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/Venkatesh4342/whisper-small-en-hi \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_english_hindi_venkatesh4342_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_english_hindi_venkatesh4342_pipeline_en.md new file mode 100644 index 00000000000000..444815ef60bb68 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_english_hindi_venkatesh4342_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_small_english_hindi_venkatesh4342_pipeline pipeline WhisperForCTC from Venkatesh4342 +author: John Snow Labs +name: whisper_small_english_hindi_venkatesh4342_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_english_hindi_venkatesh4342_pipeline` is a English model originally trained by Venkatesh4342. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_english_hindi_venkatesh4342_pipeline_en_5.5.0_3.0_1725642700664.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_english_hindi_venkatesh4342_pipeline_en_5.5.0_3.0_1725642700664.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_english_hindi_venkatesh4342_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_english_hindi_venkatesh4342_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
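+
+The pretrained pipeline already bundles the AudioAssembler and WhisperForCTC stages listed below, so `df` only needs an `audio_content` column with the raw audio floats. A minimal sketch, again assuming `librosa` for decoding and a hypothetical `sample.wav`, which also shows how the transcript can be read back from the `text` output column:
+
+```python
+# Minimal sketch: prepare `df` and read the transcription back.
+# `sample.wav` is a hypothetical path; librosa is assumed for decoding.
+import librosa
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+raw_floats, _ = librosa.load("sample.wav", sr=16000, mono=True)
+df = spark.createDataFrame([[raw_floats.tolist()]]).toDF("audio_content")
+
+pipeline = PretrainedPipeline("whisper_small_english_hindi_venkatesh4342_pipeline", lang="en")
+annotations = pipeline.transform(df)
+
+# Each `text` entry is an array of annotations; `result` holds the transcript.
+annotations.select("text.result").show(truncate=False)
+```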
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_english_hindi_venkatesh4342_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/Venkatesh4342/whisper-small-en-hi + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_hausa_mkalbani_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_hausa_mkalbani_en.md new file mode 100644 index 00000000000000..af4866a2a74bad --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_hausa_mkalbani_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_small_hausa_mkalbani WhisperForCTC from MKAlbani +author: John Snow Labs +name: whisper_small_hausa_mkalbani +date: 2024-09-06 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_hausa_mkalbani` is a English model originally trained by MKAlbani. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_hausa_mkalbani_en_5.5.0_3.0_1725604194051.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_hausa_mkalbani_en_5.5.0_3.0_1725604194051.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_hausa_mkalbani","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_hausa_mkalbani", "en")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_hausa_mkalbani| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/MKAlbani/whisper-small-ha \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_hausa_mkalbani_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_hausa_mkalbani_pipeline_en.md new file mode 100644 index 00000000000000..2fa18440429268 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_hausa_mkalbani_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_small_hausa_mkalbani_pipeline pipeline WhisperForCTC from MKAlbani +author: John Snow Labs +name: whisper_small_hausa_mkalbani_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_hausa_mkalbani_pipeline` is a English model originally trained by MKAlbani. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_hausa_mkalbani_pipeline_en_5.5.0_3.0_1725604279281.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_hausa_mkalbani_pipeline_en_5.5.0_3.0_1725604279281.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_hausa_mkalbani_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_hausa_mkalbani_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_hausa_mkalbani_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/MKAlbani/whisper-small-ha + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_indonesian_cv17_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_indonesian_cv17_pipeline_en.md new file mode 100644 index 00000000000000..e1ae0377ff8d95 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_indonesian_cv17_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_small_indonesian_cv17_pipeline pipeline WhisperForCTC from Bagus +author: John Snow Labs +name: whisper_small_indonesian_cv17_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_indonesian_cv17_pipeline` is a English model originally trained by Bagus. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_indonesian_cv17_pipeline_en_5.5.0_3.0_1725646027529.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_indonesian_cv17_pipeline_en_5.5.0_3.0_1725646027529.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_indonesian_cv17_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_indonesian_cv17_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_indonesian_cv17_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/Bagus/whisper-small-id-cv17 + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_indonesian_tonimurfid_id.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_indonesian_tonimurfid_id.md new file mode 100644 index 00000000000000..1b628fc64661ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_indonesian_tonimurfid_id.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Indonesian whisper_small_indonesian_tonimurfid WhisperForCTC from tonimurfid +author: John Snow Labs +name: whisper_small_indonesian_tonimurfid +date: 2024-09-06 +tags: [id, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: id +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_indonesian_tonimurfid` is a Indonesian model originally trained by tonimurfid. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_indonesian_tonimurfid_id_5.5.0_3.0_1725646801600.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_indonesian_tonimurfid_id_5.5.0_3.0_1725646801600.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_indonesian_tonimurfid","id") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_indonesian_tonimurfid", "id")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_indonesian_tonimurfid| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|id| +|Size:|1.7 GB| + +## References + +https://huggingface.co/tonimurfid/whisper-small-id \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_indonesian_tonimurfid_pipeline_id.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_indonesian_tonimurfid_pipeline_id.md new file mode 100644 index 00000000000000..4d3c4b9579adfd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_indonesian_tonimurfid_pipeline_id.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Indonesian whisper_small_indonesian_tonimurfid_pipeline pipeline WhisperForCTC from tonimurfid +author: John Snow Labs +name: whisper_small_indonesian_tonimurfid_pipeline +date: 2024-09-06 +tags: [id, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: id +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_indonesian_tonimurfid_pipeline` is a Indonesian model originally trained by tonimurfid. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_indonesian_tonimurfid_pipeline_id_5.5.0_3.0_1725646888553.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_indonesian_tonimurfid_pipeline_id_5.5.0_3.0_1725646888553.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_indonesian_tonimurfid_pipeline", lang = "id") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_indonesian_tonimurfid_pipeline", lang = "id") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_indonesian_tonimurfid_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|id| +|Size:|1.7 GB| + +## References + +https://huggingface.co/tonimurfid/whisper-small-id + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_khmer_v2_km.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_khmer_v2_km.md new file mode 100644 index 00000000000000..c570b2aab7ee5d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_khmer_v2_km.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Central Khmer, Khmer whisper_small_khmer_v2 WhisperForCTC from seanghay +author: John Snow Labs +name: whisper_small_khmer_v2 +date: 2024-09-06 +tags: [km, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: km +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_khmer_v2` is a Central Khmer, Khmer model originally trained by seanghay. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_khmer_v2_km_5.5.0_3.0_1725581448849.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_khmer_v2_km_5.5.0_3.0_1725581448849.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_khmer_v2","km") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_khmer_v2", "km")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_khmer_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|km| +|Size:|1.7 GB| + +## References + +https://huggingface.co/seanghay/whisper-small-khmer-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_khmer_v2_pipeline_km.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_khmer_v2_pipeline_km.md new file mode 100644 index 00000000000000..3657a0976ce48d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_khmer_v2_pipeline_km.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Central Khmer, Khmer whisper_small_khmer_v2_pipeline pipeline WhisperForCTC from seanghay +author: John Snow Labs +name: whisper_small_khmer_v2_pipeline +date: 2024-09-06 +tags: [km, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: km +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_khmer_v2_pipeline` is a Central Khmer, Khmer model originally trained by seanghay. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_khmer_v2_pipeline_km_5.5.0_3.0_1725581546094.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_khmer_v2_pipeline_km_5.5.0_3.0_1725581546094.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_khmer_v2_pipeline", lang = "km") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_khmer_v2_pipeline", lang = "km") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_khmer_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|km| +|Size:|1.7 GB| + +## References + +https://huggingface.co/seanghay/whisper-small-khmer-v2 + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_korean_haseong8012_ko.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_korean_haseong8012_ko.md new file mode 100644 index 00000000000000..fd828aa01d1683 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_korean_haseong8012_ko.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Korean whisper_small_korean_haseong8012 WhisperForCTC from haseong8012 +author: John Snow Labs +name: whisper_small_korean_haseong8012 +date: 2024-09-06 +tags: [ko, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: ko +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_korean_haseong8012` is a Korean model originally trained by haseong8012. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_korean_haseong8012_ko_5.5.0_3.0_1725644631469.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_korean_haseong8012_ko_5.5.0_3.0_1725644631469.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_korean_haseong8012","ko") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_korean_haseong8012", "ko")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
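+
+Once `pipelineDF` has been computed as above, the recognition output sits in the `text` annotation column (see the labels in the table below). A short sketch of pulling the transcripts out as plain strings:
+
+```python
+# Sketch: read the recognized text out of the annotated DataFrame.
+# `pipelineDF` is the result of pipelineModel.transform(data) above.
+pipelineDF.select("text.result").show(truncate=False)
+
+# Or collect plain Python strings, one list of segments per input row.
+transcripts = [row["result"] for row in pipelineDF.select("text.result").collect()]
+```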
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_korean_haseong8012| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|ko| +|Size:|642.4 MB| + +## References + +https://huggingface.co/haseong8012/whisper-small-ko \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_korean_sungbeom_ko.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_korean_sungbeom_ko.md new file mode 100644 index 00000000000000..aedd830cce43ed --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_korean_sungbeom_ko.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Korean whisper_small_korean_sungbeom WhisperForCTC from SungBeom +author: John Snow Labs +name: whisper_small_korean_sungbeom +date: 2024-09-06 +tags: [ko, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: ko +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_korean_sungbeom` is a Korean model originally trained by SungBeom. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_korean_sungbeom_ko_5.5.0_3.0_1725602055758.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_korean_sungbeom_ko_5.5.0_3.0_1725602055758.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_korean_sungbeom","ko") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_korean_sungbeom", "ko")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_korean_sungbeom| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|ko| +|Size:|1.7 GB| + +## References + +https://huggingface.co/SungBeom/whisper-small-ko \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_korean_sungbeom_pipeline_ko.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_korean_sungbeom_pipeline_ko.md new file mode 100644 index 00000000000000..551ef8b96365c2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_korean_sungbeom_pipeline_ko.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Korean whisper_small_korean_sungbeom_pipeline pipeline WhisperForCTC from SungBeom +author: John Snow Labs +name: whisper_small_korean_sungbeom_pipeline +date: 2024-09-06 +tags: [ko, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: ko +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_korean_sungbeom_pipeline` is a Korean model originally trained by SungBeom. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_korean_sungbeom_pipeline_ko_5.5.0_3.0_1725602140561.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_korean_sungbeom_pipeline_ko_5.5.0_3.0_1725602140561.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_korean_sungbeom_pipeline", lang = "ko") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_korean_sungbeom_pipeline", lang = "ko") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_korean_sungbeom_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ko| +|Size:|1.7 GB| + +## References + +https://huggingface.co/SungBeom/whisper-small-ko + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_korean_zeroth_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_korean_zeroth_en.md new file mode 100644 index 00000000000000..941a5260a573d5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_korean_zeroth_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_small_korean_zeroth WhisperForCTC from jiwon65 +author: John Snow Labs +name: whisper_small_korean_zeroth +date: 2024-09-06 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_korean_zeroth` is a English model originally trained by jiwon65. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_korean_zeroth_en_5.5.0_3.0_1725647679713.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_korean_zeroth_en_5.5.0_3.0_1725647679713.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_korean_zeroth","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_korean_zeroth", "en")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_korean_zeroth| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/jiwon65/whisper-small_korean-zeroth \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_korean_zeroth_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_korean_zeroth_pipeline_en.md new file mode 100644 index 00000000000000..59cc3703f9dd77 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_korean_zeroth_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_small_korean_zeroth_pipeline pipeline WhisperForCTC from jiwon65 +author: John Snow Labs +name: whisper_small_korean_zeroth_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_korean_zeroth_pipeline` is a English model originally trained by jiwon65. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_korean_zeroth_pipeline_en_5.5.0_3.0_1725647768099.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_korean_zeroth_pipeline_en_5.5.0_3.0_1725647768099.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_korean_zeroth_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_korean_zeroth_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_korean_zeroth_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/jiwon65/whisper-small_korean-zeroth + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_latvian_raivisdejus_lv.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_latvian_raivisdejus_lv.md new file mode 100644 index 00000000000000..3ecaf62531369e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_latvian_raivisdejus_lv.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Latvian whisper_small_latvian_raivisdejus WhisperForCTC from RaivisDejus +author: John Snow Labs +name: whisper_small_latvian_raivisdejus +date: 2024-09-06 +tags: [lv, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: lv +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_latvian_raivisdejus` is a Latvian model originally trained by RaivisDejus. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_latvian_raivisdejus_lv_5.5.0_3.0_1725643022136.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_latvian_raivisdejus_lv_5.5.0_3.0_1725643022136.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_latvian_raivisdejus","lv") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_latvian_raivisdejus", "lv")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_latvian_raivisdejus| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|lv| +|Size:|1.7 GB| + +## References + +https://huggingface.co/RaivisDejus/whisper-small-lv \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_latvian_raivisdejus_pipeline_lv.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_latvian_raivisdejus_pipeline_lv.md new file mode 100644 index 00000000000000..0501ecd33d1a7f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_latvian_raivisdejus_pipeline_lv.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Latvian whisper_small_latvian_raivisdejus_pipeline pipeline WhisperForCTC from RaivisDejus +author: John Snow Labs +name: whisper_small_latvian_raivisdejus_pipeline +date: 2024-09-06 +tags: [lv, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: lv +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_latvian_raivisdejus_pipeline` is a Latvian model originally trained by RaivisDejus. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_latvian_raivisdejus_pipeline_lv_5.5.0_3.0_1725643108325.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_latvian_raivisdejus_pipeline_lv_5.5.0_3.0_1725643108325.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_latvian_raivisdejus_pipeline", lang = "lv") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_latvian_raivisdejus_pipeline", lang = "lv") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_latvian_raivisdejus_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|lv| +|Size:|1.7 GB| + +## References + +https://huggingface.co/RaivisDejus/whisper-small-lv + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_malayalam_v5_ml.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_malayalam_v5_ml.md new file mode 100644 index 00000000000000..560bc7f5e1ed07 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_malayalam_v5_ml.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Malayalam whisper_small_malayalam_v5 WhisperForCTC from arjunshajitech +author: John Snow Labs +name: whisper_small_malayalam_v5 +date: 2024-09-06 +tags: [ml, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: ml +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_malayalam_v5` is a Malayalam model originally trained by arjunshajitech. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_malayalam_v5_ml_5.5.0_3.0_1725583782762.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_malayalam_v5_ml_5.5.0_3.0_1725583782762.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_malayalam_v5","ml") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_malayalam_v5", "ml")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_malayalam_v5| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|ml| +|Size:|1.7 GB| + +## References + +https://huggingface.co/arjunshajitech/whisper-small-malayalam-v5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_malayalam_v5_pipeline_ml.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_malayalam_v5_pipeline_ml.md new file mode 100644 index 00000000000000..c7f13e8cafa78e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_malayalam_v5_pipeline_ml.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Malayalam whisper_small_malayalam_v5_pipeline pipeline WhisperForCTC from arjunshajitech +author: John Snow Labs +name: whisper_small_malayalam_v5_pipeline +date: 2024-09-06 +tags: [ml, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: ml +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_malayalam_v5_pipeline` is a Malayalam model originally trained by arjunshajitech. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_malayalam_v5_pipeline_ml_5.5.0_3.0_1725583871191.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_malayalam_v5_pipeline_ml_5.5.0_3.0_1725583871191.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_malayalam_v5_pipeline", lang = "ml") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_malayalam_v5_pipeline", lang = "ml") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_malayalam_v5_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ml| +|Size:|1.7 GB| + +## References + +https://huggingface.co/arjunshajitech/whisper-small-malayalam-v5 + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_mongolian_erkhem_gantulga_mn.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_mongolian_erkhem_gantulga_mn.md new file mode 100644 index 00000000000000..6c5820e047e8af --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_mongolian_erkhem_gantulga_mn.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Mongolian whisper_small_mongolian_erkhem_gantulga WhisperForCTC from erkhem-gantulga +author: John Snow Labs +name: whisper_small_mongolian_erkhem_gantulga +date: 2024-09-06 +tags: [mn, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: mn +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_mongolian_erkhem_gantulga` is a Mongolian model originally trained by erkhem-gantulga. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_mongolian_erkhem_gantulga_mn_5.5.0_3.0_1725603331235.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_mongolian_erkhem_gantulga_mn_5.5.0_3.0_1725603331235.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_mongolian_erkhem_gantulga","mn") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_mongolian_erkhem_gantulga", "mn")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_mongolian_erkhem_gantulga| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|mn| +|Size:|1.7 GB| + +## References + +https://huggingface.co/erkhem-gantulga/whisper-small-mn \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_mongolian_erkhem_gantulga_pipeline_mn.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_mongolian_erkhem_gantulga_pipeline_mn.md new file mode 100644 index 00000000000000..18f7604deb07ba --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_mongolian_erkhem_gantulga_pipeline_mn.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Mongolian whisper_small_mongolian_erkhem_gantulga_pipeline pipeline WhisperForCTC from erkhem-gantulga +author: John Snow Labs +name: whisper_small_mongolian_erkhem_gantulga_pipeline +date: 2024-09-06 +tags: [mn, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: mn +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_mongolian_erkhem_gantulga_pipeline` is a Mongolian model originally trained by erkhem-gantulga. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_mongolian_erkhem_gantulga_pipeline_mn_5.5.0_3.0_1725603412548.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_mongolian_erkhem_gantulga_pipeline_mn_5.5.0_3.0_1725603412548.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_mongolian_erkhem_gantulga_pipeline", lang = "mn") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_mongolian_erkhem_gantulga_pipeline", lang = "mn") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_mongolian_erkhem_gantulga_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|mn| +|Size:|1.7 GB| + +## References + +https://huggingface.co/erkhem-gantulga/whisper-small-mn + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_sinhala_fine_tune_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_sinhala_fine_tune_en.md new file mode 100644 index 00000000000000..efc976d6855197 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_sinhala_fine_tune_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_small_sinhala_fine_tune WhisperForCTC from Subhaka +author: John Snow Labs +name: whisper_small_sinhala_fine_tune +date: 2024-09-06 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_sinhala_fine_tune` is a English model originally trained by Subhaka. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_sinhala_fine_tune_en_5.5.0_3.0_1725585981430.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_sinhala_fine_tune_en_5.5.0_3.0_1725585981430.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_sinhala_fine_tune","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_sinhala_fine_tune", "en")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_sinhala_fine_tune| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/Subhaka/whisper-small-Sinhala-Fine_Tune \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_sinhala_fine_tune_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_sinhala_fine_tune_pipeline_en.md new file mode 100644 index 00000000000000..e122720d82457f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_sinhala_fine_tune_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_small_sinhala_fine_tune_pipeline pipeline WhisperForCTC from Subhaka +author: John Snow Labs +name: whisper_small_sinhala_fine_tune_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_sinhala_fine_tune_pipeline` is a English model originally trained by Subhaka. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_sinhala_fine_tune_pipeline_en_5.5.0_3.0_1725586072635.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_sinhala_fine_tune_pipeline_en_5.5.0_3.0_1725586072635.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_sinhala_fine_tune_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_sinhala_fine_tune_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_sinhala_fine_tune_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/Subhaka/whisper-small-Sinhala-Fine_Tune + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_spanish_clu_ling_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_spanish_clu_ling_en.md new file mode 100644 index 00000000000000..2901fb25ae5b2a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_spanish_clu_ling_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_small_spanish_clu_ling WhisperForCTC from clu-ling +author: John Snow Labs +name: whisper_small_spanish_clu_ling +date: 2024-09-06 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_spanish_clu_ling` is a English model originally trained by clu-ling. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_spanish_clu_ling_en_5.5.0_3.0_1725582212449.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_spanish_clu_ling_en_5.5.0_3.0_1725582212449.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_spanish_clu_ling","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_spanish_clu_ling", "en")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
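+
+Because `pipelineModel` above is a regular Spark ML `PipelineModel`, it can be persisted once fitted and reloaded later with the standard Spark ML persistence API, so the stages do not have to be rebuilt on every run. A minimal sketch; the local path is only an example:
+
+```python
+# Sketch: save and reload the fitted pipeline (the path is an example).
+from pyspark.ml import PipelineModel
+
+pipelineModel.write().overwrite().save("whisper_small_spanish_clu_ling_model")
+restored = PipelineModel.load("whisper_small_spanish_clu_ling_model")
+restoredDF = restored.transform(data)
+```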
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_spanish_clu_ling| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/clu-ling/whisper-small-spanish \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_spanish_clu_ling_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_spanish_clu_ling_pipeline_en.md new file mode 100644 index 00000000000000..c58e4c97f3dae0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_spanish_clu_ling_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_small_spanish_clu_ling_pipeline pipeline WhisperForCTC from clu-ling +author: John Snow Labs +name: whisper_small_spanish_clu_ling_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_spanish_clu_ling_pipeline` is a English model originally trained by clu-ling. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_spanish_clu_ling_pipeline_en_5.5.0_3.0_1725582299141.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_spanish_clu_ling_pipeline_en_5.5.0_3.0_1725582299141.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_spanish_clu_ling_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_spanish_clu_ling_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_spanish_clu_ling_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/clu-ling/whisper-small-spanish + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_spanish_nemo_unified_2024_07_02_15_19_06_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_spanish_nemo_unified_2024_07_02_15_19_06_en.md new file mode 100644 index 00000000000000..7328452d7b5a1f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_spanish_nemo_unified_2024_07_02_15_19_06_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_small_spanish_nemo_unified_2024_07_02_15_19_06 WhisperForCTC from sgonzalezsilot +author: John Snow Labs +name: whisper_small_spanish_nemo_unified_2024_07_02_15_19_06 +date: 2024-09-06 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_spanish_nemo_unified_2024_07_02_15_19_06` is a English model originally trained by sgonzalezsilot. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_spanish_nemo_unified_2024_07_02_15_19_06_en_5.5.0_3.0_1725586751706.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_spanish_nemo_unified_2024_07_02_15_19_06_en_5.5.0_3.0_1725586751706.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_spanish_nemo_unified_2024_07_02_15_19_06","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+  .setInputCol("audio_content")
+  .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_spanish_nemo_unified_2024_07_02_15_19_06", "en")
+  .setInputCols(Array("audio_assembler"))
+  .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
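+
+The example above fits and transforms a DataFrame named `data` that is expected to expose an `audio_content` column of floats. Once `pipelineDF` has been computed, the transcription can be pulled out of the annotation column, as in this illustrative snippet.
+
+```python
+# Sketch: each entry of the "text" annotation column is a Spark NLP annotation
+# struct whose `result` field holds the transcribed string.
+pipelineDF.selectExpr("explode(text) as annotation") \
+    .selectExpr("annotation.result", "annotation.metadata") \
+    .show(truncate=False)
+```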
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_spanish_nemo_unified_2024_07_02_15_19_06| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/sgonzalezsilot/whisper-small-es-Nemo_unified_2024-07-02_15-19-06 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_swahili_asr_chuvash_14_pipeline_sw.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_swahili_asr_chuvash_14_pipeline_sw.md new file mode 100644 index 00000000000000..5f7109a32e34d7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_swahili_asr_chuvash_14_pipeline_sw.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Swahili (macrolanguage) whisper_small_swahili_asr_chuvash_14_pipeline pipeline WhisperForCTC from dmusingu +author: John Snow Labs +name: whisper_small_swahili_asr_chuvash_14_pipeline +date: 2024-09-06 +tags: [sw, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: sw +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_swahili_asr_chuvash_14_pipeline` is a Swahili (macrolanguage) model originally trained by dmusingu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_swahili_asr_chuvash_14_pipeline_sw_5.5.0_3.0_1725607118698.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_swahili_asr_chuvash_14_pipeline_sw_5.5.0_3.0_1725607118698.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_swahili_asr_chuvash_14_pipeline", lang = "sw") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_swahili_asr_chuvash_14_pipeline", lang = "sw") +val annotations = pipeline.transform(df) + +``` +
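+
+A PretrainedPipeline wraps a regular Spark `PipelineModel`, so its stages can be listed to confirm the AudioAssembler and WhisperForCTC components named under "Included Models". The `model` attribute used below is an assumption about the Python API surface; adjust it if your Spark NLP version exposes the underlying model differently.
+
+```python
+# Sketch: inspecting the stages bundled in the downloaded pipeline.
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("whisper_small_swahili_asr_chuvash_14_pipeline", lang="sw")
+for stage in pipeline.model.stages:
+    print(type(stage).__name__)
+```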
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_swahili_asr_chuvash_14_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|sw| +|Size:|1.7 GB| + +## References + +https://huggingface.co/dmusingu/WHISPER-SMALL-SWAHILI-ASR-CV-14 + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_swahili_asr_chuvash_14_sw.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_swahili_asr_chuvash_14_sw.md new file mode 100644 index 00000000000000..ee07e9df6efe77 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_swahili_asr_chuvash_14_sw.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Swahili (macrolanguage) whisper_small_swahili_asr_chuvash_14 WhisperForCTC from dmusingu +author: John Snow Labs +name: whisper_small_swahili_asr_chuvash_14 +date: 2024-09-06 +tags: [sw, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: sw +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_swahili_asr_chuvash_14` is a Swahili (macrolanguage) model originally trained by dmusingu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_swahili_asr_chuvash_14_sw_5.5.0_3.0_1725607039501.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_swahili_asr_chuvash_14_sw_5.5.0_3.0_1725607039501.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_swahili_asr_chuvash_14","sw") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+  .setInputCol("audio_content")
+  .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_swahili_asr_chuvash_14", "sw")
+  .setInputCols(Array("audio_assembler"))
+  .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_swahili_asr_chuvash_14| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|sw| +|Size:|1.7 GB| + +## References + +https://huggingface.co/dmusingu/WHISPER-SMALL-SWAHILI-ASR-CV-14 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_tamil_parambharat_pipeline_ta.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_tamil_parambharat_pipeline_ta.md new file mode 100644 index 00000000000000..968e088f2cca85 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_tamil_parambharat_pipeline_ta.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Tamil whisper_small_tamil_parambharat_pipeline pipeline WhisperForCTC from parambharat +author: John Snow Labs +name: whisper_small_tamil_parambharat_pipeline +date: 2024-09-06 +tags: [ta, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: ta +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_tamil_parambharat_pipeline` is a Tamil model originally trained by parambharat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_tamil_parambharat_pipeline_ta_5.5.0_3.0_1725582484628.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_tamil_parambharat_pipeline_ta_5.5.0_3.0_1725582484628.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_tamil_parambharat_pipeline", lang = "ta") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_tamil_parambharat_pipeline", lang = "ta") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_tamil_parambharat_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ta| +|Size:|1.7 GB| + +## References + +https://huggingface.co/parambharat/whisper-small-ta + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_tamil_parambharat_ta.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_tamil_parambharat_ta.md new file mode 100644 index 00000000000000..c3ed42401ffe6b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_tamil_parambharat_ta.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Tamil whisper_small_tamil_parambharat WhisperForCTC from parambharat +author: John Snow Labs +name: whisper_small_tamil_parambharat +date: 2024-09-06 +tags: [ta, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: ta +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_tamil_parambharat` is a Tamil model originally trained by parambharat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_tamil_parambharat_ta_5.5.0_3.0_1725582399856.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_tamil_parambharat_ta_5.5.0_3.0_1725582399856.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_tamil_parambharat","ta") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+  .setInputCol("audio_content")
+  .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_tamil_parambharat", "ta")
+  .setInputCols(Array("audio_assembler"))
+  .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
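+
+Whisper checkpoints are multitask decoders, and recent Spark NLP releases expose decoder controls on WhisperForCTC. If your version supports it, the decoding task can be pinned explicitly; treat the setter name below as an assumption to verify against the API reference for your release.
+
+```python
+# Sketch: pinning the decoding task (setter name assumed; verify availability first).
+speechToText = WhisperForCTC.pretrained("whisper_small_tamil_parambharat", "ta") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text") \
+    .setTask("<|transcribe|>")
+```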
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_tamil_parambharat| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|ta| +|Size:|1.7 GB| + +## References + +https://huggingface.co/parambharat/whisper-small-ta \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_xhosa_pipeline_xh.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_xhosa_pipeline_xh.md new file mode 100644 index 00000000000000..81bbd44b392422 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_xhosa_pipeline_xh.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Xhosa whisper_small_xhosa_pipeline pipeline WhisperForCTC from TheirStory +author: John Snow Labs +name: whisper_small_xhosa_pipeline +date: 2024-09-06 +tags: [xh, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: xh +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_xhosa_pipeline` is a Xhosa model originally trained by TheirStory. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_xhosa_pipeline_xh_5.5.0_3.0_1725643372522.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_xhosa_pipeline_xh_5.5.0_3.0_1725643372522.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_xhosa_pipeline", lang = "xh") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_xhosa_pipeline", lang = "xh") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_xhosa_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xh| +|Size:|1.7 GB| + +## References + +https://huggingface.co/TheirStory/whisper-small-xhosa + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_small_xhosa_xh.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_xhosa_xh.md new file mode 100644 index 00000000000000..fed5958f5012d6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_small_xhosa_xh.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Xhosa whisper_small_xhosa WhisperForCTC from TheirStory +author: John Snow Labs +name: whisper_small_xhosa +date: 2024-09-06 +tags: [xh, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: xh +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_xhosa` is a Xhosa model originally trained by TheirStory. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_xhosa_xh_5.5.0_3.0_1725643288873.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_xhosa_xh_5.5.0_3.0_1725643288873.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_xhosa","xh") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+  .setInputCol("audio_content")
+  .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_xhosa", "xh")
+  .setInputCols(Array("audio_assembler"))
+  .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
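+
+The Download button above points at the serialized model, so air-gapped clusters can unzip it and load it from disk instead of calling `pretrained()`, which downloads at runtime. The local path below is a placeholder.
+
+```python
+# Sketch: loading the extracted archive from a local or shared filesystem path (path is hypothetical).
+from sparknlp.annotator import WhisperForCTC
+
+speechToText = WhisperForCTC.load("/models/whisper_small_xhosa_xh_5.5.0_3.0") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+```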
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_xhosa| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|xh| +|Size:|1.7 GB| + +## References + +https://huggingface.co/TheirStory/whisper-small-xhosa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_telugu_base_pipeline_te.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_telugu_base_pipeline_te.md new file mode 100644 index 00000000000000..879357e9156b0c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_telugu_base_pipeline_te.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Telugu whisper_telugu_base_pipeline pipeline WhisperForCTC from vasista22 +author: John Snow Labs +name: whisper_telugu_base_pipeline +date: 2024-09-06 +tags: [te, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: te +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_telugu_base_pipeline` is a Telugu model originally trained by vasista22. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_telugu_base_pipeline_te_5.5.0_3.0_1725603752752.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_telugu_base_pipeline_te_5.5.0_3.0_1725603752752.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_telugu_base_pipeline", lang = "te") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_telugu_base_pipeline", lang = "te") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_telugu_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|te| +|Size:|643.5 MB| + +## References + +https://huggingface.co/vasista22/whisper-telugu-base + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_telugu_base_te.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_telugu_base_te.md new file mode 100644 index 00000000000000..20d8253e960c99 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_telugu_base_te.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Telugu whisper_telugu_base WhisperForCTC from vasista22 +author: John Snow Labs +name: whisper_telugu_base +date: 2024-09-06 +tags: [te, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: te +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_telugu_base` is a Telugu model originally trained by vasista22. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_telugu_base_te_5.5.0_3.0_1725603715542.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_telugu_base_te_5.5.0_3.0_1725603715542.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_telugu_base","te") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+  .setInputCol("audio_content")
+  .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_telugu_base", "te")
+  .setInputCols(Array("audio_assembler"))
+  .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_telugu_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|te| +|Size:|643.5 MB| + +## References + +https://huggingface.co/vasista22/whisper-telugu-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_test_quant_quantized_samagradatagov_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_test_quant_quantized_samagradatagov_en.md new file mode 100644 index 00000000000000..6f97ce22fcc7e6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_test_quant_quantized_samagradatagov_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_test_quant_quantized_samagradatagov WhisperForCTC from SamagraDataGov +author: John Snow Labs +name: whisper_test_quant_quantized_samagradatagov +date: 2024-09-06 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_test_quant_quantized_samagradatagov` is a English model originally trained by SamagraDataGov. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_test_quant_quantized_samagradatagov_en_5.5.0_3.0_1725583639525.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_test_quant_quantized_samagradatagov_en_5.5.0_3.0_1725583639525.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_test_quant_quantized_samagradatagov","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+  .setInputCol("audio_content")
+  .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_test_quant_quantized_samagradatagov", "en")
+  .setInputCols(Array("audio_assembler"))
+  .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_test_quant_quantized_samagradatagov| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|242.8 MB| + +## References + +https://huggingface.co/SamagraDataGov/whisper-test-quant_quantized \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_test_quant_quantized_samagradatagov_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_test_quant_quantized_samagradatagov_pipeline_en.md new file mode 100644 index 00000000000000..b5fc369a5bc7ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_test_quant_quantized_samagradatagov_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_test_quant_quantized_samagradatagov_pipeline pipeline WhisperForCTC from SamagraDataGov +author: John Snow Labs +name: whisper_test_quant_quantized_samagradatagov_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_test_quant_quantized_samagradatagov_pipeline` is a English model originally trained by SamagraDataGov. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_test_quant_quantized_samagradatagov_pipeline_en_5.5.0_3.0_1725583705082.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_test_quant_quantized_samagradatagov_pipeline_en_5.5.0_3.0_1725583705082.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_test_quant_quantized_samagradatagov_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_test_quant_quantized_samagradatagov_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_test_quant_quantized_samagradatagov_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|242.9 MB| + +## References + +https://huggingface.co/SamagraDataGov/whisper-test-quant_quantized + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_arabic_quran_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_arabic_quran_en.md new file mode 100644 index 00000000000000..d28aed242fd86a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_arabic_quran_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_tiny_arabic_quran WhisperForCTC from tarteel-ai +author: John Snow Labs +name: whisper_tiny_arabic_quran +date: 2024-09-06 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_tiny_arabic_quran` is a English model originally trained by tarteel-ai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_tiny_arabic_quran_en_5.5.0_3.0_1725606496071.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_tiny_arabic_quran_en_5.5.0_3.0_1725606496071.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_tiny_arabic_quran","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+  .setInputCol("audio_content")
+  .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_tiny_arabic_quran", "en")
+  .setInputCols(Array("audio_assembler"))
+  .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_tiny_arabic_quran| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|390.6 MB| + +## References + +https://huggingface.co/tarteel-ai/whisper-tiny-ar-quran \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_arabic_quran_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_arabic_quran_pipeline_en.md new file mode 100644 index 00000000000000..b928fc46da1ac4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_arabic_quran_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_tiny_arabic_quran_pipeline pipeline WhisperForCTC from tarteel-ai +author: John Snow Labs +name: whisper_tiny_arabic_quran_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_tiny_arabic_quran_pipeline` is a English model originally trained by tarteel-ai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_tiny_arabic_quran_pipeline_en_5.5.0_3.0_1725606516574.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_tiny_arabic_quran_pipeline_en_5.5.0_3.0_1725606516574.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_tiny_arabic_quran_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_tiny_arabic_quran_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_tiny_arabic_quran_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|390.6 MB| + +## References + +https://huggingface.co/tarteel-ai/whisper-tiny-ar-quran + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_english_atcosim_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_english_atcosim_en.md new file mode 100644 index 00000000000000..76c09e32e17597 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_english_atcosim_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_tiny_english_atcosim WhisperForCTC from jlvdoorn +author: John Snow Labs +name: whisper_tiny_english_atcosim +date: 2024-09-06 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_tiny_english_atcosim` is a English model originally trained by jlvdoorn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_tiny_english_atcosim_en_5.5.0_3.0_1725646130018.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_tiny_english_atcosim_en_5.5.0_3.0_1725646130018.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_tiny_english_atcosim","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+  .setInputCol("audio_content")
+  .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_tiny_english_atcosim", "en")
+  .setInputCols(Array("audio_assembler"))
+  .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
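+
+Several recordings can be transcribed in a single pass by stacking them into one DataFrame and reusing the fitted `pipelineModel` from the example above. The file names and the use of librosa below are illustrative assumptions.
+
+```python
+# Sketch: batch transcription of multiple local clips (paths are hypothetical).
+import librosa
+
+files = ["tower_call_1.wav", "tower_call_2.wav"]
+rows = [[librosa.load(f, sr=16000)[0].tolist()] for f in files]
+batch = spark.createDataFrame(rows, ["audio_content"])
+pipelineModel.transform(batch).select("text.result").show(truncate=False)
+```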
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_tiny_english_atcosim| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|393.7 MB| + +## References + +https://huggingface.co/jlvdoorn/whisper-tiny.en-atcosim \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_english_atcosim_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_english_atcosim_pipeline_en.md new file mode 100644 index 00000000000000..e6ea189da510b6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_english_atcosim_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_tiny_english_atcosim_pipeline pipeline WhisperForCTC from jlvdoorn +author: John Snow Labs +name: whisper_tiny_english_atcosim_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_tiny_english_atcosim_pipeline` is a English model originally trained by jlvdoorn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_tiny_english_atcosim_pipeline_en_5.5.0_3.0_1725646150221.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_tiny_english_atcosim_pipeline_en_5.5.0_3.0_1725646150221.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_tiny_english_atcosim_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_tiny_english_atcosim_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_tiny_english_atcosim_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|393.8 MB| + +## References + +https://huggingface.co/jlvdoorn/whisper-tiny.en-atcosim + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_french_cased_fr.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_french_cased_fr.md new file mode 100644 index 00000000000000..49d24e5710e71d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_french_cased_fr.md @@ -0,0 +1,84 @@ +--- +layout: model +title: French whisper_tiny_french_cased WhisperForCTC from qanastek +author: John Snow Labs +name: whisper_tiny_french_cased +date: 2024-09-06 +tags: [fr, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: fr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_tiny_french_cased` is a French model originally trained by qanastek. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_tiny_french_cased_fr_5.5.0_3.0_1725643355641.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_tiny_french_cased_fr_5.5.0_3.0_1725643355641.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_tiny_french_cased","fr") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+  .setInputCol("audio_content")
+  .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_tiny_french_cased", "fr")
+  .setInputCols(Array("audio_assembler"))
+  .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_tiny_french_cased| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|fr| +|Size:|390.9 MB| + +## References + +https://huggingface.co/qanastek/whisper-tiny-french-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_french_cased_pipeline_fr.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_french_cased_pipeline_fr.md new file mode 100644 index 00000000000000..27e22996a6c60e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_french_cased_pipeline_fr.md @@ -0,0 +1,69 @@ +--- +layout: model +title: French whisper_tiny_french_cased_pipeline pipeline WhisperForCTC from qanastek +author: John Snow Labs +name: whisper_tiny_french_cased_pipeline +date: 2024-09-06 +tags: [fr, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: fr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_tiny_french_cased_pipeline` is a French model originally trained by qanastek. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_tiny_french_cased_pipeline_fr_5.5.0_3.0_1725643376021.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_tiny_french_cased_pipeline_fr_5.5.0_3.0_1725643376021.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_tiny_french_cased_pipeline", lang = "fr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_tiny_french_cased_pipeline", lang = "fr") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_tiny_french_cased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fr| +|Size:|390.9 MB| + +## References + +https://huggingface.co/qanastek/whisper-tiny-french-cased + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_german_primeline_pipeline_de.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_german_primeline_pipeline_de.md new file mode 100644 index 00000000000000..87d6879f7fa8ed --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_german_primeline_pipeline_de.md @@ -0,0 +1,69 @@ +--- +layout: model +title: German whisper_tiny_german_primeline_pipeline pipeline WhisperForCTC from primeline +author: John Snow Labs +name: whisper_tiny_german_primeline_pipeline +date: 2024-09-06 +tags: [de, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_tiny_german_primeline_pipeline` is a German model originally trained by primeline. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_tiny_german_primeline_pipeline_de_5.5.0_3.0_1725586364041.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_tiny_german_primeline_pipeline_de_5.5.0_3.0_1725586364041.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_tiny_german_primeline_pipeline", lang = "de") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_tiny_german_primeline_pipeline", lang = "de") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_tiny_german_primeline_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|de| +|Size:|187.5 MB| + +## References + +https://huggingface.co/primeline/whisper-tiny-german + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_korean_pipeline_ko.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_korean_pipeline_ko.md new file mode 100644 index 00000000000000..375e58f06ba7cd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_korean_pipeline_ko.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Korean whisper_tiny_korean_pipeline pipeline WhisperForCTC from TheoJo +author: John Snow Labs +name: whisper_tiny_korean_pipeline +date: 2024-09-06 +tags: [ko, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: ko +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_tiny_korean_pipeline` is a Korean model originally trained by TheoJo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_tiny_korean_pipeline_ko_5.5.0_3.0_1725604849853.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_tiny_korean_pipeline_ko_5.5.0_3.0_1725604849853.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_tiny_korean_pipeline", lang = "ko") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_tiny_korean_pipeline", lang = "ko") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_tiny_korean_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ko| +|Size:|242.8 MB| + +## References + +https://huggingface.co/TheoJo/whisper-tiny-ko + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_latvian_lv.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_latvian_lv.md new file mode 100644 index 00000000000000..004a4222435736 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_latvian_lv.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Latvian whisper_tiny_latvian WhisperForCTC from RaivisDejus +author: John Snow Labs +name: whisper_tiny_latvian +date: 2024-09-06 +tags: [lv, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: lv +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_tiny_latvian` is a Latvian model originally trained by RaivisDejus. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_tiny_latvian_lv_5.5.0_3.0_1725606976269.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_tiny_latvian_lv_5.5.0_3.0_1725606976269.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_tiny_latvian","lv") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+  .setInputCol("audio_content")
+  .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_tiny_latvian", "lv")
+  .setInputCols(Array("audio_assembler"))
+  .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_tiny_latvian| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|lv| +|Size:|388.5 MB| + +## References + +https://huggingface.co/RaivisDejus/whisper-tiny-lv \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_nepali_ne.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_nepali_ne.md new file mode 100644 index 00000000000000..428fcd243fd237 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_nepali_ne.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Nepali (macrolanguage) whisper_tiny_nepali WhisperForCTC from kiranpantha +author: John Snow Labs +name: whisper_tiny_nepali +date: 2024-09-06 +tags: [ne, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: ne +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_tiny_nepali` is a Nepali (macrolanguage) model originally trained by kiranpantha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_tiny_nepali_ne_5.5.0_3.0_1725583074631.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_tiny_nepali_ne_5.5.0_3.0_1725583074631.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_tiny_nepali","ne") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+  .setInputCol("audio_content")
+  .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_tiny_nepali", "ne")
+  .setInputCols(Array("audio_assembler"))
+  .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
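+
+Fitting the pipeline above triggers a one-time download of the pretrained weights; persisting the fitted `pipelineModel` lets later jobs skip that step. The paths below are placeholders.
+
+```python
+# Sketch: saving and reloading the fitted pipeline with standard Spark ML I/O.
+from pyspark.ml import PipelineModel
+
+pipelineModel.write().overwrite().save("/models/whisper_tiny_nepali_pipeline")
+restored = PipelineModel.load("/models/whisper_tiny_nepali_pipeline")
+```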
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_tiny_nepali| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|ne| +|Size:|390.0 MB| + +## References + +https://huggingface.co/kiranpantha/whisper-tiny-ne \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_nepali_pipeline_ne.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_nepali_pipeline_ne.md new file mode 100644 index 00000000000000..0200d3049a1417 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_nepali_pipeline_ne.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Nepali (macrolanguage) whisper_tiny_nepali_pipeline pipeline WhisperForCTC from kiranpantha +author: John Snow Labs +name: whisper_tiny_nepali_pipeline +date: 2024-09-06 +tags: [ne, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: ne +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_tiny_nepali_pipeline` is a Nepali (macrolanguage) model originally trained by kiranpantha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_tiny_nepali_pipeline_ne_5.5.0_3.0_1725583098004.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_tiny_nepali_pipeline_ne_5.5.0_3.0_1725583098004.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_tiny_nepali_pipeline", lang = "ne") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_tiny_nepali_pipeline", lang = "ne") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_tiny_nepali_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ne| +|Size:|390.0 MB| + +## References + +https://huggingface.co/kiranpantha/whisper-tiny-ne + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_persian_farsi_pipeline_fa.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_persian_farsi_pipeline_fa.md new file mode 100644 index 00000000000000..2cf6ce61579f7b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_persian_farsi_pipeline_fa.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Persian whisper_tiny_persian_farsi_pipeline pipeline WhisperForCTC from javadr +author: John Snow Labs +name: whisper_tiny_persian_farsi_pipeline +date: 2024-09-06 +tags: [fa, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: fa +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_tiny_persian_farsi_pipeline` is a Persian model originally trained by javadr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_tiny_persian_farsi_pipeline_fa_5.5.0_3.0_1725603408213.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_tiny_persian_farsi_pipeline_fa_5.5.0_3.0_1725603408213.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_tiny_persian_farsi_pipeline", lang = "fa") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_tiny_persian_farsi_pipeline", lang = "fa") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_tiny_persian_farsi_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fa| +|Size:|390.1 MB| + +## References + +https://huggingface.co/javadr/whisper-tiny-fa + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_serbian_yodas_v0_2_en.md b/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_serbian_yodas_v0_2_en.md new file mode 100644 index 00000000000000..8b618498671ba0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-whisper_tiny_serbian_yodas_v0_2_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_tiny_serbian_yodas_v0_2 WhisperForCTC from cminja +author: John Snow Labs +name: whisper_tiny_serbian_yodas_v0_2 +date: 2024-09-06 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_tiny_serbian_yodas_v0_2` is a English model originally trained by cminja. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_tiny_serbian_yodas_v0_2_en_5.5.0_3.0_1725643736848.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_tiny_serbian_yodas_v0_2_en_5.5.0_3.0_1725643736848.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# "data" is expected to be a DataFrame with an "audio_content" column of 16 kHz float samples
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_tiny_serbian_yodas_v0_2","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// "data" is expected to be a DataFrame with an "audio_content" column of 16 kHz float samples
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_tiny_serbian_yodas_v0_2", "en")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_tiny_serbian_yodas_v0_2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|388.9 MB| + +## References + +https://huggingface.co/cminja/whisper-tiny-sr-yodas-v0.2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-wikidata_simplequestions_en.md b/docs/_posts/ahmedlone127/2024-09-06-wikidata_simplequestions_en.md new file mode 100644 index 00000000000000..4b25df6c945f39 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-wikidata_simplequestions_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English wikidata_simplequestions RoBertaEmbeddings from drt +author: John Snow Labs +name: wikidata_simplequestions +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`wikidata_simplequestions` is a English model originally trained by drt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/wikidata_simplequestions_en_5.5.0_3.0_1725660716472.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/wikidata_simplequestions_en_5.5.0_3.0_1725660716472.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("wikidata_simplequestions","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("wikidata_simplequestions","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
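+
+To inspect the vectors produced by the Python example above (a minimal sketch; it assumes the `pipelineDF` variable from that example and the default annotation schema, in which each entry of the `embeddings` column holds one annotation per token):
+
+```python
+# Explode the per-token vectors out of the "embeddings" annotation column.
+from pyspark.sql import functions as F
+
+pipelineDF.select(F.explode("embeddings.embeddings").alias("token_embedding")) \
+    .show(truncate=80)
+```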
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|wikidata_simplequestions| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|428.2 MB| + +## References + +https://huggingface.co/drt/wikidata-simplequestions \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-wikidata_simplequestions_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-wikidata_simplequestions_pipeline_en.md new file mode 100644 index 00000000000000..acc3b91fa41f3b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-wikidata_simplequestions_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English wikidata_simplequestions_pipeline pipeline RoBertaEmbeddings from drt +author: John Snow Labs +name: wikidata_simplequestions_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`wikidata_simplequestions_pipeline` is a English model originally trained by drt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/wikidata_simplequestions_pipeline_en_5.5.0_3.0_1725660753758.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/wikidata_simplequestions_pipeline_en_5.5.0_3.0_1725660753758.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("wikidata_simplequestions_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("wikidata_simplequestions_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
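+
+The `df` referenced above is any Spark DataFrame with a raw-text column. As a minimal sketch (assuming the pipeline's DocumentAssembler reads from a column named `text` and that the bundled RoBertaEmbeddings writes to an `embeddings` column, which are the usual defaults but are not stated here):
+
+```python
+# Hypothetical input DataFrame for the pretrained pipeline above.
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("wikidata_simplequestions_pipeline", lang="en")
+annotations = pipeline.transform(df)
+annotations.select("embeddings.embeddings").show(truncate=80)
+```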
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|wikidata_simplequestions_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|428.2 MB| + +## References + +https://huggingface.co/drt/wikidata-simplequestions + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_claim_detection_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_claim_detection_en.md new file mode 100644 index 00000000000000..e98462f03ca9c4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_claim_detection_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_claim_detection XlmRoBertaForSequenceClassification from Nithiwat +author: John Snow Labs +name: xlm_roberta_base_claim_detection +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_claim_detection` is a English model originally trained by Nithiwat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_claim_detection_en_5.5.0_3.0_1725619999334.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_claim_detection_en_5.5.0_3.0_1725619999334.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_claim_detection","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_claim_detection", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
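+
+To read the predicted labels from the Python example above (a small sketch assuming the `pipelineDF` variable from that example):
+
+```python
+# Each row of "class.result" holds the predicted label(s) for the input text.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```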
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_claim_detection| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|850.2 MB| + +## References + +https://huggingface.co/Nithiwat/xlm-roberta-base_claim-detection \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_claim_detection_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_claim_detection_pipeline_en.md new file mode 100644 index 00000000000000..dfc98e7465fa7a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_claim_detection_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_claim_detection_pipeline pipeline XlmRoBertaForSequenceClassification from Nithiwat +author: John Snow Labs +name: xlm_roberta_base_claim_detection_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_claim_detection_pipeline` is a English model originally trained by Nithiwat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_claim_detection_pipeline_en_5.5.0_3.0_1725620066763.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_claim_detection_pipeline_en_5.5.0_3.0_1725620066763.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_claim_detection_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_claim_detection_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
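+
+For quick experiments on single strings, the pipeline can also be called through `annotate` instead of `transform`. This is only a sketch: the `"class"` output key and the example sentence are assumptions rather than documented behaviour of this particular pipeline:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("xlm_roberta_base_claim_detection_pipeline", lang="en")
+
+# annotate() returns a dict keyed by the output columns of the bundled annotators.
+result = pipeline.annotate("The Eiffel Tower is located in Paris.")
+print(result.get("class"))
+```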
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_claim_detection_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|850.2 MB| + +## References + +https://huggingface.co/Nithiwat/xlm-roberta-base_claim-detection + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_augument_visquad2_20_3_2023_1_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_augument_visquad2_20_3_2023_1_en.md new file mode 100644 index 00000000000000..f69a4fc9bed687 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_augument_visquad2_20_3_2023_1_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_augument_visquad2_20_3_2023_1 XlmRoBertaForQuestionAnswering from jluckyboyj +author: John Snow Labs +name: xlm_roberta_base_finetuned_augument_visquad2_20_3_2023_1 +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_augument_visquad2_20_3_2023_1` is a English model originally trained by jluckyboyj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_augument_visquad2_20_3_2023_1_en_5.5.0_3.0_1725640488426.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_augument_visquad2_20_3_2023_1_en_5.5.0_3.0_1725640488426.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_finetuned_augument_visquad2_20_3_2023_1","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_finetuned_augument_visquad2_20_3_2023_1", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
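+
+To read the extracted answer from the Python example above (a small sketch assuming the `pipelineDF` variable from that example):
+
+```python
+# "answer.result" holds the predicted answer span(s) for each question/context pair.
+pipelineDF.select("answer.result").show(truncate=False)
+```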
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_augument_visquad2_20_3_2023_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|817.7 MB| + +## References + +https://huggingface.co/jluckyboyj/xlm-roberta-base-finetuned-augument-visquad2-20-3-2023-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_augument_visquad2_20_3_2023_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_augument_visquad2_20_3_2023_1_pipeline_en.md new file mode 100644 index 00000000000000..42f9a478fea192 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_augument_visquad2_20_3_2023_1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_augument_visquad2_20_3_2023_1_pipeline pipeline XlmRoBertaForQuestionAnswering from jluckyboyj +author: John Snow Labs +name: xlm_roberta_base_finetuned_augument_visquad2_20_3_2023_1_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_augument_visquad2_20_3_2023_1_pipeline` is a English model originally trained by jluckyboyj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_augument_visquad2_20_3_2023_1_pipeline_en_5.5.0_3.0_1725640607297.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_augument_visquad2_20_3_2023_1_pipeline_en_5.5.0_3.0_1725640607297.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_augument_visquad2_20_3_2023_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_augument_visquad2_20_3_2023_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
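+
+The `df` used above needs the raw question and context columns expected by the bundled MultiDocumentAssembler. As a minimal sketch (the column names `question` and `context` are an assumption based on the stand-alone example for this model, not documented for the saved pipeline):
+
+```python
+# Hypothetical question/context DataFrame for the pretrained QA pipeline above.
+import sparknlp
+
+spark = sparknlp.start()
+df = spark.createDataFrame(
+    [["What framework do I use?", "I use spark-nlp."]]
+).toDF("question", "context")
+```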
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_augument_visquad2_20_3_2023_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|817.7 MB| + +## References + +https://huggingface.co/jluckyboyj/xlm-roberta-base-finetuned-augument-visquad2-20-3-2023-1 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_burmese_dear_watson2_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_burmese_dear_watson2_en.md new file mode 100644 index 00000000000000..bf724353723a89 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_burmese_dear_watson2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_burmese_dear_watson2 XlmRoBertaEmbeddings from SmartPy +author: John Snow Labs +name: xlm_roberta_base_finetuned_burmese_dear_watson2 +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_burmese_dear_watson2` is a English model originally trained by SmartPy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_burmese_dear_watson2_en_5.5.0_3.0_1725626381681.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_burmese_dear_watson2_en_5.5.0_3.0_1725626381681.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("xlm_roberta_base_finetuned_burmese_dear_watson2","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("xlm_roberta_base_finetuned_burmese_dear_watson2","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_burmese_dear_watson2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[xlm_roberta]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/SmartPy/xlm-roberta-base-finetuned-my_dear_watson2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_burmese_dear_watson2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_burmese_dear_watson2_pipeline_en.md new file mode 100644 index 00000000000000..8645fed233d96f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_burmese_dear_watson2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_burmese_dear_watson2_pipeline pipeline XlmRoBertaEmbeddings from SmartPy +author: John Snow Labs +name: xlm_roberta_base_finetuned_burmese_dear_watson2_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_burmese_dear_watson2_pipeline` is a English model originally trained by SmartPy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_burmese_dear_watson2_pipeline_en_5.5.0_3.0_1725626441318.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_burmese_dear_watson2_pipeline_en_5.5.0_3.0_1725626441318.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_burmese_dear_watson2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_burmese_dear_watson2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_burmese_dear_watson2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/SmartPy/xlm-roberta-base-finetuned-my_dear_watson2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_ecoicop_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_ecoicop_en.md new file mode 100644 index 00000000000000..638bcabc3278fe --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_ecoicop_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_ecoicop XlmRoBertaForSequenceClassification from peter2000 +author: John Snow Labs +name: xlm_roberta_base_finetuned_ecoicop +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_ecoicop` is a English model originally trained by peter2000. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_ecoicop_en_5.5.0_3.0_1725616336368.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_ecoicop_en_5.5.0_3.0_1725616336368.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_finetuned_ecoicop","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_finetuned_ecoicop", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_ecoicop| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|857.5 MB| + +## References + +https://huggingface.co/peter2000/xlm-roberta-base-finetuned-ecoicop \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_ecoicop_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_ecoicop_pipeline_en.md new file mode 100644 index 00000000000000..bf3ce75401d9e8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_ecoicop_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_ecoicop_pipeline pipeline XlmRoBertaForSequenceClassification from peter2000 +author: John Snow Labs +name: xlm_roberta_base_finetuned_ecoicop_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_ecoicop_pipeline` is a English model originally trained by peter2000. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_ecoicop_pipeline_en_5.5.0_3.0_1725616403172.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_ecoicop_pipeline_en_5.5.0_3.0_1725616403172.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_ecoicop_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_ecoicop_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_ecoicop_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|857.5 MB| + +## References + +https://huggingface.co/peter2000/xlm-roberta-base-finetuned-ecoicop + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_emotion_37_labels_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_emotion_37_labels_pipeline_en.md new file mode 100644 index 00000000000000..9c939e22d1461d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_emotion_37_labels_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_emotion_37_labels_pipeline pipeline XlmRoBertaForSequenceClassification from upsalite +author: John Snow Labs +name: xlm_roberta_base_finetuned_emotion_37_labels_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_emotion_37_labels_pipeline` is a English model originally trained by upsalite. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_emotion_37_labels_pipeline_en_5.5.0_3.0_1725617321248.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_emotion_37_labels_pipeline_en_5.5.0_3.0_1725617321248.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_emotion_37_labels_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_emotion_37_labels_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_emotion_37_labels_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|877.5 MB| + +## References + +https://huggingface.co/upsalite/xlm-roberta-base-finetuned-emotion-37-labels + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_marc_english_hslu_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_marc_english_hslu_en.md new file mode 100644 index 00000000000000..427a5d3a790423 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_marc_english_hslu_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_marc_english_hslu XlmRoBertaForSequenceClassification from lewtun +author: John Snow Labs +name: xlm_roberta_base_finetuned_marc_english_hslu +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_marc_english_hslu` is a English model originally trained by lewtun. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_marc_english_hslu_en_5.5.0_3.0_1725617149873.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_marc_english_hslu_en_5.5.0_3.0_1725617149873.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_finetuned_marc_english_hslu","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_finetuned_marc_english_hslu", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_marc_english_hslu| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|833.5 MB| + +## References + +https://huggingface.co/lewtun/xlm-roberta-base-finetuned-marc-en-hslu \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_marc_english_hslu_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_marc_english_hslu_pipeline_en.md new file mode 100644 index 00000000000000..7093e7ab164878 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_marc_english_hslu_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_marc_english_hslu_pipeline pipeline XlmRoBertaForSequenceClassification from lewtun +author: John Snow Labs +name: xlm_roberta_base_finetuned_marc_english_hslu_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_marc_english_hslu_pipeline` is a English model originally trained by lewtun. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_marc_english_hslu_pipeline_en_5.5.0_3.0_1725617244186.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_marc_english_hslu_pipeline_en_5.5.0_3.0_1725617244186.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_marc_english_hslu_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_marc_english_hslu_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_marc_english_hslu_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|833.5 MB| + +## References + +https://huggingface.co/lewtun/xlm-roberta-base-finetuned-marc-en-hslu + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_nepal_bhasa_data_visquad2_13_3_2023_3_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_nepal_bhasa_data_visquad2_13_3_2023_3_en.md new file mode 100644 index 00000000000000..a4f3449b0b9349 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_nepal_bhasa_data_visquad2_13_3_2023_3_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_nepal_bhasa_data_visquad2_13_3_2023_3 XlmRoBertaForQuestionAnswering from jluckyboyj +author: John Snow Labs +name: xlm_roberta_base_finetuned_nepal_bhasa_data_visquad2_13_3_2023_3 +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_nepal_bhasa_data_visquad2_13_3_2023_3` is a English model originally trained by jluckyboyj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_nepal_bhasa_data_visquad2_13_3_2023_3_en_5.5.0_3.0_1725598487448.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_nepal_bhasa_data_visquad2_13_3_2023_3_en_5.5.0_3.0_1725598487448.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_finetuned_nepal_bhasa_data_visquad2_13_3_2023_3","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_finetuned_nepal_bhasa_data_visquad2_13_3_2023_3", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_nepal_bhasa_data_visquad2_13_3_2023_3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|853.0 MB| + +## References + +https://huggingface.co/jluckyboyj/xlm-roberta-base-finetuned-new-data-visquad2-13-3-2023-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_nepal_bhasa_data_visquad2_13_3_2023_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_nepal_bhasa_data_visquad2_13_3_2023_3_pipeline_en.md new file mode 100644 index 00000000000000..2d6b4d7d873134 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_nepal_bhasa_data_visquad2_13_3_2023_3_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_nepal_bhasa_data_visquad2_13_3_2023_3_pipeline pipeline XlmRoBertaForQuestionAnswering from jluckyboyj +author: John Snow Labs +name: xlm_roberta_base_finetuned_nepal_bhasa_data_visquad2_13_3_2023_3_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_nepal_bhasa_data_visquad2_13_3_2023_3_pipeline` is a English model originally trained by jluckyboyj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_nepal_bhasa_data_visquad2_13_3_2023_3_pipeline_en_5.5.0_3.0_1725598550755.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_nepal_bhasa_data_visquad2_13_3_2023_3_pipeline_en_5.5.0_3.0_1725598550755.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_nepal_bhasa_data_visquad2_13_3_2023_3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_nepal_bhasa_data_visquad2_13_3_2023_3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_nepal_bhasa_data_visquad2_13_3_2023_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|853.0 MB| + +## References + +https://huggingface.co/jluckyboyj/xlm-roberta-base-finetuned-new-data-visquad2-13-3-2023-3 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_all_100yen_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_all_100yen_en.md new file mode 100644 index 00000000000000..71239199350d12 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_all_100yen_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_all_100yen XlmRoBertaForTokenClassification from 100yen +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_all_100yen +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_all_100yen` is a English model originally trained by 100yen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_100yen_en_5.5.0_3.0_1725593638112.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_100yen_en_5.5.0_3.0_1725593638112.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_all_100yen","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_all_100yen", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
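+
+To read the predicted token tags from the Python example above (a small sketch assuming the `pipelineDF` variable from that example):
+
+```python
+# "ner.result" holds one IOB-style tag per token for each input row.
+pipelineDF.select("ner.result").show(truncate=False)
+```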
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_all_100yen| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|848.0 MB| + +## References + +https://huggingface.co/100yen/xlm-roberta-base-finetuned-panx-all \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_all_100yen_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_all_100yen_pipeline_en.md new file mode 100644 index 00000000000000..3a20009fd02741 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_all_100yen_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_all_100yen_pipeline pipeline XlmRoBertaForTokenClassification from 100yen +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_all_100yen_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_all_100yen_pipeline` is a English model originally trained by 100yen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_100yen_pipeline_en_5.5.0_3.0_1725593722554.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_100yen_pipeline_en_5.5.0_3.0_1725593722554.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_all_100yen_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_all_100yen_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
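+
+For a single sentence, the pipeline can also be used through `annotate`. This sketch assumes the bundled annotators expose `token` and `ner` output columns (not documented here), and the example sentence is only illustrative:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_all_100yen_pipeline", lang="en")
+
+result = pipeline.annotate("My name is Clara and I live in Berkeley.")
+# Pair each token with its predicted tag, assuming both keys are present.
+print(list(zip(result.get("token", []), result.get("ner", []))))
+```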
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_all_100yen_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|848.0 MB| + +## References + +https://huggingface.co/100yen/xlm-roberta-base-finetuned-panx-all + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_all_54data_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_all_54data_en.md new file mode 100644 index 00000000000000..9579e45da71d72 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_all_54data_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_all_54data XlmRoBertaForTokenClassification from 54data +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_all_54data +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_all_54data` is a English model originally trained by 54data. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_54data_en_5.5.0_3.0_1725656821849.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_54data_en_5.5.0_3.0_1725656821849.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_all_54data","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_all_54data", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_all_54data| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|861.0 MB| + +## References + +https://huggingface.co/54data/xlm-roberta-base-finetuned-panx-all \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_all_54data_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_all_54data_pipeline_en.md new file mode 100644 index 00000000000000..abcdf3c34c8088 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_all_54data_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_all_54data_pipeline pipeline XlmRoBertaForTokenClassification from 54data +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_all_54data_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_all_54data_pipeline` is a English model originally trained by 54data. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_54data_pipeline_en_5.5.0_3.0_1725656884970.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_54data_pipeline_en_5.5.0_3.0_1725656884970.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_all_54data_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_all_54data_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_all_54data_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|861.0 MB| + +## References + +https://huggingface.co/54data/xlm-roberta-base-finetuned-panx-all + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_all_buruzaemon_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_all_buruzaemon_en.md new file mode 100644 index 00000000000000..4519a9634b0d20 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_all_buruzaemon_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_all_buruzaemon XlmRoBertaForTokenClassification from buruzaemon +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_all_buruzaemon +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_all_buruzaemon` is a English model originally trained by buruzaemon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_buruzaemon_en_5.5.0_3.0_1725657778707.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_buruzaemon_en_5.5.0_3.0_1725657778707.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python
from pyspark.ml import Pipeline
from sparknlp.base import DocumentAssembler
from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_all_buruzaemon","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("ner")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)
```
```scala
val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols("document")
    .setOutputCol("token")

val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_all_buruzaemon", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("ner")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)
```
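Once the pipeline has been fitted and applied as above, the token-level predictions can be inspected directly from the output DataFrame. A short sketch, reusing the column names from the example:

```python
# Each row pairs the tokens of a document with the predicted NER tags
pipelineDF.select("token.result", "ner.result").show(truncate=False)
```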
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_all_buruzaemon| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|848.0 MB| + +## References + +https://huggingface.co/buruzaemon/xlm-roberta-base-finetuned-panx-all \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_all_buruzaemon_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_all_buruzaemon_pipeline_en.md new file mode 100644 index 00000000000000..cf0cb5153bb0a5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_all_buruzaemon_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_all_buruzaemon_pipeline pipeline XlmRoBertaForTokenClassification from buruzaemon +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_all_buruzaemon_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_all_buruzaemon_pipeline` is a English model originally trained by buruzaemon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_buruzaemon_pipeline_en_5.5.0_3.0_1725657858288.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_buruzaemon_pipeline_en_5.5.0_3.0_1725657858288.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_all_buruzaemon_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_all_buruzaemon_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_all_buruzaemon_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|848.0 MB| + +## References + +https://huggingface.co/buruzaemon/xlm-roberta-base-finetuned-panx-all + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_all_gcmsrc_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_all_gcmsrc_en.md new file mode 100644 index 00000000000000..711a19550f7c93 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_all_gcmsrc_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_all_gcmsrc XlmRoBertaForTokenClassification from gcmsrc +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_all_gcmsrc +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_all_gcmsrc` is a English model originally trained by gcmsrc. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_gcmsrc_en_5.5.0_3.0_1725656674754.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_gcmsrc_en_5.5.0_3.0_1725656674754.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python
from pyspark.ml import Pipeline
from sparknlp.base import DocumentAssembler
from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_all_gcmsrc","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("ner")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)
```
```scala
val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols("document")
    .setOutputCol("token")

val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_all_gcmsrc", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("ner")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)
```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_all_gcmsrc| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|859.8 MB| + +## References + +https://huggingface.co/gcmsrc/xlm-roberta-base-finetuned-panx-all \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_all_gcmsrc_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_all_gcmsrc_pipeline_en.md new file mode 100644 index 00000000000000..0e00b705e26e70 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_all_gcmsrc_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_all_gcmsrc_pipeline pipeline XlmRoBertaForTokenClassification from gcmsrc +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_all_gcmsrc_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_all_gcmsrc_pipeline` is a English model originally trained by gcmsrc. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_gcmsrc_pipeline_en_5.5.0_3.0_1725656745365.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_gcmsrc_pipeline_en_5.5.0_3.0_1725656745365.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_all_gcmsrc_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_all_gcmsrc_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_all_gcmsrc_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|859.8 MB| + +## References + +https://huggingface.co/gcmsrc/xlm-roberta-base-finetuned-panx-all + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_english_ankit15nov_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_english_ankit15nov_en.md new file mode 100644 index 00000000000000..1b9dd68dfe28c9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_english_ankit15nov_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_english_ankit15nov XlmRoBertaForTokenClassification from Ankit15nov +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_english_ankit15nov +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_english_ankit15nov` is a English model originally trained by Ankit15nov. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_ankit15nov_en_5.5.0_3.0_1725658126749.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_ankit15nov_en_5.5.0_3.0_1725658126749.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python
from pyspark.ml import Pipeline
from sparknlp.base import DocumentAssembler
from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_english_ankit15nov","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("ner")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)
```
```scala
val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols("document")
    .setOutputCol("token")

val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_english_ankit15nov", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("ner")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)
```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_english_ankit15nov| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|826.4 MB| + +## References + +https://huggingface.co/Ankit15nov/xlm-roberta-base-finetuned-panx-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_english_ankit15nov_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_english_ankit15nov_pipeline_en.md new file mode 100644 index 00000000000000..aa5794c4d5c441 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_english_ankit15nov_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_english_ankit15nov_pipeline pipeline XlmRoBertaForTokenClassification from Ankit15nov +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_english_ankit15nov_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_english_ankit15nov_pipeline` is a English model originally trained by Ankit15nov. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_ankit15nov_pipeline_en_5.5.0_3.0_1725658221593.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_ankit15nov_pipeline_en_5.5.0_3.0_1725658221593.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_english_ankit15nov_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_english_ankit15nov_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
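Besides `transform` on a DataFrame, a pretrained pipeline can also be run on plain strings. A brief sketch under the same assumptions; the example sentence is illustrative and the dictionary keys follow the output columns listed for this pipeline:

```python
from sparknlp.pretrained import PretrainedPipeline

pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_english_ankit15nov_pipeline", lang="en")

# annotate() skips DataFrame construction and returns a dict keyed by output column
result = pipeline.annotate("Angela Merkel met Emmanuel Macron in Berlin.")
print(list(zip(result["token"], result["ner"])))
```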
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_english_ankit15nov_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|826.4 MB| + +## References + +https://huggingface.co/Ankit15nov/xlm-roberta-base-finetuned-panx-en + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_english_cataluna84_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_english_cataluna84_en.md new file mode 100644 index 00000000000000..dac0c061aac5eb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_english_cataluna84_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_english_cataluna84 XlmRoBertaForTokenClassification from cataluna84 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_english_cataluna84 +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_english_cataluna84` is a English model originally trained by cataluna84. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_cataluna84_en_5.5.0_3.0_1725592065982.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_cataluna84_en_5.5.0_3.0_1725592065982.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python
from pyspark.ml import Pipeline
from sparknlp.base import DocumentAssembler
from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_english_cataluna84","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("ner")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)
```
```scala
val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols("document")
    .setOutputCol("token")

val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_english_cataluna84", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("ner")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)
```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_english_cataluna84| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|826.4 MB| + +## References + +https://huggingface.co/cataluna84/xlm-roberta-base-finetuned-panx-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_english_cataluna84_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_english_cataluna84_pipeline_en.md new file mode 100644 index 00000000000000..3322c1b7c6c916 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_english_cataluna84_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_english_cataluna84_pipeline pipeline XlmRoBertaForTokenClassification from cataluna84 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_english_cataluna84_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_english_cataluna84_pipeline` is a English model originally trained by cataluna84. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_cataluna84_pipeline_en_5.5.0_3.0_1725592163898.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_cataluna84_pipeline_en_5.5.0_3.0_1725592163898.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_english_cataluna84_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_english_cataluna84_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_english_cataluna84_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|826.4 MB| + +## References + +https://huggingface.co/cataluna84/xlm-roberta-base-finetuned-panx-en + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_english_leosol_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_english_leosol_en.md new file mode 100644 index 00000000000000..ab769ae89aba83 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_english_leosol_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_english_leosol XlmRoBertaForTokenClassification from leosol +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_english_leosol +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_english_leosol` is a English model originally trained by leosol. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_leosol_en_5.5.0_3.0_1725593683348.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_leosol_en_5.5.0_3.0_1725593683348.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python
from pyspark.ml import Pipeline
from sparknlp.base import DocumentAssembler
from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_english_leosol","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("ner")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)
```
```scala
val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols("document")
    .setOutputCol("token")

val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_english_leosol", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("ner")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)
```
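For low-latency inference on individual strings, the fitted `pipelineModel` from the example above can be wrapped in a `LightPipeline`. A sketch under the same assumptions; the input sentence is illustrative:

```python
from sparknlp.base import LightPipeline

light = LightPipeline(pipelineModel)

# Returns a dict keyed by output column; "ner" holds one predicted tag per token
annotations = light.annotate("Wolfgang lives in Berlin and works for the UN.")
print(list(zip(annotations["token"], annotations["ner"])))
```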
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_english_leosol| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|814.3 MB| + +## References + +https://huggingface.co/leosol/xlm-roberta-base-finetuned-panx-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_french_ankit15nov_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_french_ankit15nov_en.md new file mode 100644 index 00000000000000..34ff1fc5f290ad --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_french_ankit15nov_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_french_ankit15nov XlmRoBertaForTokenClassification from Ankit15nov +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_french_ankit15nov +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_french_ankit15nov` is a English model originally trained by Ankit15nov. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_ankit15nov_en_5.5.0_3.0_1725657045627.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_ankit15nov_en_5.5.0_3.0_1725657045627.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python
from pyspark.ml import Pipeline
from sparknlp.base import DocumentAssembler
from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_french_ankit15nov","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("ner")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)
```
```scala
val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols("document")
    .setOutputCol("token")

val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_french_ankit15nov", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("ner")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)
```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_french_ankit15nov| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|840.9 MB| + +## References + +https://huggingface.co/Ankit15nov/xlm-roberta-base-finetuned-panx-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_french_sorabe_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_french_sorabe_en.md new file mode 100644 index 00000000000000..df79d492b8d3c6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_french_sorabe_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_french_sorabe XlmRoBertaForTokenClassification from SORABE +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_french_sorabe +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_french_sorabe` is a English model originally trained by SORABE. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_sorabe_en_5.5.0_3.0_1725591788125.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_sorabe_en_5.5.0_3.0_1725591788125.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python
from pyspark.ml import Pipeline
from sparknlp.base import DocumentAssembler
from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_french_sorabe","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("ner")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)
```
```scala
val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols("document")
    .setOutputCol("token")

val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_french_sorabe", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("ner")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)
```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_french_sorabe| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|827.9 MB| + +## References + +https://huggingface.co/SORABE/xlm-roberta-base-finetuned-panx-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_french_sorabe_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_french_sorabe_pipeline_en.md new file mode 100644 index 00000000000000..2ad056d0d8fda7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_french_sorabe_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_french_sorabe_pipeline pipeline XlmRoBertaForTokenClassification from SORABE +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_french_sorabe_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_french_sorabe_pipeline` is a English model originally trained by SORABE. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_sorabe_pipeline_en_5.5.0_3.0_1725591883785.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_sorabe_pipeline_en_5.5.0_3.0_1725591883785.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_french_sorabe_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_french_sorabe_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_french_sorabe_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|827.9 MB| + +## References + +https://huggingface.co/SORABE/xlm-roberta-base-finetuned-panx-fr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_french_sungwoo1_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_french_sungwoo1_en.md new file mode 100644 index 00000000000000..3d0a61ab6c8f54 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_french_sungwoo1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_french_sungwoo1 XlmRoBertaForTokenClassification from sungwoo1 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_french_sungwoo1 +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_french_sungwoo1` is a English model originally trained by sungwoo1. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_sungwoo1_en_5.5.0_3.0_1725593472271.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_sungwoo1_en_5.5.0_3.0_1725593472271.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python
from pyspark.ml import Pipeline
from sparknlp.base import DocumentAssembler
from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_french_sungwoo1","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("ner")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)
```
```scala
val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols("document")
    .setOutputCol("token")

val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_french_sungwoo1", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("ner")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)
```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_french_sungwoo1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|840.9 MB| + +## References + +https://huggingface.co/sungwoo1/xlm-roberta-base-finetuned-panx-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_french_sungwoo1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_french_sungwoo1_pipeline_en.md new file mode 100644 index 00000000000000..4ed6d2a039f6ff --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_french_sungwoo1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_french_sungwoo1_pipeline pipeline XlmRoBertaForTokenClassification from sungwoo1 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_french_sungwoo1_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_french_sungwoo1_pipeline` is a English model originally trained by sungwoo1. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_sungwoo1_pipeline_en_5.5.0_3.0_1725593550242.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_sungwoo1_pipeline_en_5.5.0_3.0_1725593550242.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_french_sungwoo1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_french_sungwoo1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_french_sungwoo1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|840.9 MB| + +## References + +https://huggingface.co/sungwoo1/xlm-roberta-base-finetuned-panx-fr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_french_transformersbook_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_french_transformersbook_en.md new file mode 100644 index 00000000000000..82dad9498faae8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_french_transformersbook_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_french_transformersbook XlmRoBertaForTokenClassification from transformersbook +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_french_transformersbook +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_french_transformersbook` is a English model originally trained by transformersbook. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_transformersbook_en_5.5.0_3.0_1725657940587.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_transformersbook_en_5.5.0_3.0_1725657940587.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python
from pyspark.ml import Pipeline
from sparknlp.base import DocumentAssembler
from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_french_transformersbook","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("ner")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)
```
```scala
val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols("document")
    .setOutputCol("token")

val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_french_transformersbook", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("ner")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)
```
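Because the pretrained weights are downloaded on first use, it can be convenient to persist the fitted pipeline and reload it in later sessions. A sketch using standard Spark ML persistence; the path is illustrative:

```python
from pyspark.ml import PipelineModel

# Save the fitted pipeline, including the downloaded NER model
pipelineModel.write().overwrite().save("/tmp/panx_french_ner_pipeline")

# Reload it later and reuse it without refitting
restored = PipelineModel.load("/tmp/panx_french_ner_pipeline")
restored.transform(data).select("ner.result").show(truncate=False)
```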
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_french_transformersbook| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|840.9 MB| + +## References + +https://huggingface.co/transformersbook/xlm-roberta-base-finetuned-panx-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_french_youngbreadho_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_french_youngbreadho_en.md new file mode 100644 index 00000000000000..29f4deee19ebf5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_french_youngbreadho_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_french_youngbreadho XlmRoBertaForTokenClassification from youngbreadho +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_french_youngbreadho +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_french_youngbreadho` is a English model originally trained by youngbreadho. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_youngbreadho_en_5.5.0_3.0_1725656409601.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_youngbreadho_en_5.5.0_3.0_1725656409601.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python
from pyspark.ml import Pipeline
from sparknlp.base import DocumentAssembler
from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_french_youngbreadho","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("ner")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)
```
```scala
val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols("document")
    .setOutputCol("token")

val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_french_youngbreadho", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("ner")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)
```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_french_youngbreadho| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|836.9 MB| + +## References + +https://huggingface.co/youngbreadho/xlm-roberta-base-finetuned-panx-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_french_youngbreadho_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_french_youngbreadho_pipeline_en.md new file mode 100644 index 00000000000000..d47e26c6ed7b7b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_french_youngbreadho_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_french_youngbreadho_pipeline pipeline XlmRoBertaForTokenClassification from youngbreadho +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_french_youngbreadho_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_french_youngbreadho_pipeline` is a English model originally trained by youngbreadho. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_youngbreadho_pipeline_en_5.5.0_3.0_1725656492485.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_youngbreadho_pipeline_en_5.5.0_3.0_1725656492485.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_french_youngbreadho_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_french_youngbreadho_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_french_youngbreadho_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|836.9 MB| + +## References + +https://huggingface.co/youngbreadho/xlm-roberta-base-finetuned-panx-fr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_adalberti_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_adalberti_en.md new file mode 100644 index 00000000000000..5b87c98463954d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_adalberti_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_adalberti XlmRoBertaForTokenClassification from adalberti +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_adalberti +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_adalberti` is a English model originally trained by adalberti. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_adalberti_en_5.5.0_3.0_1725657468995.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_adalberti_en_5.5.0_3.0_1725657468995.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python
from pyspark.ml import Pipeline
from sparknlp.base import DocumentAssembler
from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_adalberti","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("ner")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)
```
```scala
val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols("document")
    .setOutputCol("token")

val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_adalberti", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("ner")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)
```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_adalberti| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|840.8 MB| + +## References + +https://huggingface.co/adalberti/xlm-roberta-base-finetuned-panx-de-adalberti \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_adalberti_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_adalberti_pipeline_en.md new file mode 100644 index 00000000000000..bbcedd169d33f4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_adalberti_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_adalberti_pipeline pipeline XlmRoBertaForTokenClassification from adalberti +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_adalberti_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_adalberti_pipeline` is a English model originally trained by adalberti. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_adalberti_pipeline_en_5.5.0_3.0_1725657552855.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_adalberti_pipeline_en_5.5.0_3.0_1725657552855.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_adalberti_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_adalberti_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_adalberti_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|840.8 MB| + +## References + +https://huggingface.co/adalberti/xlm-roberta-base-finetuned-panx-de-adalberti + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_ahmad_alismail_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_ahmad_alismail_en.md new file mode 100644 index 00000000000000..7ea7b52bec0695 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_ahmad_alismail_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_ahmad_alismail XlmRoBertaForTokenClassification from ahmad-alismail +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_ahmad_alismail +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_ahmad_alismail` is a English model originally trained by ahmad-alismail. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_ahmad_alismail_en_5.5.0_3.0_1725658319833.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_ahmad_alismail_en_5.5.0_3.0_1725658319833.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Requires an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_ahmad_alismail", "en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_ahmad_alismail", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
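+
+The `ner` column produced above holds Spark NLP annotations rather than plain strings. The following is a small sketch of how the token-level tags might be flattened for inspection, assuming the `pipelineDF` from the example above and the usual annotation schema (the `word` metadata key is an assumption here):
+
+```python
+from pyspark.sql import functions as F
+
+# One row per predicted token tag, with the original token alongside its label.
+pipelineDF.select(F.explode("ner").alias("tag")) \
+    .select(
+        F.col("tag.metadata")["word"].alias("token"),
+        F.col("tag.result").alias("label")) \
+    .show(truncate=False)
+```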
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_ahmad_alismail| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/ahmad-alismail/xlm-roberta-base-finetuned-panx-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_francos_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_francos_en.md new file mode 100644 index 00000000000000..6f81092ba89aff --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_francos_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_francos XlmRoBertaForTokenClassification from francos +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_francos +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_francos` is a English model originally trained by francos. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_francos_en_5.5.0_3.0_1725657557680.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_francos_en_5.5.0_3.0_1725657557680.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Requires an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_francos", "en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_francos", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_francos| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/francos/xlm-roberta-base-finetuned-panx-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_francos_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_francos_pipeline_en.md new file mode 100644 index 00000000000000..26f1bbb1a6b4b7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_francos_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_francos_pipeline pipeline XlmRoBertaForTokenClassification from francos +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_francos_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_francos_pipeline` is a English model originally trained by francos. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_francos_pipeline_en_5.5.0_3.0_1725657628001.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_francos_pipeline_en_5.5.0_3.0_1725657628001.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_francos_pipeline", lang = "en")
+
+# df is any DataFrame with a "text" column, for example:
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_francos_pipeline", lang = "en")
+
+// df is any DataFrame with a "text" column, for example:
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_francos_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/francos/xlm-roberta-base-finetuned-panx-de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_gcmsrc_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_gcmsrc_en.md new file mode 100644 index 00000000000000..db67c382822280 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_gcmsrc_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_gcmsrc XlmRoBertaForTokenClassification from gcmsrc +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_gcmsrc +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_gcmsrc` is a English model originally trained by gcmsrc. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_gcmsrc_en_5.5.0_3.0_1725656677850.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_gcmsrc_en_5.5.0_3.0_1725656677850.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Requires an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_gcmsrc", "en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_gcmsrc", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
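+
+Because the classifier emits IOB-style tags per token, a `NerConverter` stage can be appended to merge them into whole entity chunks. The sketch below assumes the stages and `data` from the example above are still in scope; column names follow the defaults used there:
+
+```python
+from sparknlp.annotator import NerConverter
+
+# Groups consecutive B-/I- tags into entity chunks such as persons or locations.
+nerConverter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+chunkPipeline = Pipeline().setStages(
+    [documentAssembler, tokenizer, tokenClassifier, nerConverter])
+chunkDF = chunkPipeline.fit(data).transform(data)
+chunkDF.selectExpr("explode(ner_chunk.result) as entity").show(truncate=False)
+```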
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_gcmsrc| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|858.2 MB| + +## References + +https://huggingface.co/gcmsrc/xlm-roberta-base-finetuned-panx-de-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_reinoudbosch_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_reinoudbosch_en.md new file mode 100644 index 00000000000000..e9c05c64a24fb7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_reinoudbosch_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_reinoudbosch XlmRoBertaForTokenClassification from reinoudbosch +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_reinoudbosch +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_reinoudbosch` is a English model originally trained by reinoudbosch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_reinoudbosch_en_5.5.0_3.0_1725658181911.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_reinoudbosch_en_5.5.0_3.0_1725658181911.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Requires an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_reinoudbosch", "en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_reinoudbosch", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_reinoudbosch| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|858.2 MB| + +## References + +https://huggingface.co/reinoudbosch/xlm-roberta-base-finetuned-panx-de-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_reinoudbosch_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_reinoudbosch_pipeline_en.md new file mode 100644 index 00000000000000..e94f35825dd602 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_reinoudbosch_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_reinoudbosch_pipeline pipeline XlmRoBertaForTokenClassification from reinoudbosch +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_reinoudbosch_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_reinoudbosch_pipeline` is a English model originally trained by reinoudbosch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_reinoudbosch_pipeline_en_5.5.0_3.0_1725658247321.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_reinoudbosch_pipeline_en_5.5.0_3.0_1725658247321.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_french_reinoudbosch_pipeline", lang = "en")
+
+# df is any DataFrame with a "text" column, for example:
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_french_reinoudbosch_pipeline", lang = "en")
+
+// df is any DataFrame with a "text" column, for example:
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_reinoudbosch_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|858.2 MB| + +## References + +https://huggingface.co/reinoudbosch/xlm-roberta-base-finetuned-panx-de-fr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_rupe_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_rupe_en.md new file mode 100644 index 00000000000000..7df60138162b39 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_rupe_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_rupe XlmRoBertaForTokenClassification from RupE +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_rupe +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_rupe` is a English model originally trained by RupE. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_rupe_en_5.5.0_3.0_1725593254469.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_rupe_en_5.5.0_3.0_1725593254469.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Requires an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_rupe", "en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_rupe", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_rupe| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|854.4 MB| + +## References + +https://huggingface.co/RupE/xlm-roberta-base-finetuned-panx-de-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_rupe_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_rupe_pipeline_en.md new file mode 100644 index 00000000000000..ce970663be3091 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_rupe_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_rupe_pipeline pipeline XlmRoBertaForTokenClassification from RupE +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_rupe_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_rupe_pipeline` is a English model originally trained by RupE. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_rupe_pipeline_en_5.5.0_3.0_1725593336727.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_rupe_pipeline_en_5.5.0_3.0_1725593336727.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_french_rupe_pipeline", lang = "en")
+
+# df is any DataFrame with a "text" column, for example:
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_french_rupe_pipeline", lang = "en")
+
+// df is any DataFrame with a "text" column, for example:
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_rupe_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|854.4 MB| + +## References + +https://huggingface.co/RupE/xlm-roberta-base-finetuned-panx-de-fr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_songys_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_songys_en.md new file mode 100644 index 00000000000000..3f75bf9420aa86 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_songys_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_songys XlmRoBertaForTokenClassification from songys +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_songys +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_songys` is a English model originally trained by songys. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_songys_en_5.5.0_3.0_1725593273348.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_songys_en_5.5.0_3.0_1725593273348.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Requires an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_songys", "en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_songys", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_songys| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|858.5 MB| + +## References + +https://huggingface.co/songys/xlm-roberta-base-finetuned-panx-de-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_songys_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_songys_pipeline_en.md new file mode 100644 index 00000000000000..98004ace60a504 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_songys_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_songys_pipeline pipeline XlmRoBertaForTokenClassification from songys +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_songys_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_songys_pipeline` is a English model originally trained by songys. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_songys_pipeline_en_5.5.0_3.0_1725593339642.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_songys_pipeline_en_5.5.0_3.0_1725593339642.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_french_songys_pipeline", lang = "en")
+
+# df is any DataFrame with a "text" column, for example:
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_french_songys_pipeline", lang = "en")
+
+// df is any DataFrame with a "text" column, for example:
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+```
+
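+
+The `annotations` DataFrame returned by `transform` keeps every intermediate column of the pipeline. The line below is a brief sketch of pulling out just the predicted tags, assuming the snippet above has run and that the token classifier writes to a column named `ner`:
+
+```python
+# Each element of ner.result is the IOB tag predicted for one token.
+annotations.selectExpr("explode(ner.result) as tag").show(truncate=False)
+```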
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_songys_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|858.5 MB| + +## References + +https://huggingface.co/songys/xlm-roberta-base-finetuned-panx-de-fr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_wooseok0303_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_wooseok0303_en.md new file mode 100644 index 00000000000000..eb18a884a0dad2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_wooseok0303_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_wooseok0303 XlmRoBertaForTokenClassification from wooseok0303 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_wooseok0303 +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_wooseok0303` is a English model originally trained by wooseok0303. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_wooseok0303_en_5.5.0_3.0_1725592164386.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_wooseok0303_en_5.5.0_3.0_1725592164386.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Requires an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_wooseok0303", "en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_wooseok0303", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_wooseok0303| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|843.4 MB| + +## References + +https://huggingface.co/wooseok0303/xlm-roberta-base-finetuned-panx-de-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_wooseok0303_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_wooseok0303_pipeline_en.md new file mode 100644 index 00000000000000..262841fb2f20c8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_wooseok0303_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_wooseok0303_pipeline pipeline XlmRoBertaForTokenClassification from wooseok0303 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_wooseok0303_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_wooseok0303_pipeline` is a English model originally trained by wooseok0303. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_wooseok0303_pipeline_en_5.5.0_3.0_1725592258129.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_wooseok0303_pipeline_en_5.5.0_3.0_1725592258129.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_french_wooseok0303_pipeline", lang = "en")
+
+# df is any DataFrame with a "text" column, for example:
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_french_wooseok0303_pipeline", lang = "en")
+
+// df is any DataFrame with a "text" column, for example:
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_wooseok0303_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|843.4 MB| + +## References + +https://huggingface.co/wooseok0303/xlm-roberta-base-finetuned-panx-de-fr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_zebans_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_zebans_en.md new file mode 100644 index 00000000000000..74f5ffe2d61850 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_french_zebans_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_zebans XlmRoBertaForTokenClassification from zebans +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_zebans +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_zebans` is a English model originally trained by zebans. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_zebans_en_5.5.0_3.0_1725657622614.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_zebans_en_5.5.0_3.0_1725657622614.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Requires an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_zebans", "en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_zebans", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_zebans| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|858.2 MB| + +## References + +https://huggingface.co/zebans/xlm-roberta-base-finetuned-panx-de-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_ghks4861_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_ghks4861_en.md new file mode 100644 index 00000000000000..b20e1b2f01ebb7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_ghks4861_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_ghks4861 XlmRoBertaForTokenClassification from ghks4861 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_ghks4861 +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_ghks4861` is a English model originally trained by ghks4861. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_ghks4861_en_5.5.0_3.0_1725657186216.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_ghks4861_en_5.5.0_3.0_1725657186216.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Requires an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_ghks4861", "en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_ghks4861", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_ghks4861| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/ghks4861/xlm-roberta-base-finetuned-panx-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_jpaulhunter_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_jpaulhunter_en.md new file mode 100644 index 00000000000000..ce99aa0f0042f6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_jpaulhunter_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_jpaulhunter XlmRoBertaForTokenClassification from jpaulhunter +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_jpaulhunter +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_jpaulhunter` is a English model originally trained by jpaulhunter. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_jpaulhunter_en_5.5.0_3.0_1725657976501.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_jpaulhunter_en_5.5.0_3.0_1725657976501.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Requires an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_jpaulhunter", "en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_jpaulhunter", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
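+
+When serving single documents rather than large tables, the fitted pipeline can usually be wrapped in a `LightPipeline`, which runs the same stages in memory without launching Spark jobs per request. The snippet below is a sketch assuming the `pipelineModel` from the example above; the `ner` key simply mirrors the classifier's output column name:
+
+```python
+from sparknlp.base import LightPipeline
+
+# Apply the fitted pipeline to a plain string and read back the token tags.
+light = LightPipeline(pipelineModel)
+print(light.annotate("I love spark-nlp")["ner"])
+```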
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_jpaulhunter| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|853.7 MB| + +## References + +https://huggingface.co/jpaulhunter/xlm-roberta-base-finetuned-panx-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_jpaulhunter_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_jpaulhunter_pipeline_en.md new file mode 100644 index 00000000000000..2c48cb4e2634a4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_jpaulhunter_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_jpaulhunter_pipeline pipeline XlmRoBertaForTokenClassification from jpaulhunter +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_jpaulhunter_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_jpaulhunter_pipeline` is a English model originally trained by jpaulhunter. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_jpaulhunter_pipeline_en_5.5.0_3.0_1725658042854.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_jpaulhunter_pipeline_en_5.5.0_3.0_1725658042854.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_jpaulhunter_pipeline", lang = "en")
+
+# df is any DataFrame with a "text" column, for example:
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_jpaulhunter_pipeline", lang = "en")
+
+// df is any DataFrame with a "text" column, for example:
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_jpaulhunter_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/jpaulhunter/xlm-roberta-base-finetuned-panx-de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_jslowik_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_jslowik_en.md new file mode 100644 index 00000000000000..e7eb3a9f285d82 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_jslowik_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_jslowik XlmRoBertaForTokenClassification from jslowik +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_jslowik +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_jslowik` is a English model originally trained by jslowik. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_jslowik_en_5.5.0_3.0_1725591785240.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_jslowik_en_5.5.0_3.0_1725591785240.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Requires an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_jslowik", "en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_jslowik", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_jslowik| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|854.4 MB| + +## References + +https://huggingface.co/jslowik/xlm-roberta-base-finetuned-panx-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_jslowik_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_jslowik_pipeline_en.md new file mode 100644 index 00000000000000..c599f8b4cd2af1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_jslowik_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_jslowik_pipeline pipeline XlmRoBertaForTokenClassification from jslowik +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_jslowik_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_jslowik_pipeline` is a English model originally trained by jslowik. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_jslowik_pipeline_en_5.5.0_3.0_1725591853724.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_jslowik_pipeline_en_5.5.0_3.0_1725591853724.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_jslowik_pipeline", lang = "en")
+
+# df is any DataFrame with a "text" column, for example:
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_jslowik_pipeline", lang = "en")
+
+// df is any DataFrame with a "text" column, for example:
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_jslowik_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|854.4 MB| + +## References + +https://huggingface.co/jslowik/xlm-roberta-base-finetuned-panx-de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_kuma_s_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_kuma_s_en.md new file mode 100644 index 00000000000000..37d5221d11b849 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_kuma_s_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_kuma_s XlmRoBertaForTokenClassification from kuma-s +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_kuma_s +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_kuma_s` is a English model originally trained by kuma-s. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_kuma_s_en_5.5.0_3.0_1725591992544.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_kuma_s_en_5.5.0_3.0_1725591992544.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Requires an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_kuma_s", "en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_kuma_s", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
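+
+The fitted pipeline is a regular Spark ML `PipelineModel`, so it can be persisted once and reloaded later instead of re-downloading the model on every run. The sketch below uses a hypothetical output path and assumes the `pipelineModel` and `data` objects from the example above:
+
+```python
+from pyspark.ml import PipelineModel
+
+# Save the fitted pipeline (path is illustrative only) and load it back.
+pipelineModel.write().overwrite().save("/tmp/panx_german_ner_pipeline")
+restored = PipelineModel.load("/tmp/panx_german_ner_pipeline")
+restored.transform(data).select("ner.result").show(truncate=False)
+```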
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_kuma_s| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/kuma-s/xlm-roberta-base-finetuned-panx-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_kuma_s_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_kuma_s_pipeline_en.md new file mode 100644 index 00000000000000..1c5b2161c5f278 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_kuma_s_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_kuma_s_pipeline pipeline XlmRoBertaForTokenClassification from kuma-s +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_kuma_s_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_kuma_s_pipeline` is a English model originally trained by kuma-s. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_kuma_s_pipeline_en_5.5.0_3.0_1725592061122.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_kuma_s_pipeline_en_5.5.0_3.0_1725592061122.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_kuma_s_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_kuma_s_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_kuma_s_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/kuma-s/xlm-roberta-base-finetuned-panx-de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_mmenendezg_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_mmenendezg_en.md new file mode 100644 index 00000000000000..d9d14cdfb19acb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_mmenendezg_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_mmenendezg XlmRoBertaForTokenClassification from mmenendezg +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_mmenendezg +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_mmenendezg` is a English model originally trained by mmenendezg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_mmenendezg_en_5.5.0_3.0_1725593133007.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_mmenendezg_en_5.5.0_3.0_1725593133007.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# input columns must match the assembler's 'document' and the tokenizer's 'token' outputs
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_mmenendezg","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_mmenendezg", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_mmenendezg| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|853.7 MB| + +## References + +https://huggingface.co/mmenendezg/xlm-roberta-base-finetuned-panx-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_mmenendezg_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_mmenendezg_pipeline_en.md new file mode 100644 index 00000000000000..099f6bf64192e3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_mmenendezg_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_mmenendezg_pipeline pipeline XlmRoBertaForTokenClassification from mmenendezg +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_mmenendezg_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_mmenendezg_pipeline` is a English model originally trained by mmenendezg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_mmenendezg_pipeline_en_5.5.0_3.0_1725593202646.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_mmenendezg_pipeline_en_5.5.0_3.0_1725593202646.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_mmenendezg_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_mmenendezg_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_mmenendezg_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/mmenendezg/xlm-roberta-base-finetuned-panx-de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_nadezda_at_htec_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_nadezda_at_htec_en.md new file mode 100644 index 00000000000000..e72c47ad8c5722 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_nadezda_at_htec_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_nadezda_at_htec XlmRoBertaForTokenClassification from nadezda-at-htec +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_nadezda_at_htec +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_nadezda_at_htec` is a English model originally trained by nadezda-at-htec. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_nadezda_at_htec_en_5.5.0_3.0_1725593085992.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_nadezda_at_htec_en_5.5.0_3.0_1725593085992.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# input columns must match the assembler's 'document' and the tokenizer's 'token' outputs
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_nadezda_at_htec","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_nadezda_at_htec", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_nadezda_at_htec| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/nadezda-at-htec/xlm-roberta-base-finetuned-panx-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_nadezda_at_htec_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_nadezda_at_htec_pipeline_en.md new file mode 100644 index 00000000000000..45310912fd086a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_nadezda_at_htec_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_nadezda_at_htec_pipeline pipeline XlmRoBertaForTokenClassification from nadezda-at-htec +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_nadezda_at_htec_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_nadezda_at_htec_pipeline` is a English model originally trained by nadezda-at-htec. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_nadezda_at_htec_pipeline_en_5.5.0_3.0_1725593153226.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_nadezda_at_htec_pipeline_en_5.5.0_3.0_1725593153226.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_nadezda_at_htec_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_nadezda_at_htec_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_nadezda_at_htec_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/nadezda-at-htec/xlm-roberta-base-finetuned-panx-de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_nerdai_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_nerdai_en.md new file mode 100644 index 00000000000000..e89f2a93d827bf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_nerdai_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_nerdai XlmRoBertaForTokenClassification from nerdai +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_nerdai +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_nerdai` is a English model originally trained by nerdai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_nerdai_en_5.5.0_3.0_1725592945307.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_nerdai_en_5.5.0_3.0_1725592945307.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# input columns must match the assembler's 'document' and the tokenizer's 'token' outputs
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_nerdai","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_nerdai", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_nerdai| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/nerdai/xlm-roberta-base-finetuned-panx-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_nerdai_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_nerdai_pipeline_en.md new file mode 100644 index 00000000000000..624a029ba1670d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_nerdai_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_nerdai_pipeline pipeline XlmRoBertaForTokenClassification from nerdai +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_nerdai_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_nerdai_pipeline` is a English model originally trained by nerdai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_nerdai_pipeline_en_5.5.0_3.0_1725593011422.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_nerdai_pipeline_en_5.5.0_3.0_1725593011422.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_nerdai_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_nerdai_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_nerdai_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/nerdai/xlm-roberta-base-finetuned-panx-de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_roshanrai1304_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_roshanrai1304_en.md new file mode 100644 index 00000000000000..820c08a161d559 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_roshanrai1304_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_roshanrai1304 XlmRoBertaForTokenClassification from roshanrai1304 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_roshanrai1304 +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_roshanrai1304` is a English model originally trained by roshanrai1304. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_roshanrai1304_en_5.5.0_3.0_1725593794832.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_roshanrai1304_en_5.5.0_3.0_1725593794832.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# input columns must match the assembler's 'document' and the tokenizer's 'token' outputs
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_roshanrai1304","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_roshanrai1304", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
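+For low-latency inference on individual documents, the fitted `pipelineModel` from the snippet above can be wrapped in a LightPipeline, which runs outside of Spark DataFrames. This is a sketch, not part of the original card; the example sentence is illustrative only.
+
+```python
+from sparknlp.base import LightPipeline
+
+# wrap the fitted PipelineModel produced by the snippet above
+light = LightPipeline(pipelineModel)
+
+# returns a dict mapping output columns ("document", "token", "ner") to their results
+result = light.annotate("John Snow Labs wurde in Delaware gegründet.")
+print(result["ner"])
+```
+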
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_roshanrai1304| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/roshanrai1304/xlm-roberta-base-finetuned-panx-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_roshanrai1304_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_roshanrai1304_pipeline_en.md new file mode 100644 index 00000000000000..5b0c075f68b0d2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_german_roshanrai1304_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_roshanrai1304_pipeline pipeline XlmRoBertaForTokenClassification from roshanrai1304 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_roshanrai1304_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_roshanrai1304_pipeline` is a English model originally trained by roshanrai1304. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_roshanrai1304_pipeline_en_5.5.0_3.0_1725593867256.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_roshanrai1304_pipeline_en_5.5.0_3.0_1725593867256.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_roshanrai1304_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_roshanrai1304_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_roshanrai1304_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/roshanrai1304/xlm-roberta-base-finetuned-panx-de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_italian_amitjain171980_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_italian_amitjain171980_en.md new file mode 100644 index 00000000000000..64ac9117bcb6e9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_italian_amitjain171980_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_italian_amitjain171980 XlmRoBertaForTokenClassification from amitjain171980 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_italian_amitjain171980 +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_italian_amitjain171980` is a English model originally trained by amitjain171980. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_amitjain171980_en_5.5.0_3.0_1725657060933.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_amitjain171980_en_5.5.0_3.0_1725657060933.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# input columns must match the assembler's 'document' and the tokenizer's 'token' outputs
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_italian_amitjain171980","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_italian_amitjain171980", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_italian_amitjain171980| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|828.6 MB| + +## References + +https://huggingface.co/amitjain171980/xlm-roberta-base-finetuned-panx-it \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_italian_amitjain171980_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_italian_amitjain171980_pipeline_en.md new file mode 100644 index 00000000000000..c01d621847f28e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_italian_amitjain171980_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_italian_amitjain171980_pipeline pipeline XlmRoBertaForTokenClassification from amitjain171980 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_italian_amitjain171980_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_italian_amitjain171980_pipeline` is a English model originally trained by amitjain171980. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_amitjain171980_pipeline_en_5.5.0_3.0_1725657148020.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_amitjain171980_pipeline_en_5.5.0_3.0_1725657148020.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_italian_amitjain171980_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_italian_amitjain171980_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_italian_amitjain171980_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|828.6 MB| + +## References + +https://huggingface.co/amitjain171980/xlm-roberta-base-finetuned-panx-it + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_italian_vonewman_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_italian_vonewman_en.md new file mode 100644 index 00000000000000..86761a051b7356 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_italian_vonewman_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_italian_vonewman XlmRoBertaForTokenClassification from vonewman +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_italian_vonewman +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_italian_vonewman` is a English model originally trained by vonewman. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_vonewman_en_5.5.0_3.0_1725592380403.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_vonewman_en_5.5.0_3.0_1725592380403.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# input columns must match the assembler's 'document' and the tokenizer's 'token' outputs
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_italian_vonewman","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_italian_vonewman", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_italian_vonewman| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|815.8 MB| + +## References + +https://huggingface.co/vonewman/xlm-roberta-base-finetuned-panx-it \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_italian_vonewman_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_italian_vonewman_pipeline_en.md new file mode 100644 index 00000000000000..ff55fb57fddb8d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_panx_italian_vonewman_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_italian_vonewman_pipeline pipeline XlmRoBertaForTokenClassification from vonewman +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_italian_vonewman_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_italian_vonewman_pipeline` is a English model originally trained by vonewman. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_vonewman_pipeline_en_5.5.0_3.0_1725592482660.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_vonewman_pipeline_en_5.5.0_3.0_1725592482660.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_italian_vonewman_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_italian_vonewman_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_italian_vonewman_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|815.8 MB| + +## References + +https://huggingface.co/vonewman/xlm-roberta-base-finetuned-panx-it + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_rugo_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_rugo_en.md new file mode 100644 index 00000000000000..9da8beeaab87e9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_rugo_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_rugo XlmRoBertaEmbeddings from rugo +author: John Snow Labs +name: xlm_roberta_base_finetuned_rugo +date: 2024-09-06 +tags: [en, open_source, onnx, embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_rugo` is a English model originally trained by rugo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_rugo_en_5.5.0_3.0_1725596519097.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_rugo_en_5.5.0_3.0_1725596519097.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("xlm_roberta_base_finetuned_rugo","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("xlm_roberta_base_finetuned_rugo","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
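+Once the snippet above has produced `pipelineDF`, the per-token vectors are stored in the `embeddings` annotation column. One possible way to flatten them into plain Spark columns is sketched below (a convenience pattern, not part of the original card).
+
+```python
+from pyspark.sql import functions as F
+
+# each annotation keeps the token text in `result` and its vector in `embeddings`
+vectors = (
+    pipelineDF
+    .select(F.explode("embeddings").alias("ann"))
+    .select(F.col("ann.result").alias("token"), F.col("ann.embeddings").alias("vector"))
+)
+vectors.show(truncate=False)
+```
+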
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_rugo| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[xlm_roberta]| +|Language:|en| +|Size:|987.4 MB| + +## References + +https://huggingface.co/rugo/xlm-roberta-base-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_squad_au_jluckyboyj_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_squad_au_jluckyboyj_en.md new file mode 100644 index 00000000000000..4d1dbcd15f3cb1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_squad_au_jluckyboyj_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_squad_au_jluckyboyj XlmRoBertaForQuestionAnswering from jluckyboyj +author: John Snow Labs +name: xlm_roberta_base_finetuned_squad_au_jluckyboyj +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_squad_au_jluckyboyj` is a English model originally trained by jluckyboyj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_squad_au_jluckyboyj_en_5.5.0_3.0_1725630283551.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_squad_au_jluckyboyj_en_5.5.0_3.0_1725630283551.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_finetuned_squad_au_jluckyboyj","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+# the data column names must match the assembler's input columns
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_finetuned_squad_au_jluckyboyj", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
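+After running either snippet above, the predicted answer span is stored in the `answer` annotation column of `pipelineDF`. A short sketch of reading it back follows; depending on the annotator version, the annotation metadata may also carry a confidence score, so inspect it rather than relying on a fixed key.
+
+```python
+# the answer text is in the `result` field; begin/end offsets into the context
+# and any extra metadata are available on the same annotation struct
+pipelineDF.selectExpr("explode(answer) as a") \
+    .selectExpr("a.result as answer", "a.metadata as metadata") \
+    .show(truncate=False)
+```
+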
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_squad_au_jluckyboyj| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|874.8 MB| + +## References + +https://huggingface.co/jluckyboyj/xlm-roberta-base-finetuned-squad_au \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_squad_au_jluckyboyj_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_squad_au_jluckyboyj_pipeline_en.md new file mode 100644 index 00000000000000..5cd9918e49eca5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_squad_au_jluckyboyj_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_squad_au_jluckyboyj_pipeline pipeline XlmRoBertaForQuestionAnswering from jluckyboyj +author: John Snow Labs +name: xlm_roberta_base_finetuned_squad_au_jluckyboyj_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_squad_au_jluckyboyj_pipeline` is a English model originally trained by jluckyboyj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_squad_au_jluckyboyj_pipeline_en_5.5.0_3.0_1725630346083.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_squad_au_jluckyboyj_pipeline_en_5.5.0_3.0_1725630346083.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_squad_au_jluckyboyj_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_squad_au_jluckyboyj_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
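+Unlike the NER pipelines above, this pipeline starts with a MultiDocumentAssembler (see Included Models below), so the input `df` needs two text columns rather than one. The sketch below assumes the saved stage reads columns named `question` and `context`, mirroring the standalone model card above; verify the names and output columns with `annotations.printSchema()` if they differ.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+# two input columns; the names "question" and "context" are an assumption
+df = spark.createDataFrame(
+    [["Where is John Snow Labs based?", "John Snow Labs is based in Delaware."]]
+).toDF("question", "context")
+
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_squad_au_jluckyboyj_pipeline", lang="en")
+annotations = pipeline.transform(df)
+annotations.printSchema()  # locate the answer column produced by the saved stages
+```
+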
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_squad_au_jluckyboyj_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|874.8 MB| + +## References + +https://huggingface.co/jluckyboyj/xlm-roberta-base-finetuned-squad_au + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_visquad_2_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_visquad_2_en.md new file mode 100644 index 00000000000000..e29cd09e0ae441 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_visquad_2_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_visquad_2 XlmRoBertaForQuestionAnswering from jluckyboyj +author: John Snow Labs +name: xlm_roberta_base_finetuned_visquad_2 +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_visquad_2` is a English model originally trained by jluckyboyj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_visquad_2_en_5.5.0_3.0_1725598022633.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_visquad_2_en_5.5.0_3.0_1725598022633.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_finetuned_visquad_2","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+# the data column names must match the assembler's input columns
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_finetuned_visquad_2", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_visquad_2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|852.0 MB| + +## References + +https://huggingface.co/jluckyboyj/xlm-roberta-base-finetuned-visquad-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_visquad_2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_visquad_2_pipeline_en.md new file mode 100644 index 00000000000000..012ad7b9def297 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_finetuned_visquad_2_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_visquad_2_pipeline pipeline XlmRoBertaForQuestionAnswering from jluckyboyj +author: John Snow Labs +name: xlm_roberta_base_finetuned_visquad_2_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_visquad_2_pipeline` is a English model originally trained by jluckyboyj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_visquad_2_pipeline_en_5.5.0_3.0_1725598090004.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_visquad_2_pipeline_en_5.5.0_3.0_1725598090004.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_visquad_2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_visquad_2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_visquad_2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|852.0 MB| + +## References + +https://huggingface.co/jluckyboyj/xlm-roberta-base-finetuned-visquad-2 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_ft_udpos213_top9lang_southern_sotho_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_ft_udpos213_top9lang_southern_sotho_en.md new file mode 100644 index 00000000000000..1178112389a77a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_ft_udpos213_top9lang_southern_sotho_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_ft_udpos213_top9lang_southern_sotho XlmRoBertaForTokenClassification from iceman2434 +author: John Snow Labs +name: xlm_roberta_base_ft_udpos213_top9lang_southern_sotho +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_ft_udpos213_top9lang_southern_sotho` is a English model originally trained by iceman2434. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_ft_udpos213_top9lang_southern_sotho_en_5.5.0_3.0_1725658089264.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_ft_udpos213_top9lang_southern_sotho_en_5.5.0_3.0_1725658089264.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# input columns must match the assembler's 'document' and the tokenizer's 'token' outputs
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_ft_udpos213_top9lang_southern_sotho","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_ft_udpos213_top9lang_southern_sotho", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_ft_udpos213_top9lang_southern_sotho| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|806.1 MB| + +## References + +https://huggingface.co/iceman2434/xlm-roberta-base_ft_udpos213-top9lang-st \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_ft_udpos213_top9lang_southern_sotho_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_ft_udpos213_top9lang_southern_sotho_pipeline_en.md new file mode 100644 index 00000000000000..8ddb1fe0f15467 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_ft_udpos213_top9lang_southern_sotho_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_ft_udpos213_top9lang_southern_sotho_pipeline pipeline XlmRoBertaForTokenClassification from iceman2434 +author: John Snow Labs +name: xlm_roberta_base_ft_udpos213_top9lang_southern_sotho_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_ft_udpos213_top9lang_southern_sotho_pipeline` is a English model originally trained by iceman2434. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_ft_udpos213_top9lang_southern_sotho_pipeline_en_5.5.0_3.0_1725658217983.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_ft_udpos213_top9lang_southern_sotho_pipeline_en_5.5.0_3.0_1725658217983.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_ft_udpos213_top9lang_southern_sotho_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_ft_udpos213_top9lang_southern_sotho_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
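+
+The snippet above assumes a running Spark NLP session and an existing DataFrame `df` holding the raw text. A minimal, illustrative setup (not part of the original card, and assuming the standard `sparknlp` Python API) might look like this:
+
+```python
+# A minimal sketch, assuming the standard sparknlp Python API.
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()  # start (or reuse) a Spark session with Spark NLP loaded
+
+# the downloaded pipeline annotates a column holding the raw text
+df = spark.createDataFrame([["I love Spark NLP"]]).toDF("text")
+
+pipeline = PretrainedPipeline("xlm_roberta_base_ft_udpos213_top9lang_southern_sotho_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+# annotate() is a convenient alternative for quick checks on plain strings
+print(pipeline.annotate("I love Spark NLP"))
+```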
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_ft_udpos213_top9lang_southern_sotho_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|806.1 MB| + +## References + +https://huggingface.co/iceman2434/xlm-roberta-base_ft_udpos213-top9lang-st + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_germeval21_toxic_with_data_augmentation_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_germeval21_toxic_with_data_augmentation_en.md new file mode 100644 index 00000000000000..270ec69d833a8b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_germeval21_toxic_with_data_augmentation_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_germeval21_toxic_with_data_augmentation XlmRoBertaForSequenceClassification from airKlizz +author: John Snow Labs +name: xlm_roberta_base_germeval21_toxic_with_data_augmentation +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_germeval21_toxic_with_data_augmentation` is a English model originally trained by airKlizz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_germeval21_toxic_with_data_augmentation_en_5.5.0_3.0_1725619517991.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_germeval21_toxic_with_data_augmentation_en_5.5.0_3.0_1725619517991.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from pyspark.ml import Pipeline
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForSequenceClassification
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_germeval21_toxic_with_data_augmentation","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_germeval21_toxic_with_data_augmentation", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
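+
+For single sentences it can be more convenient to wrap the fitted model in a `LightPipeline`, which annotates plain Python strings without building a DataFrame. The sketch below is illustrative and assumes the pipeline defined above has already been fitted:
+
+```python
+from sparknlp.base import LightPipeline
+
+# LightPipeline runs the same stages on plain strings and returns Python dicts
+light = LightPipeline(pipelineModel)
+print(light.annotate("I love spark-nlp")["class"])
+```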
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_germeval21_toxic_with_data_augmentation| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|790.0 MB| + +## References + +https://huggingface.co/airKlizz/xlm-roberta-base-germeval21-toxic-with-data-augmentation \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_hungarian_ner_huner_hu.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_hungarian_ner_huner_hu.md new file mode 100644 index 00000000000000..1dfadaf6605509 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_hungarian_ner_huner_hu.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Hungarian xlm_roberta_base_hungarian_ner_huner XlmRoBertaForTokenClassification from EvanD +author: John Snow Labs +name: xlm_roberta_base_hungarian_ner_huner +date: 2024-09-06 +tags: [hu, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: hu +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_hungarian_ner_huner` is a Hungarian model originally trained by EvanD. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_hungarian_ner_huner_hu_5.5.0_3.0_1725656420535.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_hungarian_ner_huner_hu_5.5.0_3.0_1725656420535.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_hungarian_ner_huner","hu") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_hungarian_ner_huner", "hu") + .setInputCols(Array("documents","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_hungarian_ner_huner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|hu| +|Size:|784.0 MB| + +## References + +https://huggingface.co/EvanD/xlm-roberta-base-hungarian-ner-huner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_nepal_bhasa_vietnam_aug_insert_bert_1_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_nepal_bhasa_vietnam_aug_insert_bert_1_en.md new file mode 100644 index 00000000000000..fb9f3a03b62fea --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_nepal_bhasa_vietnam_aug_insert_bert_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_nepal_bhasa_vietnam_aug_insert_bert_1 XlmRoBertaForSequenceClassification from ThuyNT03 +author: John Snow Labs +name: xlm_roberta_base_nepal_bhasa_vietnam_aug_insert_bert_1 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_nepal_bhasa_vietnam_aug_insert_bert_1` is a English model originally trained by ThuyNT03. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_nepal_bhasa_vietnam_aug_insert_bert_1_en_5.5.0_3.0_1725620423298.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_nepal_bhasa_vietnam_aug_insert_bert_1_en_5.5.0_3.0_1725620423298.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_nepal_bhasa_vietnam_aug_insert_bert_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_nepal_bhasa_vietnam_aug_insert_bert_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_nepal_bhasa_vietnam_aug_insert_bert_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|799.2 MB| + +## References + +https://huggingface.co/ThuyNT03/xlm-roberta-base-New_VietNam-aug_insert_BERT-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_nepal_bhasa_vietnam_aug_insert_bert_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_nepal_bhasa_vietnam_aug_insert_bert_1_pipeline_en.md new file mode 100644 index 00000000000000..8a22673b9883c9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_nepal_bhasa_vietnam_aug_insert_bert_1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_nepal_bhasa_vietnam_aug_insert_bert_1_pipeline pipeline XlmRoBertaForSequenceClassification from ThuyNT03 +author: John Snow Labs +name: xlm_roberta_base_nepal_bhasa_vietnam_aug_insert_bert_1_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_nepal_bhasa_vietnam_aug_insert_bert_1_pipeline` is a English model originally trained by ThuyNT03. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_nepal_bhasa_vietnam_aug_insert_bert_1_pipeline_en_5.5.0_3.0_1725620554866.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_nepal_bhasa_vietnam_aug_insert_bert_1_pipeline_en_5.5.0_3.0_1725620554866.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_nepal_bhasa_vietnam_aug_insert_bert_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_nepal_bhasa_vietnam_aug_insert_bert_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_nepal_bhasa_vietnam_aug_insert_bert_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|799.2 MB| + +## References + +https://huggingface.co/ThuyNT03/xlm-roberta-base-New_VietNam-aug_insert_BERT-1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_panx_dataset_russian_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_panx_dataset_russian_en.md new file mode 100644 index 00000000000000..92d60c6d2121f0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_panx_dataset_russian_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_panx_dataset_russian XlmRoBertaForTokenClassification from tner +author: John Snow Labs +name: xlm_roberta_base_panx_dataset_russian +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_panx_dataset_russian` is a English model originally trained by tner. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_panx_dataset_russian_en_5.5.0_3.0_1725592785929.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_panx_dataset_russian_en_5.5.0_3.0_1725592785929.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from pyspark.ml import Pipeline
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_panx_dataset_russian","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_panx_dataset_russian", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_panx_dataset_russian| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|791.9 MB| + +## References + +https://huggingface.co/tner/xlm-roberta-base-panx-dataset-ru \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_panx_dataset_russian_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_panx_dataset_russian_pipeline_en.md new file mode 100644 index 00000000000000..ba4d3f7d602c3c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_panx_dataset_russian_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_panx_dataset_russian_pipeline pipeline XlmRoBertaForTokenClassification from tner +author: John Snow Labs +name: xlm_roberta_base_panx_dataset_russian_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_panx_dataset_russian_pipeline` is a English model originally trained by tner. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_panx_dataset_russian_pipeline_en_5.5.0_3.0_1725592919897.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_panx_dataset_russian_pipeline_en_5.5.0_3.0_1725592919897.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_panx_dataset_russian_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_panx_dataset_russian_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_panx_dataset_russian_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|792.0 MB| + +## References + +https://huggingface.co/tner/xlm-roberta-base-panx-dataset-ru + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_russian_sentiment_rusentiment_pipeline_ru.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_russian_sentiment_rusentiment_pipeline_ru.md new file mode 100644 index 00000000000000..ae07ce2e66952b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_russian_sentiment_rusentiment_pipeline_ru.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Russian xlm_roberta_base_russian_sentiment_rusentiment_pipeline pipeline XlmRoBertaForSequenceClassification from sismetanin +author: John Snow Labs +name: xlm_roberta_base_russian_sentiment_rusentiment_pipeline +date: 2024-09-06 +tags: [ru, open_source, pipeline, onnx] +task: Text Classification +language: ru +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_russian_sentiment_rusentiment_pipeline` is a Russian model originally trained by sismetanin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_russian_sentiment_rusentiment_pipeline_ru_5.5.0_3.0_1725617483877.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_russian_sentiment_rusentiment_pipeline_ru_5.5.0_3.0_1725617483877.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_russian_sentiment_rusentiment_pipeline", lang = "ru") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_russian_sentiment_rusentiment_pipeline", lang = "ru") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_russian_sentiment_rusentiment_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ru| +|Size:|799.7 MB| + +## References + +https://huggingface.co/sismetanin/xlm_roberta_base-ru-sentiment-rusentiment + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_trimmed_spanish_60000_xnli_spanish_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_trimmed_spanish_60000_xnli_spanish_pipeline_en.md new file mode 100644 index 00000000000000..cfcce5ee889eeb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_trimmed_spanish_60000_xnli_spanish_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_trimmed_spanish_60000_xnli_spanish_pipeline pipeline XlmRoBertaForSequenceClassification from vocabtrimmer +author: John Snow Labs +name: xlm_roberta_base_trimmed_spanish_60000_xnli_spanish_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_trimmed_spanish_60000_xnli_spanish_pipeline` is a English model originally trained by vocabtrimmer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_trimmed_spanish_60000_xnli_spanish_pipeline_en_5.5.0_3.0_1725619277074.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_trimmed_spanish_60000_xnli_spanish_pipeline_en_5.5.0_3.0_1725619277074.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_trimmed_spanish_60000_xnli_spanish_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_trimmed_spanish_60000_xnli_spanish_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_trimmed_spanish_60000_xnli_spanish_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|470.3 MB| + +## References + +https://huggingface.co/vocabtrimmer/xlm-roberta-base-trimmed-es-60000-xnli-es + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_tweet_sentiment_spanish_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_tweet_sentiment_spanish_pipeline_en.md new file mode 100644 index 00000000000000..21a1433d933cab --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_tweet_sentiment_spanish_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_tweet_sentiment_spanish_pipeline pipeline XlmRoBertaForSequenceClassification from cardiffnlp +author: John Snow Labs +name: xlm_roberta_base_tweet_sentiment_spanish_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_tweet_sentiment_spanish_pipeline` is a English model originally trained by cardiffnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_tweet_sentiment_spanish_pipeline_en_5.5.0_3.0_1725619163262.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_tweet_sentiment_spanish_pipeline_en_5.5.0_3.0_1725619163262.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_tweet_sentiment_spanish_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_tweet_sentiment_spanish_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_tweet_sentiment_spanish_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|780.1 MB| + +## References + +https://huggingface.co/cardiffnlp/xlm-roberta-base-tweet-sentiment-es + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_uncased_pina_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_uncased_pina_en.md new file mode 100644 index 00000000000000..1571c1f4273fe4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_uncased_pina_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_uncased_pina XlmRoBertaForSequenceClassification from GhifSmile +author: John Snow Labs +name: xlm_roberta_base_uncased_pina +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_uncased_pina` is a English model originally trained by GhifSmile. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_uncased_pina_en_5.5.0_3.0_1725618960839.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_uncased_pina_en_5.5.0_3.0_1725618960839.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_uncased_pina","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_uncased_pina", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_uncased_pina| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|837.6 MB| + +## References + +https://huggingface.co/GhifSmile/xlm-roberta-base-uncased-PINA \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_wolof_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_wolof_pipeline_en.md new file mode 100644 index 00000000000000..bced23be9de1af --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_wolof_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_wolof_pipeline pipeline XlmRoBertaForTokenClassification from vonewman +author: John Snow Labs +name: xlm_roberta_base_wolof_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_wolof_pipeline` is a English model originally trained by vonewman. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_wolof_pipeline_en_5.5.0_3.0_1725658536725.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_wolof_pipeline_en_5.5.0_3.0_1725658536725.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_wolof_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_wolof_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_wolof_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|777.6 MB| + +## References + +https://huggingface.co/vonewman/xlm-roberta-base-wolof + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_xnli_french_trimmed_french_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_xnli_french_trimmed_french_en.md new file mode 100644 index 00000000000000..057863e1cd7d06 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_base_xnli_french_trimmed_french_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_xnli_french_trimmed_french XlmRoBertaForSequenceClassification from vocabtrimmer +author: John Snow Labs +name: xlm_roberta_base_xnli_french_trimmed_french +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_xnli_french_trimmed_french` is a English model originally trained by vocabtrimmer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_xnli_french_trimmed_french_en_5.5.0_3.0_1725619978709.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_xnli_french_trimmed_french_en_5.5.0_3.0_1725619978709.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_xnli_french_trimmed_french","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_xnli_french_trimmed_french", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_xnli_french_trimmed_french| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|515.5 MB| + +## References + +https://huggingface.co/vocabtrimmer/xlm-roberta-base-xnli-fr-trimmed-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_indosquadv2_1694025792_8_2e_06_0_01_5_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_indosquadv2_1694025792_8_2e_06_0_01_5_en.md new file mode 100644 index 00000000000000..c5682d69e8e2b5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_indosquadv2_1694025792_8_2e_06_0_01_5_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English xlm_roberta_indosquadv2_1694025792_8_2e_06_0_01_5 XlmRoBertaForQuestionAnswering from rizquuula +author: John Snow Labs +name: xlm_roberta_indosquadv2_1694025792_8_2e_06_0_01_5 +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_indosquadv2_1694025792_8_2e_06_0_01_5` is a English model originally trained by rizquuula. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_indosquadv2_1694025792_8_2e_06_0_01_5_en_5.5.0_3.0_1725630349351.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_indosquadv2_1694025792_8_2e_06_0_01_5_en_5.5.0_3.0_1725630349351.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from pyspark.ml import Pipeline
+from sparknlp.base import MultiDocumentAssembler
+from sparknlp.annotator import XlmRoBertaForQuestionAnswering
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_indosquadv2_1694025792_8_2e_06_0_01_5","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.MultiDocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_indosquadv2_1694025792_8_2e_06_0_01_5", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
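+
+Once the transform has run, the predicted span lives in the `answer` output column as an array of annotations. The one-liner below is an illustration (not part of the original card) of how the answer strings could be surfaced:
+
+```python
+# Hypothetical follow-up: show the extracted answer span for each row of pipelineDF
+pipelineDF.selectExpr("explode(answer.result) as answer").show(truncate = False)
+```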
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_indosquadv2_1694025792_8_2e_06_0_01_5| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|818.0 MB| + +## References + +https://huggingface.co/rizquuula/XLM-RoBERTa-IndoSQuADv2_1694025792-8-2e-06-0.01-5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_indosquadv2_1694025792_8_2e_06_0_01_5_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_indosquadv2_1694025792_8_2e_06_0_01_5_pipeline_en.md new file mode 100644 index 00000000000000..50e8a338cd4cff --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_indosquadv2_1694025792_8_2e_06_0_01_5_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlm_roberta_indosquadv2_1694025792_8_2e_06_0_01_5_pipeline pipeline XlmRoBertaForQuestionAnswering from rizquuula +author: John Snow Labs +name: xlm_roberta_indosquadv2_1694025792_8_2e_06_0_01_5_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_indosquadv2_1694025792_8_2e_06_0_01_5_pipeline` is a English model originally trained by rizquuula. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_indosquadv2_1694025792_8_2e_06_0_01_5_pipeline_en_5.5.0_3.0_1725630479354.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_indosquadv2_1694025792_8_2e_06_0_01_5_pipeline_en_5.5.0_3.0_1725630479354.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_indosquadv2_1694025792_8_2e_06_0_01_5_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_indosquadv2_1694025792_8_2e_06_0_01_5_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_indosquadv2_1694025792_8_2e_06_0_01_5_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|818.0 MB| + +## References + +https://huggingface.co/rizquuula/XLM-RoBERTa-IndoSQuADv2_1694025792-8-2e-06-0.01-5 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_indosquadv2_1694026018_16_2e_05_0_01_3_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_indosquadv2_1694026018_16_2e_05_0_01_3_en.md new file mode 100644 index 00000000000000..4b963f61c5b6dd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_indosquadv2_1694026018_16_2e_05_0_01_3_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English xlm_roberta_indosquadv2_1694026018_16_2e_05_0_01_3 XlmRoBertaForQuestionAnswering from rizquuula +author: John Snow Labs +name: xlm_roberta_indosquadv2_1694026018_16_2e_05_0_01_3 +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_indosquadv2_1694026018_16_2e_05_0_01_3` is a English model originally trained by rizquuula. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_indosquadv2_1694026018_16_2e_05_0_01_3_en_5.5.0_3.0_1725597774751.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_indosquadv2_1694026018_16_2e_05_0_01_3_en_5.5.0_3.0_1725597774751.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCol(["question", "context"]) \ + .setOutputCol(["document_question", "document_context"]) + +spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_indosquadv2_1694026018_16_2e_05_0_01_3","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCol(Array("question", "context")) + .setOutputCol(Array("document_question", "document_context")) + +val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_indosquadv2_1694026018_16_2e_05_0_01_3", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq("What framework do I use?","I use spark-nlp.").toDS.toDF("document_question", "document_context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_indosquadv2_1694026018_16_2e_05_0_01_3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|839.5 MB| + +## References + +https://huggingface.co/rizquuula/XLM-RoBERTa-IndoSQuADv2_1694026018-16-2e-05-0.01-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_indosquadv2_1694026018_16_2e_05_0_01_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_indosquadv2_1694026018_16_2e_05_0_01_3_pipeline_en.md new file mode 100644 index 00000000000000..26d18a8a162258 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_indosquadv2_1694026018_16_2e_05_0_01_3_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlm_roberta_indosquadv2_1694026018_16_2e_05_0_01_3_pipeline pipeline XlmRoBertaForQuestionAnswering from rizquuula +author: John Snow Labs +name: xlm_roberta_indosquadv2_1694026018_16_2e_05_0_01_3_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_indosquadv2_1694026018_16_2e_05_0_01_3_pipeline` is a English model originally trained by rizquuula. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_indosquadv2_1694026018_16_2e_05_0_01_3_pipeline_en_5.5.0_3.0_1725597879682.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_indosquadv2_1694026018_16_2e_05_0_01_3_pipeline_en_5.5.0_3.0_1725597879682.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_indosquadv2_1694026018_16_2e_05_0_01_3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_indosquadv2_1694026018_16_2e_05_0_01_3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_indosquadv2_1694026018_16_2e_05_0_01_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|839.5 MB| + +## References + +https://huggingface.co/rizquuula/XLM-RoBERTa-IndoSQuADv2_1694026018-16-2e-05-0.01-3 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_indosquadv2_1694026058_8_2e_05_0_01_3_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_indosquadv2_1694026058_8_2e_05_0_01_3_en.md new file mode 100644 index 00000000000000..ab408704a0e2ca --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_indosquadv2_1694026058_8_2e_05_0_01_3_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English xlm_roberta_indosquadv2_1694026058_8_2e_05_0_01_3 XlmRoBertaForQuestionAnswering from rizquuula +author: John Snow Labs +name: xlm_roberta_indosquadv2_1694026058_8_2e_05_0_01_3 +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_indosquadv2_1694026058_8_2e_05_0_01_3` is a English model originally trained by rizquuula. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_indosquadv2_1694026058_8_2e_05_0_01_3_en_5.5.0_3.0_1725598288600.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_indosquadv2_1694026058_8_2e_05_0_01_3_en_5.5.0_3.0_1725598288600.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCol(["question", "context"]) \ + .setOutputCol(["document_question", "document_context"]) + +spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_indosquadv2_1694026058_8_2e_05_0_01_3","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCol(Array("question", "context")) + .setOutputCol(Array("document_question", "document_context")) + +val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_indosquadv2_1694026058_8_2e_05_0_01_3", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq("What framework do I use?","I use spark-nlp.").toDS.toDF("document_question", "document_context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_indosquadv2_1694026058_8_2e_05_0_01_3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|839.6 MB| + +## References + +https://huggingface.co/rizquuula/XLM-RoBERTa-IndoSQuADv2_1694026058-8-2e-05-0.01-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_indosquadv2_1694026058_8_2e_05_0_01_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_indosquadv2_1694026058_8_2e_05_0_01_3_pipeline_en.md new file mode 100644 index 00000000000000..16eba75cfcc046 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_indosquadv2_1694026058_8_2e_05_0_01_3_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlm_roberta_indosquadv2_1694026058_8_2e_05_0_01_3_pipeline pipeline XlmRoBertaForQuestionAnswering from rizquuula +author: John Snow Labs +name: xlm_roberta_indosquadv2_1694026058_8_2e_05_0_01_3_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_indosquadv2_1694026058_8_2e_05_0_01_3_pipeline` is a English model originally trained by rizquuula. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_indosquadv2_1694026058_8_2e_05_0_01_3_pipeline_en_5.5.0_3.0_1725598393978.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_indosquadv2_1694026058_8_2e_05_0_01_3_pipeline_en_5.5.0_3.0_1725598393978.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_indosquadv2_1694026058_8_2e_05_0_01_3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_indosquadv2_1694026058_8_2e_05_0_01_3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_indosquadv2_1694026058_8_2e_05_0_01_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|839.6 MB| + +## References + +https://huggingface.co/rizquuula/XLM-RoBERTa-IndoSQuADv2_1694026058-8-2e-05-0.01-3 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_low_resource_langid_large2_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_low_resource_langid_large2_en.md new file mode 100644 index 00000000000000..db152631ea798d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_low_resource_langid_large2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_low_resource_langid_large2 XlmRoBertaForSequenceClassification from simoneteglia +author: John Snow Labs +name: xlm_roberta_low_resource_langid_large2 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_low_resource_langid_large2` is a English model originally trained by simoneteglia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_low_resource_langid_large2_en_5.5.0_3.0_1725616747436.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_low_resource_langid_large2_en_5.5.0_3.0_1725616747436.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_low_resource_langid_large2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_low_resource_langid_large2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
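Once the pipeline above has been fitted and applied, the predicted label can be read back from the classifier's output column. A minimal sketch, assuming `pipelineDF` comes from the Python snippet above and the classifier writes to `class`:

```python
# Show the input text next to the predicted label.
# "class.result" pulls the result field out of the annotation structs.
pipelineDF.select("text", "class.result").show(truncate=False)
```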
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_low_resource_langid_large2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|956.0 MB| + +## References + +https://huggingface.co/simoneteglia/xlm-roberta-low-resource-langID-large2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_low_resource_langid_large2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_low_resource_langid_large2_pipeline_en.md new file mode 100644 index 00000000000000..6b245a3632b9f3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_low_resource_langid_large2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_low_resource_langid_large2_pipeline pipeline XlmRoBertaForSequenceClassification from simoneteglia +author: John Snow Labs +name: xlm_roberta_low_resource_langid_large2_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_low_resource_langid_large2_pipeline` is a English model originally trained by simoneteglia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_low_resource_langid_large2_pipeline_en_5.5.0_3.0_1725616823602.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_low_resource_langid_large2_pipeline_en_5.5.0_3.0_1725616823602.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_low_resource_langid_large2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_low_resource_langid_large2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_low_resource_langid_large2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|956.0 MB| + +## References + +https://huggingface.co/simoneteglia/xlm-roberta-low-resource-langID-large2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_qa_afriberta_base_finetuned_tydiqa_pipeline_sw.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_qa_afriberta_base_finetuned_tydiqa_pipeline_sw.md new file mode 100644 index 00000000000000..f4f358b6e0f5aa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_qa_afriberta_base_finetuned_tydiqa_pipeline_sw.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Swahili (macrolanguage) xlm_roberta_qa_afriberta_base_finetuned_tydiqa_pipeline pipeline XlmRoBertaForQuestionAnswering from cjrowe +author: John Snow Labs +name: xlm_roberta_qa_afriberta_base_finetuned_tydiqa_pipeline +date: 2024-09-06 +tags: [sw, open_source, pipeline, onnx] +task: Question Answering +language: sw +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_qa_afriberta_base_finetuned_tydiqa_pipeline` is a Swahili (macrolanguage) model originally trained by cjrowe. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_afriberta_base_finetuned_tydiqa_pipeline_sw_5.5.0_3.0_1725640814781.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_afriberta_base_finetuned_tydiqa_pipeline_sw_5.5.0_3.0_1725640814781.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_qa_afriberta_base_finetuned_tydiqa_pipeline", lang = "sw") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_qa_afriberta_base_finetuned_tydiqa_pipeline", lang = "sw") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_qa_afriberta_base_finetuned_tydiqa_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|sw| +|Size:|415.2 MB| + +## References + +https://huggingface.co/cjrowe/afriberta_base-finetuned-tydiqa + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_qa_afriberta_base_finetuned_tydiqa_sw.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_qa_afriberta_base_finetuned_tydiqa_sw.md new file mode 100644 index 00000000000000..972cdb7dd7d5e1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_qa_afriberta_base_finetuned_tydiqa_sw.md @@ -0,0 +1,106 @@ +--- +layout: model +title: Swahili XlmRoBertaForQuestionAnswering (from cjrowe) +author: John Snow Labs +name: xlm_roberta_qa_afriberta_base_finetuned_tydiqa +date: 2024-09-06 +tags: [sw, open_source, question_answering, xlmroberta, onnx] +task: Question Answering +language: sw +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `afriberta_base-finetuned-tydiqa` is a Swahili model originally trained by `cjrowe`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_afriberta_base_finetuned_tydiqa_sw_5.5.0_3.0_1725640789447.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_afriberta_base_finetuned_tydiqa_sw_5.5.0_3.0_1725640789447.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_qa_afriberta_base_finetuned_tydiqa","sw") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer") \ +.setCaseSensitive(True) + +pipeline = Pipeline().setStages([ +document_assembler, +spanClassifier +]) + +example = spark.createDataFrame([["What's my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(example).transform(example) +``` +```scala +val document = new MultiDocumentAssembler() +.setInputCols(Array("question", "context")) +.setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = XlmRoBertaForQuestionAnswering +.pretrained("xlm_roberta_qa_afriberta_base_finetuned_tydiqa","sw") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) +.setMaxSentenceLength(512) + +val pipeline = new Pipeline().setStages(Array(document, spanClassifier)) + +val example = Seq( +("Where was John Lenon born?", "John Lenon was born in London and lived in Paris. My name is Sarah and I live in London."), +("What's my name?", "My name is Clara and I live in Berkeley.")) +.toDF("question", "context") + +val result = pipeline.fit(example).transform(example) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("sw.answer_question.tydiqa.xlm_roberta.base").predict("""What's my name?|||"My name is Clara and I live in Berkeley.""") +``` +
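For quick, single-pair checks outside a DataFrame, the fitted pipeline above can also be wrapped in a `LightPipeline`. This is only a sketch under the assumption that `pipeline` and `example` are the objects defined in the Python snippet, and that the two-argument `fullAnnotate(question, context)` call is available for question-answering pipelines in the installed Spark NLP version:

```python
from sparknlp.base import LightPipeline

# Assumption: `pipeline` and `example` come from the snippet above.
model = pipeline.fit(example)
light = LightPipeline(model)

# fullAnnotate on a (question, context) pair returns plain Python annotations,
# keyed by output column name, which is convenient for spot-checking the model.
annotations = light.fullAnnotate("What's my name?", "My name is Clara and I live in Berkeley.")
print(annotations[0]["answer"])
```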
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_qa_afriberta_base_finetuned_tydiqa| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|sw| +|Size:|415.2 MB| + +## References + +References + +- https://huggingface.co/cjrowe/afriberta_base-finetuned-tydiqa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265900_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265900_en.md new file mode 100644 index 00000000000000..a6eb0edef95ed4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265900_en.md @@ -0,0 +1,106 @@ +--- +layout: model +title: English XlmRoBertaForQuestionAnswering (from teacookies) +author: John Snow Labs +name: xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265900 +date: 2024-09-06 +tags: [en, open_source, question_answering, xlmroberta, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `autonlp-more_fine_tune_24465520-26265900` is a English model originally trained by `teacookies`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265900_en_5.5.0_3.0_1725597931359.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265900_en_5.5.0_3.0_1725597931359.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265900","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer") \ +.setCaseSensitive(True) + +pipeline = Pipeline().setStages([ +document_assembler, +spanClassifier +]) + +example = spark.createDataFrame([["What's my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(example).transform(example) +``` +```scala +val document = new MultiDocumentAssembler() +.setInputCols(Array("question", "context")) +.setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = XlmRoBertaForQuestionAnswering +.pretrained("xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265900","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) +.setMaxSentenceLength(512) + +val pipeline = new Pipeline().setStages(Array(document, spanClassifier)) + +val example = Seq( +("Where was John Lenon born?", "John Lenon was born in London and lived in Paris. My name is Sarah and I live in London."), +("What's my name?", "My name is Clara and I live in Berkeley.")) +.toDF("question", "context") + +val result = pipeline.fit(example).transform(example) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.xlm_roberta.fine_tune_24465520_26265900").predict("""What's my name?|||"My name is Clara and I live in Berkeley.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265900| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|888.2 MB| + +## References + +References + +- https://huggingface.co/teacookies/autonlp-more_fine_tune_24465520-26265900 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265900_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265900_pipeline_en.md new file mode 100644 index 00000000000000..36ad79ea3bc197 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265900_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265900_pipeline pipeline XlmRoBertaForQuestionAnswering from teacookies +author: John Snow Labs +name: xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265900_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265900_pipeline` is a English model originally trained by teacookies. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265900_pipeline_en_5.5.0_3.0_1725597997604.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265900_pipeline_en_5.5.0_3.0_1725597997604.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265900_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265900_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265900_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|888.2 MB| + +## References + +https://huggingface.co/teacookies/autonlp-more_fine_tune_24465520-26265900 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265904_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265904_en.md new file mode 100644 index 00000000000000..7bee6afde83b80 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265904_en.md @@ -0,0 +1,106 @@ +--- +layout: model +title: English XlmRoBertaForQuestionAnswering (from teacookies) +author: John Snow Labs +name: xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265904 +date: 2024-09-06 +tags: [en, open_source, question_answering, xlmroberta, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `autonlp-more_fine_tune_24465520-26265904` is a English model originally trained by `teacookies`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265904_en_5.5.0_3.0_1725597562199.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265904_en_5.5.0_3.0_1725597562199.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265904","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer") \ +.setCaseSensitive(True) + +pipeline = Pipeline().setStages([ +document_assembler, +spanClassifier +]) + +example = spark.createDataFrame([["What's my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(example).transform(example) +``` +```scala +val document = new MultiDocumentAssembler() +.setInputCols(Array("question", "context")) +.setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = XlmRoBertaForQuestionAnswering +.pretrained("xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265904","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) +.setMaxSentenceLength(512) + +val pipeline = new Pipeline().setStages(Array(document, spanClassifier)) + +val example = Seq( +("Where was John Lenon born?", "John Lenon was born in London and lived in Paris. My name is Sarah and I live in London."), +("What's my name?", "My name is Clara and I live in Berkeley.")) +.toDF("question", "context") + +val result = pipeline.fit(example).transform(example) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.xlm_roberta.fine_tune_24465520_26265904").predict("""What's my name?|||"My name is Clara and I live in Berkeley.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_qa_autonlp_more_fine_tune_24465520_26265904| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|888.2 MB| + +## References + +References + +- https://huggingface.co/teacookies/autonlp-more_fine_tune_24465520-26265904 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_qa_distill_xlm_mrc_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_qa_distill_xlm_mrc_en.md new file mode 100644 index 00000000000000..8cd001675c5634 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_qa_distill_xlm_mrc_en.md @@ -0,0 +1,106 @@ +--- +layout: model +title: English XlmRoBertaForQuestionAnswering (from aicryptogroup) +author: John Snow Labs +name: xlm_roberta_qa_distill_xlm_mrc +date: 2024-09-06 +tags: [en, vi, open_source, question_answering, xlmroberta, xx, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distill-xlm-mrc` is a multilingual model originally trained by `aicryptogroup`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_distill_xlm_mrc_en_5.5.0_3.0_1725598608362.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_distill_xlm_mrc_en_5.5.0_3.0_1725598608362.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_qa_distill_xlm_mrc","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer") \ +.setCaseSensitive(True) + +pipeline = Pipeline().setStages([ +document_assembler, +spanClassifier +]) + +example = spark.createDataFrame([["What's my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(example).transform(example) +``` +```scala +val document = new MultiDocumentAssembler() +.setInputCols(Array("question", "context")) +.setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = XlmRoBertaForQuestionAnswering +.pretrained("xlm_roberta_qa_distill_xlm_mrc","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) +.setMaxSentenceLength(512) + +val pipeline = new Pipeline().setStages(Array(document, spanClassifier)) + +val example = Seq( +("Where was John Lenon born?", "John Lenon was born in London and lived in Paris. My name is Sarah and I live in London."), +("What's my name?", "My name is Clara and I live in Berkeley.")) +.toDF("question", "context") + +val result = pipeline.fit(example).transform(example) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.xlm_roberta.distilled").predict("""What's my name?|||"My name is Clara and I live in Berkeley.""") +``` +
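To inspect what the span classifier extracted, the `answer` column can be flattened from the `result` DataFrame produced above. A minimal sketch, assuming the column names used in the Python snippet:

```python
# Assumption: `result` is the DataFrame produced by the Python snippet above.
# "answer.result" holds the extracted answer text; "answer.metadata" carries
# per-annotation details whose exact keys depend on the annotator version.
result.select("question", "answer.result", "answer.metadata").show(truncate=False)
```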
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_qa_distill_xlm_mrc| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|150.9 MB| + +## References + +References + +- https://huggingface.co/aicryptogroup/distill-xlm-mrc \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_qa_distill_xlm_mrc_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_qa_distill_xlm_mrc_pipeline_en.md new file mode 100644 index 00000000000000..b7b5f357b47275 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_qa_distill_xlm_mrc_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlm_roberta_qa_distill_xlm_mrc_pipeline pipeline XlmRoBertaForQuestionAnswering from aicryptogroup +author: John Snow Labs +name: xlm_roberta_qa_distill_xlm_mrc_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_qa_distill_xlm_mrc_pipeline` is a English model originally trained by aicryptogroup. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_distill_xlm_mrc_pipeline_en_5.5.0_3.0_1725598615785.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_distill_xlm_mrc_pipeline_en_5.5.0_3.0_1725598615785.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_qa_distill_xlm_mrc_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_qa_distill_xlm_mrc_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_qa_distill_xlm_mrc_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|150.9 MB| + +## References + +https://huggingface.co/aicryptogroup/distill-xlm-mrc + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_thai_2_th.md b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_thai_2_th.md new file mode 100644 index 00000000000000..720901a4ace161 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlm_roberta_thai_2_th.md @@ -0,0 +1,86 @@ +--- +layout: model +title: Thai xlm_roberta_thai_2 XlmRoBertaForQuestionAnswering from milohpeng +author: John Snow Labs +name: xlm_roberta_thai_2 +date: 2024-09-06 +tags: [th, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: th +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_thai_2` is a Thai model originally trained by milohpeng. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_thai_2_th_5.5.0_3.0_1725631342839.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_thai_2_th_5.5.0_3.0_1725631342839.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCol(["question", "context"]) \ + .setOutputCol(["document_question", "document_context"]) + +spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_thai_2","th") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCol(Array("question", "context")) + .setOutputCol(Array("document_question", "document_context")) + +val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_thai_2", "th") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq("What framework do I use?","I use spark-nlp.").toDS.toDF("document_question", "document_context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_thai_2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|th| +|Size:|881.1 MB| + +## References + +https://huggingface.co/milohpeng/xlm-roberta-th-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlmr_english_german_all_shuffled_1985_test1000_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlmr_english_german_all_shuffled_1985_test1000_en.md new file mode 100644 index 00000000000000..becf146ee2da63 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlmr_english_german_all_shuffled_1985_test1000_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlmr_english_german_all_shuffled_1985_test1000 XlmRoBertaForSequenceClassification from patpizio +author: John Snow Labs +name: xlmr_english_german_all_shuffled_1985_test1000 +date: 2024-09-06 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_english_german_all_shuffled_1985_test1000` is a English model originally trained by patpizio. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_english_german_all_shuffled_1985_test1000_en_5.5.0_3.0_1725619312590.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_english_german_all_shuffled_1985_test1000_en_5.5.0_3.0_1725619312590.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlmr_english_german_all_shuffled_1985_test1000","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlmr_english_german_all_shuffled_1985_test1000", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_english_german_all_shuffled_1985_test1000| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|826.3 MB| + +## References + +https://huggingface.co/patpizio/xlmr-en-de-all_shuffled-1985-test1000 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlmr_english_german_all_shuffled_1985_test1000_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlmr_english_german_all_shuffled_1985_test1000_pipeline_en.md new file mode 100644 index 00000000000000..60092ba4c3823d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlmr_english_german_all_shuffled_1985_test1000_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlmr_english_german_all_shuffled_1985_test1000_pipeline pipeline XlmRoBertaForSequenceClassification from patpizio +author: John Snow Labs +name: xlmr_english_german_all_shuffled_1985_test1000_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_english_german_all_shuffled_1985_test1000_pipeline` is a English model originally trained by patpizio. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_english_german_all_shuffled_1985_test1000_pipeline_en_5.5.0_3.0_1725619431904.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_english_german_all_shuffled_1985_test1000_pipeline_en_5.5.0_3.0_1725619431904.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmr_english_german_all_shuffled_1985_test1000_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmr_english_german_all_shuffled_1985_test1000_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_english_german_all_shuffled_1985_test1000_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|826.3 MB| + +## References + +https://huggingface.co/patpizio/xlmr-en-de-all_shuffled-1985-test1000 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlmr_enis_qa_icelandic_finetune_hindi_course_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlmr_enis_qa_icelandic_finetune_hindi_course_en.md new file mode 100644 index 00000000000000..3834ff92534002 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlmr_enis_qa_icelandic_finetune_hindi_course_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English xlmr_enis_qa_icelandic_finetune_hindi_course XlmRoBertaForQuestionAnswering from nozagleh +author: John Snow Labs +name: xlmr_enis_qa_icelandic_finetune_hindi_course +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_enis_qa_icelandic_finetune_hindi_course` is a English model originally trained by nozagleh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_enis_qa_icelandic_finetune_hindi_course_en_5.5.0_3.0_1725598523391.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_enis_qa_icelandic_finetune_hindi_course_en_5.5.0_3.0_1725598523391.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCol(["question", "context"]) \ + .setOutputCol(["document_question", "document_context"]) + +spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlmr_enis_qa_icelandic_finetune_hindi_course","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCol(Array("question", "context")) + .setOutputCol(Array("document_question", "document_context")) + +val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlmr_enis_qa_icelandic_finetune_hindi_course", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq("What framework do I use?","I use spark-nlp.").toDS.toDF("document_question", "document_context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_enis_qa_icelandic_finetune_hindi_course| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|456.7 MB| + +## References + +https://huggingface.co/nozagleh/XLMr-ENIS-QA-Is-finetune-hi-course \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlmr_enis_qa_icelandic_finetune_hindi_course_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlmr_enis_qa_icelandic_finetune_hindi_course_pipeline_en.md new file mode 100644 index 00000000000000..caa1c4a5fb1d51 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlmr_enis_qa_icelandic_finetune_hindi_course_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlmr_enis_qa_icelandic_finetune_hindi_course_pipeline pipeline XlmRoBertaForQuestionAnswering from nozagleh +author: John Snow Labs +name: xlmr_enis_qa_icelandic_finetune_hindi_course_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_enis_qa_icelandic_finetune_hindi_course_pipeline` is a English model originally trained by nozagleh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_enis_qa_icelandic_finetune_hindi_course_pipeline_en_5.5.0_3.0_1725598546946.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_enis_qa_icelandic_finetune_hindi_course_pipeline_en_5.5.0_3.0_1725598546946.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmr_enis_qa_icelandic_finetune_hindi_course_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmr_enis_qa_icelandic_finetune_hindi_course_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_enis_qa_icelandic_finetune_hindi_course_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|456.7 MB| + +## References + +https://huggingface.co/nozagleh/XLMr-ENIS-QA-Is-finetune-hi-course + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlmr_idkmrc_webis_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlmr_idkmrc_webis_en.md new file mode 100644 index 00000000000000..61ad07919f4e5a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlmr_idkmrc_webis_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English xlmr_idkmrc_webis XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: xlmr_idkmrc_webis +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_idkmrc_webis` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_idkmrc_webis_en_5.5.0_3.0_1725630622645.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_idkmrc_webis_en_5.5.0_3.0_1725630622645.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCol(["question", "context"]) \ + .setOutputCol(["document_question", "document_context"]) + +spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlmr_idkmrc_webis","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCol(Array("question", "context")) + .setOutputCol(Array("document_question", "document_context")) + +val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlmr_idkmrc_webis", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq("What framework do I use?","I use spark-nlp.").toDS.toDF("document_question", "document_context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_idkmrc_webis| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|841.6 MB| + +## References + +https://huggingface.co/intanm/xlmr-idkmrc-webis \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlmr_idkmrc_webis_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlmr_idkmrc_webis_pipeline_en.md new file mode 100644 index 00000000000000..b20f656c3f6a35 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlmr_idkmrc_webis_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlmr_idkmrc_webis_pipeline pipeline XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: xlmr_idkmrc_webis_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_idkmrc_webis_pipeline` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_idkmrc_webis_pipeline_en_5.5.0_3.0_1725630734948.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_idkmrc_webis_pipeline_en_5.5.0_3.0_1725630734948.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmr_idkmrc_webis_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmr_idkmrc_webis_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_idkmrc_webis_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|841.6 MB| + +## References + +https://huggingface.co/intanm/xlmr-idkmrc-webis + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_finetuned_recipeqa_modified_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_finetuned_recipeqa_modified_en.md new file mode 100644 index 00000000000000..d9e760c6b86e4a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_finetuned_recipeqa_modified_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English xlmroberta_finetuned_recipeqa_modified XlmRoBertaForQuestionAnswering from tamhuynh27 +author: John Snow Labs +name: xlmroberta_finetuned_recipeqa_modified +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmroberta_finetuned_recipeqa_modified` is a English model originally trained by tamhuynh27. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_finetuned_recipeqa_modified_en_5.5.0_3.0_1725640802679.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_finetuned_recipeqa_modified_en_5.5.0_3.0_1725640802679.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCol(["question", "context"]) \ + .setOutputCol(["document_question", "document_context"]) + +spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlmroberta_finetuned_recipeqa_modified","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCol(Array("question", "context")) + .setOutputCol(Array("document_question", "document_context")) + +val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlmroberta_finetuned_recipeqa_modified", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq("What framework do I use?","I use spark-nlp.").toDS.toDF("document_question", "document_context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_finetuned_recipeqa_modified| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|856.7 MB| + +## References + +https://huggingface.co/tamhuynh27/xlmroberta-finetuned-recipeqa-modified \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_finetuned_recipeqa_modified_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_finetuned_recipeqa_modified_pipeline_en.md new file mode 100644 index 00000000000000..cce3366f5c016f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_finetuned_recipeqa_modified_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlmroberta_finetuned_recipeqa_modified_pipeline pipeline XlmRoBertaForQuestionAnswering from tamhuynh27 +author: John Snow Labs +name: xlmroberta_finetuned_recipeqa_modified_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmroberta_finetuned_recipeqa_modified_pipeline` is a English model originally trained by tamhuynh27. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_finetuned_recipeqa_modified_pipeline_en_5.5.0_3.0_1725640874796.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_finetuned_recipeqa_modified_pipeline_en_5.5.0_3.0_1725640874796.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmroberta_finetuned_recipeqa_modified_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmroberta_finetuned_recipeqa_modified_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_finetuned_recipeqa_modified_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|856.8 MB| + +## References + +https://huggingface.co/tamhuynh27/xlmroberta-finetuned-recipeqa-modified + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_base_finetuned_ner_swahili_sw.md b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_base_finetuned_ner_swahili_sw.md new file mode 100644 index 00000000000000..c21450137c3f96 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_base_finetuned_ner_swahili_sw.md @@ -0,0 +1,115 @@ +--- +layout: model +title: Swahili XLMRobertaForTokenClassification Base Cased model (from mbeukman) +author: John Snow Labs +name: xlmroberta_ner_base_finetuned_ner_swahili +date: 2024-09-06 +tags: [sw, open_source, xlm_roberta, ner, onnx] +task: Named Entity Recognition +language: sw +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XLMRobertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm-roberta-base-finetuned-ner-swahili` is a Swahili model originally trained by `mbeukman`. + +## Predicted Entities + +`DATE`, `PER`, `ORG`, `LOC` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_base_finetuned_ner_swahili_sw_5.5.0_3.0_1725657412884.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_base_finetuned_ner_swahili_sw_5.5.0_3.0_1725657412884.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_base_finetuned_ner_swahili","sw") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("ner") + +ner_converter = NerConverter()\ + .setInputCols(["document", "token", "ner"])\ + .setOutputCol("ner_chunk") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, token_classifier, ner_converter]) + +data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCols(Array("text")) + .setOutputCols(Array("document")) + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_base_finetuned_ner_swahili","sw") + .setInputCols(Array("document", "token")) + .setOutputCol("ner") + +val ner_converter = new NerConverter() + .setInputCols(Array("document", "token', "ner")) + .setOutputCol("ner_chunk") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, token_classifier, ner_converter)) + +val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("sw.ner.xlmr_roberta.base_finetuned_swahili.by_mbeukman").predict("""PUT YOUR STRING HERE""") +``` +
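To turn the pipeline output into readable entity spans, the chunks produced by the `NerConverter` stage can be exploded and selected from `result`. A minimal sketch, assuming the column names used in the Python snippet above; the `entity` metadata key is the conventional place NerConverter stores the label, not something taken from this model card:

```python
# Assumption: `result` is the DataFrame from the Python snippet above.
# Each ner_chunk annotation holds one entity span; its label sits in the
# chunk metadata under the "entity" key.
result.selectExpr("explode(ner_chunk) as chunk") \
      .selectExpr("chunk.result as span", "chunk.metadata['entity'] as label") \
      .show(truncate=False)
```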
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_ner_base_finetuned_ner_swahili| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|sw| +|Size:|776.7 MB| + +## References + +References + +- https://huggingface.co/mbeukman/xlm-roberta-base-finetuned-ner-swahili +- https://arxiv.org/abs/2103.11811 +- https://github.com/Michael-Beukman/NERTransfer +- https://github.com/masakhane-io/masakhane-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_base_finetuned_swahili_finetuned_ner_swahili_pipeline_sw.md b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_base_finetuned_swahili_finetuned_ner_swahili_pipeline_sw.md new file mode 100644 index 00000000000000..b896fa6af14786 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_base_finetuned_swahili_finetuned_ner_swahili_pipeline_sw.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Swahili (macrolanguage) xlmroberta_ner_base_finetuned_swahili_finetuned_ner_swahili_pipeline pipeline XlmRoBertaForTokenClassification from mbeukman +author: John Snow Labs +name: xlmroberta_ner_base_finetuned_swahili_finetuned_ner_swahili_pipeline +date: 2024-09-06 +tags: [sw, open_source, pipeline, onnx] +task: Named Entity Recognition +language: sw +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmroberta_ner_base_finetuned_swahili_finetuned_ner_swahili_pipeline` is a Swahili (macrolanguage) model originally trained by mbeukman. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_base_finetuned_swahili_finetuned_ner_swahili_pipeline_sw_5.5.0_3.0_1725658513721.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_base_finetuned_swahili_finetuned_ner_swahili_pipeline_sw_5.5.0_3.0_1725658513721.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmroberta_ner_base_finetuned_swahili_finetuned_ner_swahili_pipeline", lang = "sw") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmroberta_ner_base_finetuned_swahili_finetuned_ner_swahili_pipeline", lang = "sw") +val annotations = pipeline.transform(df) + +``` +
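+The snippet above references an input DataFrame `df` that is not defined; as a hedged usage sketch (assuming an active `spark` session), the pipeline expects a `text` column, and single strings can also be annotated directly:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("xlmroberta_ner_base_finetuned_swahili_finetuned_ner_swahili_pipeline", lang = "sw")
+
+# Option 1: run the pipeline on a Spark DataFrame with a "text" column
+df = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+# Option 2: annotate a single string without building a DataFrame
+light_result = pipeline.annotate("PUT YOUR STRING HERE")
+```
+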
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_ner_base_finetuned_swahili_finetuned_ner_swahili_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|sw| +|Size:|1.0 GB| + +## References + +https://huggingface.co/mbeukman/xlm-roberta-base-finetuned-swahili-finetuned-ner-swahili + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_base_finetuned_swahili_finetuned_ner_swahili_sw.md b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_base_finetuned_swahili_finetuned_ner_swahili_sw.md new file mode 100644 index 00000000000000..dee8eda5883ada --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_base_finetuned_swahili_finetuned_ner_swahili_sw.md @@ -0,0 +1,115 @@ +--- +layout: model +title: Swahili XLMRobertaForTokenClassification Base Cased model (from mbeukman) +author: John Snow Labs +name: xlmroberta_ner_base_finetuned_swahili_finetuned_ner_swahili +date: 2024-09-06 +tags: [sw, open_source, xlm_roberta, ner, onnx] +task: Named Entity Recognition +language: sw +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XLMRobertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm-roberta-base-finetuned-swahili-finetuned-ner-swahili` is a Swahili model originally trained by `mbeukman`. + +## Predicted Entities + +`DATE`, `PER`, `LOC`, `ORG` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_base_finetuned_swahili_finetuned_ner_swahili_sw_5.5.0_3.0_1725658464567.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_base_finetuned_swahili_finetuned_ner_swahili_sw_5.5.0_3.0_1725658464567.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols("document") \
+    .setOutputCol("token")
+
+token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_base_finetuned_swahili_finetuned_ner_swahili","sw") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+ner_converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+pipeline = Pipeline(stages=[documentAssembler, tokenizer, token_classifier, ner_converter])
+
+data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_base_finetuned_swahili_finetuned_ner_swahili","sw")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val ner_converter = new NerConverter()
+    .setInputCols(Array("document", "token", "ner"))
+    .setOutputCol("ner_chunk")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, token_classifier, ner_converter))
+
+val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text")
+
+val result = pipeline.fit(data).transform(data)
+```
+
+{:.nlu-block}
+```python
+import nlu
+nlu.load("sw.ner.xlmr_roberta.base_finetuned_swahili.v2.by_mbeukman").predict("""PUT YOUR STRING HERE""")
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_ner_base_finetuned_swahili_finetuned_ner_swahili| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|sw| +|Size:|1.0 GB| + +## References + +References + +- https://huggingface.co/mbeukman/xlm-roberta-base-finetuned-swahili-finetuned-ner-swahili +- https://arxiv.org/abs/2103.11811 +- https://github.com/Michael-Beukman/NERTransfer +- https://github.com/masakhane-io/masakhane-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_base_uncased_mit_restaurant_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_base_uncased_mit_restaurant_pipeline_en.md new file mode 100644 index 00000000000000..9df2801b7ac171 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_base_uncased_mit_restaurant_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlmroberta_ner_base_uncased_mit_restaurant_pipeline pipeline XlmRoBertaForTokenClassification from tner +author: John Snow Labs +name: xlmroberta_ner_base_uncased_mit_restaurant_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmroberta_ner_base_uncased_mit_restaurant_pipeline` is a English model originally trained by tner. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_base_uncased_mit_restaurant_pipeline_en_5.5.0_3.0_1725592598218.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_base_uncased_mit_restaurant_pipeline_en_5.5.0_3.0_1725592598218.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmroberta_ner_base_uncased_mit_restaurant_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmroberta_ner_base_uncased_mit_restaurant_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_ner_base_uncased_mit_restaurant_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|773.5 MB| + +## References + +https://huggingface.co/tner/xlm-roberta-base-uncased-mit-restaurant + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_cj_mills_base_finetuned_panx_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_cj_mills_base_finetuned_panx_pipeline_xx.md new file mode 100644 index 00000000000000..095ffc3c82bb94 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_cj_mills_base_finetuned_panx_pipeline_xx.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Multilingual xlmroberta_ner_cj_mills_base_finetuned_panx_pipeline pipeline XlmRoBertaForTokenClassification from cj-mills +author: John Snow Labs +name: xlmroberta_ner_cj_mills_base_finetuned_panx_pipeline +date: 2024-09-06 +tags: [xx, open_source, pipeline, onnx] +task: Named Entity Recognition +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmroberta_ner_cj_mills_base_finetuned_panx_pipeline` is a Multilingual model originally trained by cj-mills. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_cj_mills_base_finetuned_panx_pipeline_xx_5.5.0_3.0_1725593917053.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_cj_mills_base_finetuned_panx_pipeline_xx_5.5.0_3.0_1725593917053.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmroberta_ner_cj_mills_base_finetuned_panx_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmroberta_ner_cj_mills_base_finetuned_panx_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_ner_cj_mills_base_finetuned_panx_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|856.1 MB| + +## References + +https://huggingface.co/cj-mills/xlm-roberta-base-finetuned-panx-de-fr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_cj_mills_base_finetuned_panx_xx.md b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_cj_mills_base_finetuned_panx_xx.md new file mode 100644 index 00000000000000..1fbc258d18045b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_cj_mills_base_finetuned_panx_xx.md @@ -0,0 +1,112 @@ +--- +layout: model +title: Multilingual XLMRobertaForTokenClassification Base Cased model (from cj-mills) +author: John Snow Labs +name: xlmroberta_ner_cj_mills_base_finetuned_panx +date: 2024-09-06 +tags: [de, fr, open_source, xlm_roberta, ner, xx, onnx] +task: Named Entity Recognition +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XLMRobertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm-roberta-base-finetuned-panx-de-fr` is a Multilingual model originally trained by `cj-mills`. + +## Predicted Entities + +`PER`, `LOC`, `ORG` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_cj_mills_base_finetuned_panx_xx_5.5.0_3.0_1725593847072.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_cj_mills_base_finetuned_panx_xx_5.5.0_3.0_1725593847072.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols("document") \
+    .setOutputCol("token")
+
+token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_cj_mills_base_finetuned_panx","xx") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+ner_converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+pipeline = Pipeline(stages=[documentAssembler, tokenizer, token_classifier, ner_converter])
+
+data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_cj_mills_base_finetuned_panx","xx")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val ner_converter = new NerConverter()
+    .setInputCols(Array("document", "token", "ner"))
+    .setOutputCol("ner_chunk")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, token_classifier, ner_converter))
+
+val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text")
+
+val result = pipeline.fit(data).transform(data)
+```
+
+{:.nlu-block}
+```python
+import nlu
+nlu.load("xx.ner.xlmr_roberta.base_finetuned.by_cj_mills").predict("""PUT YOUR STRING HERE""")
+```
+
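+For quick, single-document inference the fitted pipeline above can also be wrapped in a `LightPipeline`; this is a hedged sketch that reuses the `pipeline` and `data` objects from the example and is not part of the original card:
+
+```python
+from sparknlp.base import LightPipeline
+
+# LightPipeline runs on the driver, which is convenient for short texts
+light = LightPipeline(pipeline.fit(data))
+annotations = light.fullAnnotate("PUT YOUR STRING HERE")
+```
+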
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_ner_cj_mills_base_finetuned_panx| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|xx| +|Size:|856.0 MB| + +## References + +References + +- https://huggingface.co/cj-mills/xlm-roberta-base-finetuned-panx-de-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_haesun_base_finetuned_panx_en.md b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_haesun_base_finetuned_panx_en.md new file mode 100644 index 00000000000000..e06a927a1656a4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_haesun_base_finetuned_panx_en.md @@ -0,0 +1,113 @@ +--- +layout: model +title: English XLMRobertaForTokenClassification Base Cased model (from haesun) +author: John Snow Labs +name: xlmroberta_ner_haesun_base_finetuned_panx +date: 2024-09-06 +tags: [en, open_source, xlm_roberta, ner, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XLMRobertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm-roberta-base-finetuned-panx-en` is a English model originally trained by `haesun`. + +## Predicted Entities + +`PER`, `LOC`, `ORG` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_haesun_base_finetuned_panx_en_5.5.0_3.0_1725592521931.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_haesun_base_finetuned_panx_en_5.5.0_3.0_1725592521931.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols("document") \
+    .setOutputCol("token")
+
+token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_haesun_base_finetuned_panx","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+ner_converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+pipeline = Pipeline(stages=[documentAssembler, tokenizer, token_classifier, ner_converter])
+
+data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_haesun_base_finetuned_panx","en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val ner_converter = new NerConverter()
+    .setInputCols(Array("document", "token", "ner"))
+    .setOutputCol("ner_chunk")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, token_classifier, ner_converter))
+
+val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text")
+
+val result = pipeline.fit(data).transform(data)
+```
+
+{:.nlu-block}
+```python
+import nlu
+nlu.load("en.ner.xlmr_roberta.xtreme.base_finetuned.by_haesun").predict("""PUT YOUR STRING HERE""")
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_ner_haesun_base_finetuned_panx| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|814.3 MB| + +## References + +References + +- https://huggingface.co/haesun/xlm-roberta-base-finetuned-panx-en +- https://paperswithcode.com/sota?task=Token+Classification&dataset=xtreme \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_jamesmarcel_base_finetuned_panx_de.md b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_jamesmarcel_base_finetuned_panx_de.md new file mode 100644 index 00000000000000..7f6827e67b7d1d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_jamesmarcel_base_finetuned_panx_de.md @@ -0,0 +1,113 @@ +--- +layout: model +title: German XLMRobertaForTokenClassification Base Cased model (from jamesmarcel) +author: John Snow Labs +name: xlmroberta_ner_jamesmarcel_base_finetuned_panx +date: 2024-09-06 +tags: [de, open_source, xlm_roberta, ner, onnx] +task: Named Entity Recognition +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XLMRobertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm-roberta-base-finetuned-panx-de` is a German model originally trained by `jamesmarcel`. + +## Predicted Entities + +`PER`, `LOC`, `ORG` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_jamesmarcel_base_finetuned_panx_de_5.5.0_3.0_1725592826400.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_jamesmarcel_base_finetuned_panx_de_5.5.0_3.0_1725592826400.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols("document") \
+    .setOutputCol("token")
+
+token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_jamesmarcel_base_finetuned_panx","de") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+ner_converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+pipeline = Pipeline(stages=[documentAssembler, tokenizer, token_classifier, ner_converter])
+
+data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_jamesmarcel_base_finetuned_panx","de")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val ner_converter = new NerConverter()
+    .setInputCols(Array("document", "token", "ner"))
+    .setOutputCol("ner_chunk")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, token_classifier, ner_converter))
+
+val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text")
+
+val result = pipeline.fit(data).transform(data)
+```
+
+{:.nlu-block}
+```python
+import nlu
+nlu.load("de.ner.xlmr_roberta.xtreme.base_finetuned.by_jamesmarcel").predict("""PUT YOUR STRING HERE""")
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_ner_jamesmarcel_base_finetuned_panx| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|de| +|Size:|853.8 MB| + +## References + +References + +- https://huggingface.co/jamesmarcel/xlm-roberta-base-finetuned-panx-de +- https://paperswithcode.com/sota?task=Token+Classification&dataset=xtreme \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_jgriffi_base_finetuned_panx_all_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_jgriffi_base_finetuned_panx_all_pipeline_xx.md new file mode 100644 index 00000000000000..19d0a8b89f495e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_jgriffi_base_finetuned_panx_all_pipeline_xx.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Multilingual xlmroberta_ner_jgriffi_base_finetuned_panx_all_pipeline pipeline XlmRoBertaForTokenClassification from jgriffi +author: John Snow Labs +name: xlmroberta_ner_jgriffi_base_finetuned_panx_all_pipeline +date: 2024-09-06 +tags: [xx, open_source, pipeline, onnx] +task: Named Entity Recognition +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmroberta_ner_jgriffi_base_finetuned_panx_all_pipeline` is a Multilingual model originally trained by jgriffi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_jgriffi_base_finetuned_panx_all_pipeline_xx_5.5.0_3.0_1725592310458.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_jgriffi_base_finetuned_panx_all_pipeline_xx_5.5.0_3.0_1725592310458.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmroberta_ner_jgriffi_base_finetuned_panx_all_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmroberta_ner_jgriffi_base_finetuned_panx_all_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_ner_jgriffi_base_finetuned_panx_all_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|861.9 MB| + +## References + +https://huggingface.co/jgriffi/xlm-roberta-base-finetuned-panx-all + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_jgriffi_base_finetuned_panx_all_xx.md b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_jgriffi_base_finetuned_panx_all_xx.md new file mode 100644 index 00000000000000..2aac3ba434c4d0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_jgriffi_base_finetuned_panx_all_xx.md @@ -0,0 +1,112 @@ +--- +layout: model +title: Multilingual XLMRobertaForTokenClassification Base Cased model (from jgriffi) +author: John Snow Labs +name: xlmroberta_ner_jgriffi_base_finetuned_panx_all +date: 2024-09-06 +tags: [xx, open_source, xlm_roberta, ner, onnx] +task: Named Entity Recognition +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XLMRobertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm-roberta-base-finetuned-panx-all` is a Multilingual model originally trained by `jgriffi`. + +## Predicted Entities + +`ORG`, `LOC`, `PER` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_jgriffi_base_finetuned_panx_all_xx_5.5.0_3.0_1725592247705.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_jgriffi_base_finetuned_panx_all_xx_5.5.0_3.0_1725592247705.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols("document") \
+    .setOutputCol("token")
+
+token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_jgriffi_base_finetuned_panx_all","xx") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+ner_converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+pipeline = Pipeline(stages=[documentAssembler, tokenizer, token_classifier, ner_converter])
+
+data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_jgriffi_base_finetuned_panx_all","xx")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val ner_converter = new NerConverter()
+    .setInputCols(Array("document", "token", "ner"))
+    .setOutputCol("ner_chunk")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, token_classifier, ner_converter))
+
+val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text")
+
+val result = pipeline.fit(data).transform(data)
+```
+
+{:.nlu-block}
+```python
+import nlu
+nlu.load("xx.ner.xlmr_roberta.base_finetuned_panx_all.by_jgriffi").predict("""PUT YOUR STRING HERE""")
+```
+
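+As a brief sketch of persisting the fitted pipeline for reuse (the path below is only an illustrative placeholder, not something referenced by this card):
+
+```python
+from pyspark.ml import PipelineModel
+
+model = pipeline.fit(data)
+
+# Save the fitted pipeline to disk and load it back later
+model.write().overwrite().save("/tmp/xlmroberta_ner_pipeline_model")
+loaded_model = PipelineModel.load("/tmp/xlmroberta_ner_pipeline_model")
+result = loaded_model.transform(data)
+```
+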
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_ner_jgriffi_base_finetuned_panx_all| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|xx| +|Size:|861.8 MB| + +## References + +References + +- https://huggingface.co/jgriffi/xlm-roberta-base-finetuned-panx-all \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_naomiyjchen_base_finetuned_panx_de.md b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_naomiyjchen_base_finetuned_panx_de.md new file mode 100644 index 00000000000000..141c9e61f622d0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_naomiyjchen_base_finetuned_panx_de.md @@ -0,0 +1,113 @@ +--- +layout: model +title: German XLMRobertaForTokenClassification Base Cased model (from naomiyjchen) +author: John Snow Labs +name: xlmroberta_ner_naomiyjchen_base_finetuned_panx +date: 2024-09-06 +tags: [de, open_source, xlm_roberta, ner, onnx] +task: Named Entity Recognition +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XLMRobertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm-roberta-base-finetuned-panx-de` is a German model originally trained by `naomiyjchen`. + +## Predicted Entities + +`PER`, `LOC`, `ORG` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_naomiyjchen_base_finetuned_panx_de_5.5.0_3.0_1725657321111.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_naomiyjchen_base_finetuned_panx_de_5.5.0_3.0_1725657321111.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols("document") \
+    .setOutputCol("token")
+
+token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_naomiyjchen_base_finetuned_panx","de") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+ner_converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+pipeline = Pipeline(stages=[documentAssembler, tokenizer, token_classifier, ner_converter])
+
+data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_naomiyjchen_base_finetuned_panx","de")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val ner_converter = new NerConverter()
+    .setInputCols(Array("document", "token", "ner"))
+    .setOutputCol("ner_chunk")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, token_classifier, ner_converter))
+
+val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text")
+
+val result = pipeline.fit(data).transform(data)
+```
+
+{:.nlu-block}
+```python
+import nlu
+nlu.load("de.ner.xlmr_roberta.xtreme.base_finetuned.by_naomiyjchen").predict("""PUT YOUR STRING HERE""")
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_ner_naomiyjchen_base_finetuned_panx| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|de| +|Size:|853.8 MB| + +## References + +References + +- https://huggingface.co/naomiyjchen/xlm-roberta-base-finetuned-panx-de +- https://paperswithcode.com/sota?task=Token+Classification&dataset=xtreme \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_naomiyjchen_base_finetuned_panx_pipeline_de.md b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_naomiyjchen_base_finetuned_panx_pipeline_de.md new file mode 100644 index 00000000000000..c4448969867aa3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_naomiyjchen_base_finetuned_panx_pipeline_de.md @@ -0,0 +1,70 @@ +--- +layout: model +title: German xlmroberta_ner_naomiyjchen_base_finetuned_panx_pipeline pipeline XlmRoBertaForTokenClassification from naomiyjchen +author: John Snow Labs +name: xlmroberta_ner_naomiyjchen_base_finetuned_panx_pipeline +date: 2024-09-06 +tags: [de, open_source, pipeline, onnx] +task: Named Entity Recognition +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmroberta_ner_naomiyjchen_base_finetuned_panx_pipeline` is a German model originally trained by naomiyjchen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_naomiyjchen_base_finetuned_panx_pipeline_de_5.5.0_3.0_1725657388039.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_naomiyjchen_base_finetuned_panx_pipeline_de_5.5.0_3.0_1725657388039.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmroberta_ner_naomiyjchen_base_finetuned_panx_pipeline", lang = "de") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmroberta_ner_naomiyjchen_base_finetuned_panx_pipeline", lang = "de") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_ner_naomiyjchen_base_finetuned_panx_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|de| +|Size:|853.8 MB| + +## References + +https://huggingface.co/naomiyjchen/xlm-roberta-base-finetuned-panx-de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_olpa_base_finetuned_panx_pipeline_de.md b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_olpa_base_finetuned_panx_pipeline_de.md new file mode 100644 index 00000000000000..4b072b8934f68a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_olpa_base_finetuned_panx_pipeline_de.md @@ -0,0 +1,70 @@ +--- +layout: model +title: German xlmroberta_ner_olpa_base_finetuned_panx_pipeline pipeline XlmRoBertaForTokenClassification from olpa +author: John Snow Labs +name: xlmroberta_ner_olpa_base_finetuned_panx_pipeline +date: 2024-09-06 +tags: [de, open_source, pipeline, onnx] +task: Named Entity Recognition +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmroberta_ner_olpa_base_finetuned_panx_pipeline` is a German model originally trained by olpa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_olpa_base_finetuned_panx_pipeline_de_5.5.0_3.0_1725591800755.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_olpa_base_finetuned_panx_pipeline_de_5.5.0_3.0_1725591800755.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmroberta_ner_olpa_base_finetuned_panx_pipeline", lang = "de") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmroberta_ner_olpa_base_finetuned_panx_pipeline", lang = "de") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_ner_olpa_base_finetuned_panx_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|de| +|Size:|853.8 MB| + +## References + +https://huggingface.co/olpa/xlm-roberta-base-finetuned-panx-de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_rishav_hub_base_finetuned_panx_de.md b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_rishav_hub_base_finetuned_panx_de.md new file mode 100644 index 00000000000000..3ccad8222e5503 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_rishav_hub_base_finetuned_panx_de.md @@ -0,0 +1,113 @@ +--- +layout: model +title: German XLMRobertaForTokenClassification Base Cased model (from Rishav-hub) +author: John Snow Labs +name: xlmroberta_ner_rishav_hub_base_finetuned_panx +date: 2024-09-06 +tags: [de, open_source, xlm_roberta, ner, onnx] +task: Named Entity Recognition +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XLMRobertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm-roberta-base-finetuned-panx-de` is a German model originally trained by `Rishav-hub`. + +## Predicted Entities + +`PER`, `LOC`, `ORG` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_rishav_hub_base_finetuned_panx_de_5.5.0_3.0_1725656897311.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_rishav_hub_base_finetuned_panx_de_5.5.0_3.0_1725656897311.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols("document") \
+    .setOutputCol("token")
+
+token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_rishav_hub_base_finetuned_panx","de") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+ner_converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+pipeline = Pipeline(stages=[documentAssembler, tokenizer, token_classifier, ner_converter])
+
+data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_rishav_hub_base_finetuned_panx","de")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val ner_converter = new NerConverter()
+    .setInputCols(Array("document", "token", "ner"))
+    .setOutputCol("ner_chunk")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, token_classifier, ner_converter))
+
+val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text")
+
+val result = pipeline.fit(data).transform(data)
+```
+
+{:.nlu-block}
+```python
+import nlu
+nlu.load("de.ner.xlmr_roberta.xtreme.base_finetuned.by_rishav_hub").predict("""PUT YOUR STRING HERE""")
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_ner_rishav_hub_base_finetuned_panx| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|de| +|Size:|853.8 MB| + +## References + +References + +- https://huggingface.co/Rishav-hub/xlm-roberta-base-finetuned-panx-de +- https://paperswithcode.com/sota?task=Token+Classification&dataset=xtreme \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_rishav_hub_base_finetuned_panx_pipeline_de.md b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_rishav_hub_base_finetuned_panx_pipeline_de.md new file mode 100644 index 00000000000000..2da03a9110b5ba --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_rishav_hub_base_finetuned_panx_pipeline_de.md @@ -0,0 +1,70 @@ +--- +layout: model +title: German xlmroberta_ner_rishav_hub_base_finetuned_panx_pipeline pipeline XlmRoBertaForTokenClassification from Rishav-hub +author: John Snow Labs +name: xlmroberta_ner_rishav_hub_base_finetuned_panx_pipeline +date: 2024-09-06 +tags: [de, open_source, pipeline, onnx] +task: Named Entity Recognition +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmroberta_ner_rishav_hub_base_finetuned_panx_pipeline` is a German model originally trained by Rishav-hub. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_rishav_hub_base_finetuned_panx_pipeline_de_5.5.0_3.0_1725656963736.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_rishav_hub_base_finetuned_panx_pipeline_de_5.5.0_3.0_1725656963736.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmroberta_ner_rishav_hub_base_finetuned_panx_pipeline", lang = "de") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmroberta_ner_rishav_hub_base_finetuned_panx_pipeline", lang = "de") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_ner_rishav_hub_base_finetuned_panx_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|de| +|Size:|853.8 MB| + +## References + +https://huggingface.co/Rishav-hub/xlm-roberta-base-finetuned-panx-de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_selamatpagi_base_finetuned_panx_pipeline_de.md b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_selamatpagi_base_finetuned_panx_pipeline_de.md new file mode 100644 index 00000000000000..da3d43e8fecb54 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_selamatpagi_base_finetuned_panx_pipeline_de.md @@ -0,0 +1,70 @@ +--- +layout: model +title: German xlmroberta_ner_selamatpagi_base_finetuned_panx_pipeline pipeline XlmRoBertaForTokenClassification from SelamatPagi +author: John Snow Labs +name: xlmroberta_ner_selamatpagi_base_finetuned_panx_pipeline +date: 2024-09-06 +tags: [de, open_source, pipeline, onnx] +task: Named Entity Recognition +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmroberta_ner_selamatpagi_base_finetuned_panx_pipeline` is a German model originally trained by SelamatPagi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_selamatpagi_base_finetuned_panx_pipeline_de_5.5.0_3.0_1725592628255.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_selamatpagi_base_finetuned_panx_pipeline_de_5.5.0_3.0_1725592628255.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmroberta_ner_selamatpagi_base_finetuned_panx_pipeline", lang = "de") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmroberta_ner_selamatpagi_base_finetuned_panx_pipeline", lang = "de") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_ner_selamatpagi_base_finetuned_panx_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|de| +|Size:|853.8 MB| + +## References + +https://huggingface.co/SelamatPagi/xlm-roberta-base-finetuned-panx-de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_skr3178_base_finetuned_panx_all_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_skr3178_base_finetuned_panx_all_pipeline_xx.md new file mode 100644 index 00000000000000..2088b2fa11b705 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_skr3178_base_finetuned_panx_all_pipeline_xx.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Multilingual xlmroberta_ner_skr3178_base_finetuned_panx_all_pipeline pipeline XlmRoBertaForTokenClassification from skr3178 +author: John Snow Labs +name: xlmroberta_ner_skr3178_base_finetuned_panx_all_pipeline +date: 2024-09-06 +tags: [xx, open_source, pipeline, onnx] +task: Named Entity Recognition +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmroberta_ner_skr3178_base_finetuned_panx_all_pipeline` is a Multilingual model originally trained by skr3178. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_skr3178_base_finetuned_panx_all_pipeline_xx_5.5.0_3.0_1725593989674.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_skr3178_base_finetuned_panx_all_pipeline_xx_5.5.0_3.0_1725593989674.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmroberta_ner_skr3178_base_finetuned_panx_all_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmroberta_ner_skr3178_base_finetuned_panx_all_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_ner_skr3178_base_finetuned_panx_all_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|861.0 MB| + +## References + +https://huggingface.co/skr3178/xlm-roberta-base-finetuned-panx-all + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_skr3178_base_finetuned_panx_all_xx.md b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_skr3178_base_finetuned_panx_all_xx.md new file mode 100644 index 00000000000000..d3453cc2c05331 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_skr3178_base_finetuned_panx_all_xx.md @@ -0,0 +1,112 @@ +--- +layout: model +title: Multilingual XLMRobertaForTokenClassification Base Cased model (from skr3178) +author: John Snow Labs +name: xlmroberta_ner_skr3178_base_finetuned_panx_all +date: 2024-09-06 +tags: [xx, open_source, xlm_roberta, ner, onnx] +task: Named Entity Recognition +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XLMRobertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm-roberta-base-finetuned-panx-all` is a Multilingual model originally trained by `skr3178`. + +## Predicted Entities + +`PER`, `LOC`, `ORG` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_skr3178_base_finetuned_panx_all_xx_5.5.0_3.0_1725593926678.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_skr3178_base_finetuned_panx_all_xx_5.5.0_3.0_1725593926678.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols("document") \
+    .setOutputCol("token")
+
+token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_skr3178_base_finetuned_panx_all","xx") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+ner_converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+pipeline = Pipeline(stages=[documentAssembler, tokenizer, token_classifier, ner_converter])
+
+data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_skr3178_base_finetuned_panx_all","xx")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val ner_converter = new NerConverter()
+    .setInputCols(Array("document", "token", "ner"))
+    .setOutputCol("ner_chunk")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, token_classifier, ner_converter))
+
+val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text")
+
+val result = pipeline.fit(data).transform(data)
+```
+
+{:.nlu-block}
+```python
+import nlu
+nlu.load("xx.ner.xlmr_roberta.base_finetuned_panx_all.by_skr3178").predict("""PUT YOUR STRING HERE""")
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_ner_skr3178_base_finetuned_panx_all| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|xx| +|Size:|861.0 MB| + +## References + +References + +- https://huggingface.co/skr3178/xlm-roberta-base-finetuned-panx-all \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_xugenpeng_base_finetuned_panx_de.md b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_xugenpeng_base_finetuned_panx_de.md new file mode 100644 index 00000000000000..90c5a91326015a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_xugenpeng_base_finetuned_panx_de.md @@ -0,0 +1,112 @@ +--- +layout: model +title: German XLMRobertaForTokenClassification Base Cased model (from xugenpeng) +author: John Snow Labs +name: xlmroberta_ner_xugenpeng_base_finetuned_panx +date: 2024-09-06 +tags: [de, open_source, xlm_roberta, ner, onnx] +task: Named Entity Recognition +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XLMRobertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm-roberta-base-finetuned-panx-de` is a German model originally trained by `xugenpeng`. + +## Predicted Entities + +`PER`, `LOC`, `ORG` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_xugenpeng_base_finetuned_panx_de_5.5.0_3.0_1725657887451.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_xugenpeng_base_finetuned_panx_de_5.5.0_3.0_1725657887451.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols("document") \
+    .setOutputCol("token")
+
+token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_xugenpeng_base_finetuned_panx","de") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+ner_converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+pipeline = Pipeline(stages=[documentAssembler, tokenizer, token_classifier, ner_converter])
+
+data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_xugenpeng_base_finetuned_panx","de")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val ner_converter = new NerConverter()
+    .setInputCols(Array("document", "token", "ner"))
+    .setOutputCol("ner_chunk")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, token_classifier, ner_converter))
+
+val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text")
+
+val result = pipeline.fit(data).transform(data)
+```
+
+{:.nlu-block}
+```python
+import nlu
+nlu.load("de.ner.xlmr_roberta.base_finetuned.by_xugenpeng").predict("""PUT YOUR STRING HERE""")
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_ner_xugenpeng_base_finetuned_panx| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|de| +|Size:|853.2 MB| + +## References + +References + +- https://huggingface.co/xugenpeng/xlm-roberta-base-finetuned-panx-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_xugenpeng_base_finetuned_panx_pipeline_de.md b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_xugenpeng_base_finetuned_panx_pipeline_de.md new file mode 100644 index 00000000000000..3fd349ccb8641e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xlmroberta_ner_xugenpeng_base_finetuned_panx_pipeline_de.md @@ -0,0 +1,70 @@ +--- +layout: model +title: German xlmroberta_ner_xugenpeng_base_finetuned_panx_pipeline pipeline XlmRoBertaForTokenClassification from xugenpeng +author: John Snow Labs +name: xlmroberta_ner_xugenpeng_base_finetuned_panx_pipeline +date: 2024-09-06 +tags: [de, open_source, pipeline, onnx] +task: Named Entity Recognition +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmroberta_ner_xugenpeng_base_finetuned_panx_pipeline` is a German model originally trained by xugenpeng. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_xugenpeng_base_finetuned_panx_pipeline_de_5.5.0_3.0_1725657954677.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_xugenpeng_base_finetuned_panx_pipeline_de_5.5.0_3.0_1725657954677.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmroberta_ner_xugenpeng_base_finetuned_panx_pipeline", lang = "de") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmroberta_ner_xugenpeng_base_finetuned_panx_pipeline", lang = "de") +val annotations = pipeline.transform(df) + +``` +
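+
+As a hedged usage sketch added for illustration only: `df` above stands for any Spark DataFrame with a `text` column (assumed here), and `annotate()` can be used for quick checks on single strings.
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("xlmroberta_ner_xugenpeng_base_finetuned_panx_pipeline", lang = "de")
+
+# A DataFrame input is assumed to need a "text" column, e.g.:
+df = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+# annotate() returns a dict of output columns for a single string
+print(pipeline.annotate("PUT YOUR STRING HERE"))
+```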
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_ner_xugenpeng_base_finetuned_panx_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|de| +|Size:|853.2 MB| + +## References + +https://huggingface.co/xugenpeng/xlm-roberta-base-finetuned-panx-de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xml_roberta_model_qa_en.md b/docs/_posts/ahmedlone127/2024-09-06-xml_roberta_model_qa_en.md new file mode 100644 index 00000000000000..0a0b48431f4a8c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xml_roberta_model_qa_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English xml_roberta_model_qa XlmRoBertaForQuestionAnswering from Kyrmasch +author: John Snow Labs +name: xml_roberta_model_qa +date: 2024-09-06 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xml_roberta_model_qa` is a English model originally trained by Kyrmasch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xml_roberta_model_qa_en_5.5.0_3.0_1725631000353.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xml_roberta_model_qa_en_5.5.0_3.0_1725631000353.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+     .setInputCols(["question", "context"]) \
+     .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xml_roberta_model_qa","en") \
+     .setInputCols(["document_question","document_context"]) \
+     .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+     .setInputCols(Array("question", "context"))
+     .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xml_roberta_model_qa", "en")
+     .setInputCols(Array("document_question","document_context"))
+     .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
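+
+As a small illustration added here (not in the original example), the predicted answer span can be read back once the Python pipeline above has been run:
+
+```python
+# Assumes `pipelineDF` from the Python example above; "answer" is the span classifier's output column.
+pipelineDF.select("answer.result").show(truncate=False)
+```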
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xml_roberta_model_qa| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|884.1 MB| + +## References + +https://huggingface.co/Kyrmasch/xml-roberta-model-qa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-xml_roberta_model_qa_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-xml_roberta_model_qa_pipeline_en.md new file mode 100644 index 00000000000000..9fcd3a40968534 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-xml_roberta_model_qa_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xml_roberta_model_qa_pipeline pipeline XlmRoBertaForQuestionAnswering from Kyrmasch +author: John Snow Labs +name: xml_roberta_model_qa_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xml_roberta_model_qa_pipeline` is a English model originally trained by Kyrmasch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xml_roberta_model_qa_pipeline_en_5.5.0_3.0_1725631070581.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xml_roberta_model_qa_pipeline_en_5.5.0_3.0_1725631070581.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xml_roberta_model_qa_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xml_roberta_model_qa_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xml_roberta_model_qa_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|884.1 MB| + +## References + +https://huggingface.co/Kyrmasch/xml-roberta-model-qa + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-yappychappysimple_en.md b/docs/_posts/ahmedlone127/2024-09-06-yappychappysimple_en.md new file mode 100644 index 00000000000000..98bed800caced5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-yappychappysimple_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English yappychappysimple DistilBertForTokenClassification from CoRGI-HF +author: John Snow Labs +name: yappychappysimple +date: 2024-09-06 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`yappychappysimple` is a English model originally trained by CoRGI-HF. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/yappychappysimple_en_5.5.0_3.0_1725599264898.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/yappychappysimple_en_5.5.0_3.0_1725599264898.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("yappychappysimple","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("yappychappysimple", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
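+
+As an optional, hedged extension (not part of the original card), a `NerConverter` stage can be appended to the pipeline above to merge the token-level IOB tags into entity chunks:
+
+```python
+# Reuses documentAssembler, tokenizer, tokenClassifier and data from the Python example above.
+ner_converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier, ner_converter])
+pipeline.fit(data).transform(data).select("ner_chunk.result").show(truncate=False)
+```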
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|yappychappysimple| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/CoRGI-HF/YappyChappySimple \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-yappychappysimple_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-06-yappychappysimple_pipeline_en.md new file mode 100644 index 00000000000000..89bad9b7e7003f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-yappychappysimple_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English yappychappysimple_pipeline pipeline DistilBertForTokenClassification from CoRGI-HF +author: John Snow Labs +name: yappychappysimple_pipeline +date: 2024-09-06 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`yappychappysimple_pipeline` is a English model originally trained by CoRGI-HF. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/yappychappysimple_pipeline_en_5.5.0_3.0_1725599277233.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/yappychappysimple_pipeline_en_5.5.0_3.0_1725599277233.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("yappychappysimple_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("yappychappysimple_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|yappychappysimple_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.9 MB| + +## References + +https://huggingface.co/CoRGI-HF/YappyChappySimple + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-zabantu_sot_ven_170m_pipeline_ve.md b/docs/_posts/ahmedlone127/2024-09-06-zabantu_sot_ven_170m_pipeline_ve.md new file mode 100644 index 00000000000000..a7f39beae9d070 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-zabantu_sot_ven_170m_pipeline_ve.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Venda zabantu_sot_ven_170m_pipeline pipeline XlmRoBertaEmbeddings from dsfsi +author: John Snow Labs +name: zabantu_sot_ven_170m_pipeline +date: 2024-09-06 +tags: [ve, open_source, pipeline, onnx] +task: Embeddings +language: ve +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`zabantu_sot_ven_170m_pipeline` is a Venda model originally trained by dsfsi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/zabantu_sot_ven_170m_pipeline_ve_5.5.0_3.0_1725596424963.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/zabantu_sot_ven_170m_pipeline_ve_5.5.0_3.0_1725596424963.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("zabantu_sot_ven_170m_pipeline", lang = "ve") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("zabantu_sot_ven_170m_pipeline", lang = "ve") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|zabantu_sot_ven_170m_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ve| +|Size:|646.5 MB| + +## References + +https://huggingface.co/dsfsi/zabantu-sot-ven-170m + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-06-zabantu_sot_ven_170m_ve.md b/docs/_posts/ahmedlone127/2024-09-06-zabantu_sot_ven_170m_ve.md new file mode 100644 index 00000000000000..df0a5c595986e4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-06-zabantu_sot_ven_170m_ve.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Venda zabantu_sot_ven_170m XlmRoBertaEmbeddings from dsfsi +author: John Snow Labs +name: zabantu_sot_ven_170m +date: 2024-09-06 +tags: [ve, open_source, onnx, embeddings, xlm_roberta] +task: Embeddings +language: ve +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`zabantu_sot_ven_170m` is a Venda model originally trained by dsfsi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/zabantu_sot_ven_170m_ve_5.5.0_3.0_1725596393160.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/zabantu_sot_ven_170m_ve_5.5.0_3.0_1725596393160.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("zabantu_sot_ven_170m","ve") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("zabantu_sot_ven_170m","ve") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
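+
+The snippet below is an added illustration (assuming the Python example above has been run); each exploded row pairs a token with its embedding vector:
+
+```python
+# Assumes `pipelineDF` from the Python example above.
+pipelineDF.selectExpr("explode(embeddings) AS emb") \
+    .selectExpr("emb.result AS token", "emb.embeddings") \
+    .show(truncate=80)
+```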
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|zabantu_sot_ven_170m| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[xlm_roberta]| +|Language:|ve| +|Size:|646.5 MB| + +## References + +https://huggingface.co/dsfsi/zabantu-sot-ven-170m \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-2020_q2_full_tweets_combined90_en.md b/docs/_posts/ahmedlone127/2024-09-07-2020_q2_full_tweets_combined90_en.md new file mode 100644 index 00000000000000..48cba2ed5d3bb6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-2020_q2_full_tweets_combined90_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English 2020_q2_full_tweets_combined90 RoBertaEmbeddings from DouglasPontes +author: John Snow Labs +name: 2020_q2_full_tweets_combined90 +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`2020_q2_full_tweets_combined90` is a English model originally trained by DouglasPontes. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/2020_q2_full_tweets_combined90_en_5.5.0_3.0_1725673250099.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/2020_q2_full_tweets_combined90_en_5.5.0_3.0_1725673250099.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("2020_q2_full_tweets_combined90","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("2020_q2_full_tweets_combined90","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|2020_q2_full_tweets_combined90| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|466.1 MB| + +## References + +https://huggingface.co/DouglasPontes/2020-Q2-full_tweets_combined90 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-2020_q2_full_tweets_combined90_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-2020_q2_full_tweets_combined90_pipeline_en.md new file mode 100644 index 00000000000000..c7406c5c055553 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-2020_q2_full_tweets_combined90_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English 2020_q2_full_tweets_combined90_pipeline pipeline RoBertaEmbeddings from DouglasPontes +author: John Snow Labs +name: 2020_q2_full_tweets_combined90_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`2020_q2_full_tweets_combined90_pipeline` is a English model originally trained by DouglasPontes. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/2020_q2_full_tweets_combined90_pipeline_en_5.5.0_3.0_1725673271790.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/2020_q2_full_tweets_combined90_pipeline_en_5.5.0_3.0_1725673271790.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("2020_q2_full_tweets_combined90_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("2020_q2_full_tweets_combined90_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|2020_q2_full_tweets_combined90_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|466.1 MB| + +## References + +https://huggingface.co/DouglasPontes/2020-Q2-full_tweets_combined90 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-4_datasets_fake_news_with_balanced_with_add_one_sentence_en.md b/docs/_posts/ahmedlone127/2024-09-07-4_datasets_fake_news_with_balanced_with_add_one_sentence_en.md new file mode 100644 index 00000000000000..288eeca6a2a090 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-4_datasets_fake_news_with_balanced_with_add_one_sentence_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English 4_datasets_fake_news_with_balanced_with_add_one_sentence DistilBertForSequenceClassification from littlepinhorse +author: John Snow Labs +name: 4_datasets_fake_news_with_balanced_with_add_one_sentence +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`4_datasets_fake_news_with_balanced_with_add_one_sentence` is a English model originally trained by littlepinhorse. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/4_datasets_fake_news_with_balanced_with_add_one_sentence_en_5.5.0_3.0_1725674961903.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/4_datasets_fake_news_with_balanced_with_add_one_sentence_en_5.5.0_3.0_1725674961903.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("4_datasets_fake_news_with_balanced_with_add_one_sentence","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("4_datasets_fake_news_with_balanced_with_add_one_sentence", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
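+
+A brief added illustration (assuming the Python example above has been run) of reading the predicted label back:
+
+```python
+# Assumes `pipelineDF` from the Python example above; "class.result" holds the predicted label per row.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```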
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|4_datasets_fake_news_with_balanced_with_add_one_sentence| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/littlepinhorse/4_datasets_fake_news_with_Balanced_With_add_one_sentence \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-accu_3_en.md b/docs/_posts/ahmedlone127/2024-09-07-accu_3_en.md new file mode 100644 index 00000000000000..b57af36972f090 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-accu_3_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English accu_3 RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: accu_3 +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`accu_3` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/accu_3_en_5.5.0_3.0_1725717712270.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/accu_3_en_5.5.0_3.0_1725717712270.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("accu_3","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("accu_3", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
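+
+For quick experiments on in-memory strings, a `LightPipeline` can wrap the fitted model; this is a hedged sketch added for illustration, not part of the original card:
+
+```python
+from sparknlp.base import LightPipeline
+
+# Assumes `pipelineModel` from the Python example above.
+light = LightPipeline(pipelineModel)
+print(light.annotate("I love spark-nlp")["class"])
+```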
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|accu_3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/Accu_3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-accu_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-accu_3_pipeline_en.md new file mode 100644 index 00000000000000..e2c6b426063d10 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-accu_3_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English accu_3_pipeline pipeline RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: accu_3_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`accu_3_pipeline` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/accu_3_pipeline_en_5.5.0_3.0_1725717733874.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/accu_3_pipeline_en_5.5.0_3.0_1725717733874.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("accu_3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("accu_3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|accu_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/Accu_3 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-acrossapps_ndd_petclinic_test_content_en.md b/docs/_posts/ahmedlone127/2024-09-07-acrossapps_ndd_petclinic_test_content_en.md new file mode 100644 index 00000000000000..31a01593504b73 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-acrossapps_ndd_petclinic_test_content_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English acrossapps_ndd_petclinic_test_content DistilBertForSequenceClassification from lgk03 +author: John Snow Labs +name: acrossapps_ndd_petclinic_test_content +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`acrossapps_ndd_petclinic_test_content` is a English model originally trained by lgk03. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/acrossapps_ndd_petclinic_test_content_en_5.5.0_3.0_1725674428538.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/acrossapps_ndd_petclinic_test_content_en_5.5.0_3.0_1725674428538.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("acrossapps_ndd_petclinic_test_content","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("acrossapps_ndd_petclinic_test_content", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|acrossapps_ndd_petclinic_test_content| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/lgk03/ACROSSAPPS_NDD-petclinic_test-content \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-acrossapps_ndd_petclinic_test_content_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-acrossapps_ndd_petclinic_test_content_pipeline_en.md new file mode 100644 index 00000000000000..17ea7320acf7af --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-acrossapps_ndd_petclinic_test_content_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English acrossapps_ndd_petclinic_test_content_pipeline pipeline DistilBertForSequenceClassification from lgk03 +author: John Snow Labs +name: acrossapps_ndd_petclinic_test_content_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`acrossapps_ndd_petclinic_test_content_pipeline` is a English model originally trained by lgk03. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/acrossapps_ndd_petclinic_test_content_pipeline_en_5.5.0_3.0_1725674440127.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/acrossapps_ndd_petclinic_test_content_pipeline_en_5.5.0_3.0_1725674440127.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("acrossapps_ndd_petclinic_test_content_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("acrossapps_ndd_petclinic_test_content_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|acrossapps_ndd_petclinic_test_content_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/lgk03/ACROSSAPPS_NDD-petclinic_test-content + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-action_policy_plans_classifier_en.md b/docs/_posts/ahmedlone127/2024-09-07-action_policy_plans_classifier_en.md new file mode 100644 index 00000000000000..a53d11c01bb3d6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-action_policy_plans_classifier_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English action_policy_plans_classifier MPNetEmbeddings from ppsingh +author: John Snow Labs +name: action_policy_plans_classifier +date: 2024-09-07 +tags: [mpnet, en, open_source, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`action_policy_plans_classifier` is a English model originally trained by ppsingh. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/action_policy_plans_classifier_en_5.5.0_3.0_1725733461054.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/action_policy_plans_classifier_en_5.5.0_3.0_1725733461054.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+embeddings = MPNetEmbeddings.pretrained("action_policy_plans_classifier","en") \
+    .setInputCols(["documents"]) \
+    .setOutputCol("mpnet_embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, embeddings])
+
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val embeddings = MPNetEmbeddings
+    .pretrained("action_policy_plans_classifier", "en")
+    .setInputCols(Array("documents"))
+    .setOutputCol("mpnet_embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings))
+
+val data = Seq("I love spark-nlp").toDF("text")
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|action_policy_plans_classifier| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.1 MB| + +## References + +References + +https://huggingface.co/ppsingh/action-policy-plans-classifier \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-agric_eng_lug_en.md b/docs/_posts/ahmedlone127/2024-09-07-agric_eng_lug_en.md new file mode 100644 index 00000000000000..185009154f4864 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-agric_eng_lug_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English agric_eng_lug MarianTransformer from hellennamulinda +author: John Snow Labs +name: agric_eng_lug +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`agric_eng_lug` is a English model originally trained by hellennamulinda. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/agric_eng_lug_en_5.5.0_3.0_1725746729730.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/agric_eng_lug_en_5.5.0_3.0_1725746729730.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("agric_eng_lug","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("agric_eng_lug","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
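+
+A short added illustration (assuming the Python example above has been run) of reading the translation back:
+
+```python
+# Assumes `pipelineDF` from the Python example above; one translated string per detected sentence.
+pipelineDF.select("translation.result").show(truncate=False)
+```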
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|agric_eng_lug| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|530.3 MB| + +## References + +https://huggingface.co/hellennamulinda/agric-eng-lug \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-ahisto_ner_model_s_mu_nlpc_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-ahisto_ner_model_s_mu_nlpc_pipeline_en.md new file mode 100644 index 00000000000000..89334ded5db86c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-ahisto_ner_model_s_mu_nlpc_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English ahisto_ner_model_s_mu_nlpc_pipeline pipeline XlmRoBertaForTokenClassification from MU-NLPC +author: John Snow Labs +name: ahisto_ner_model_s_mu_nlpc_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ahisto_ner_model_s_mu_nlpc_pipeline` is a English model originally trained by MU-NLPC. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ahisto_ner_model_s_mu_nlpc_pipeline_en_5.5.0_3.0_1725687972753.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ahisto_ner_model_s_mu_nlpc_pipeline_en_5.5.0_3.0_1725687972753.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ahisto_ner_model_s_mu_nlpc_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ahisto_ner_model_s_mu_nlpc_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ahisto_ner_model_s_mu_nlpc_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/MU-NLPC/ahisto-ner-model-s + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-albert_kor_base_tweak_ko.md b/docs/_posts/ahmedlone127/2024-09-07-albert_kor_base_tweak_ko.md new file mode 100644 index 00000000000000..7bcbf23ec008c8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-albert_kor_base_tweak_ko.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Korean albert_kor_base_tweak BertEmbeddings from smartmind +author: John Snow Labs +name: albert_kor_base_tweak +date: 2024-09-07 +tags: [ko, open_source, onnx, embeddings, bert] +task: Embeddings +language: ko +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_kor_base_tweak` is a Korean model originally trained by smartmind. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_kor_base_tweak_ko_5.5.0_3.0_1725696288426.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_kor_base_tweak_ko_5.5.0_3.0_1725696288426.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("albert_kor_base_tweak","ko") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("albert_kor_base_tweak","ko") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_kor_base_tweak| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|ko| +|Size:|47.6 MB| + +## References + +https://huggingface.co/smartmind/albert-kor-base-tweak \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-albert_kor_base_tweak_pipeline_ko.md b/docs/_posts/ahmedlone127/2024-09-07-albert_kor_base_tweak_pipeline_ko.md new file mode 100644 index 00000000000000..f3010449efeadc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-albert_kor_base_tweak_pipeline_ko.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Korean albert_kor_base_tweak_pipeline pipeline BertEmbeddings from smartmind +author: John Snow Labs +name: albert_kor_base_tweak_pipeline +date: 2024-09-07 +tags: [ko, open_source, pipeline, onnx] +task: Embeddings +language: ko +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_kor_base_tweak_pipeline` is a Korean model originally trained by smartmind. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_kor_base_tweak_pipeline_ko_5.5.0_3.0_1725696291080.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_kor_base_tweak_pipeline_ko_5.5.0_3.0_1725696291080.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("albert_kor_base_tweak_pipeline", lang = "ko") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("albert_kor_base_tweak_pipeline", lang = "ko") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_kor_base_tweak_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ko| +|Size:|47.6 MB| + +## References + +https://huggingface.co/smartmind/albert-kor-base-tweak + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-albert_minebgsd01_en.md b/docs/_posts/ahmedlone127/2024-09-07-albert_minebgsd01_en.md new file mode 100644 index 00000000000000..6fca7ead94e5de --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-albert_minebgsd01_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English albert_minebgsd01 AlbertForSequenceClassification from minebgsd01 +author: John Snow Labs +name: albert_minebgsd01 +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_minebgsd01` is a English model originally trained by minebgsd01. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_minebgsd01_en_5.5.0_3.0_1725732076089.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_minebgsd01_en_5.5.0_3.0_1725732076089.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_minebgsd01","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_minebgsd01", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_minebgsd01| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/minebgsd01/Albert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-albert_minebgsd01_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-albert_minebgsd01_pipeline_en.md new file mode 100644 index 00000000000000..7dc5b82cfede3a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-albert_minebgsd01_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English albert_minebgsd01_pipeline pipeline AlbertForSequenceClassification from minebgsd01 +author: John Snow Labs +name: albert_minebgsd01_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_minebgsd01_pipeline` is a English model originally trained by minebgsd01. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_minebgsd01_pipeline_en_5.5.0_3.0_1725732078441.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_minebgsd01_pipeline_en_5.5.0_3.0_1725732078441.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("albert_minebgsd01_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("albert_minebgsd01_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_minebgsd01_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/minebgsd01/Albert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-albert_test_model_2_en.md b/docs/_posts/ahmedlone127/2024-09-07-albert_test_model_2_en.md new file mode 100644 index 00000000000000..825c14e4fd84b0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-albert_test_model_2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English albert_test_model_2 DistilBertForSequenceClassification from KalaiselvanD +author: John Snow Labs +name: albert_test_model_2 +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_test_model_2` is a English model originally trained by KalaiselvanD. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_test_model_2_en_5.5.0_3.0_1725674385176.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_test_model_2_en_5.5.0_3.0_1725674385176.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("albert_test_model_2","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("albert_test_model_2", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_test_model_2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/KalaiselvanD/albert_test_model_2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-albert_test_model_2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-albert_test_model_2_pipeline_en.md new file mode 100644 index 00000000000000..b5e9d72ff3079e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-albert_test_model_2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English albert_test_model_2_pipeline pipeline DistilBertForSequenceClassification from KalaiselvanD +author: John Snow Labs +name: albert_test_model_2_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_test_model_2_pipeline` is a English model originally trained by KalaiselvanD. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_test_model_2_pipeline_en_5.5.0_3.0_1725674396601.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_test_model_2_pipeline_en_5.5.0_3.0_1725674396601.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("albert_test_model_2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("albert_test_model_2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_test_model_2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/KalaiselvanD/albert_test_model_2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-albert_tiny_chinese_david_ner_en.md b/docs/_posts/ahmedlone127/2024-09-07-albert_tiny_chinese_david_ner_en.md new file mode 100644 index 00000000000000..743395e1f05e18 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-albert_tiny_chinese_david_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English albert_tiny_chinese_david_ner BertForTokenClassification from davidliu1110 +author: John Snow Labs +name: albert_tiny_chinese_david_ner +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_tiny_chinese_david_ner` is a English model originally trained by davidliu1110. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_tiny_chinese_david_ner_en_5.5.0_3.0_1725734968623.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_tiny_chinese_david_ner_en_5.5.0_3.0_1725734968623.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("albert_tiny_chinese_david_ner","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("albert_tiny_chinese_david_ner", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
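+For token classification the `ner` column holds one tag per token. A small sketch for pairing tokens with their predicted tags; the column names follow the snippet above and the printed alignment is purely illustrative:
+
+```python
+from pyspark.sql import functions as F
+
+# Collect the parallel token/tag arrays for the first row and zip them in Python
+row = pipelineDF.select(
+    F.col("token.result").alias("tokens"),
+    F.col("ner.result").alias("tags")
+).first()
+
+for token, tag in zip(row["tokens"], row["tags"]):
+    print(f"{token}\t{tag}")
+```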
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_tiny_chinese_david_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|15.1 MB| + +## References + +https://huggingface.co/davidliu1110/albert-tiny-chinese-david-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-albert_turkish_turkish_spam_email_pipeline_tr.md b/docs/_posts/ahmedlone127/2024-09-07-albert_turkish_turkish_spam_email_pipeline_tr.md new file mode 100644 index 00000000000000..f144573e350679 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-albert_turkish_turkish_spam_email_pipeline_tr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Turkish albert_turkish_turkish_spam_email_pipeline pipeline AlbertForSequenceClassification from anilguven +author: John Snow Labs +name: albert_turkish_turkish_spam_email_pipeline +date: 2024-09-07 +tags: [tr, open_source, pipeline, onnx] +task: Text Classification +language: tr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_turkish_turkish_spam_email_pipeline` is a Turkish model originally trained by anilguven. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_turkish_turkish_spam_email_pipeline_tr_5.5.0_3.0_1725732930022.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_turkish_turkish_spam_email_pipeline_tr_5.5.0_3.0_1725732930022.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("albert_turkish_turkish_spam_email_pipeline", lang = "tr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("albert_turkish_turkish_spam_email_pipeline", lang = "tr") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_turkish_turkish_spam_email_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|tr| +|Size:|45.1 MB| + +## References + +https://huggingface.co/anilguven/albert_tr_turkish_spam_email + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-albert_turkish_turkish_spam_email_tr.md b/docs/_posts/ahmedlone127/2024-09-07-albert_turkish_turkish_spam_email_tr.md new file mode 100644 index 00000000000000..abd14eb299bc18 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-albert_turkish_turkish_spam_email_tr.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Turkish albert_turkish_turkish_spam_email AlbertForSequenceClassification from anilguven +author: John Snow Labs +name: albert_turkish_turkish_spam_email +date: 2024-09-07 +tags: [tr, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: tr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_turkish_turkish_spam_email` is a Turkish model originally trained by anilguven. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_turkish_turkish_spam_email_tr_5.5.0_3.0_1725732927623.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_turkish_turkish_spam_email_tr_5.5.0_3.0_1725732927623.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_turkish_turkish_spam_email","tr") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_turkish_turkish_spam_email", "tr")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_turkish_turkish_spam_email| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|tr| +|Size:|45.1 MB| + +## References + +https://huggingface.co/anilguven/albert_tr_turkish_spam_email \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-albert_tweet_en.md b/docs/_posts/ahmedlone127/2024-09-07-albert_tweet_en.md new file mode 100644 index 00000000000000..1e570cf6af2d70 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-albert_tweet_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English albert_tweet AlbertForSequenceClassification from Sangmitra-06 +author: John Snow Labs +name: albert_tweet +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_tweet` is a English model originally trained by Sangmitra-06. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_tweet_en_5.5.0_3.0_1725732044678.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_tweet_en_5.5.0_3.0_1725732044678.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_tweet","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_tweet", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_tweet| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/Sangmitra-06/Albert_tweet \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-albert_tweet_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-albert_tweet_pipeline_en.md new file mode 100644 index 00000000000000..674a68d8250c28 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-albert_tweet_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English albert_tweet_pipeline pipeline AlbertForSequenceClassification from Sangmitra-06 +author: John Snow Labs +name: albert_tweet_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_tweet_pipeline` is a English model originally trained by Sangmitra-06. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_tweet_pipeline_en_5.5.0_3.0_1725732047250.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_tweet_pipeline_en_5.5.0_3.0_1725732047250.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("albert_tweet_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("albert_tweet_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_tweet_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/Sangmitra-06/Albert_tweet + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-aliens_en.md b/docs/_posts/ahmedlone127/2024-09-07-aliens_en.md new file mode 100644 index 00000000000000..160a1fb23a2152 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-aliens_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English aliens DistilBertForSequenceClassification from arkalon +author: John Snow Labs +name: aliens +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`aliens` is a English model originally trained by arkalon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/aliens_en_5.5.0_3.0_1725674612895.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/aliens_en_5.5.0_3.0_1725674612895.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("aliens","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("aliens", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|aliens| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/arkalon/aliens \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-all_mpnet_base_newtriplets_v2_lr_2e_7_m_5_e_3_en.md b/docs/_posts/ahmedlone127/2024-09-07-all_mpnet_base_newtriplets_v2_lr_2e_7_m_5_e_3_en.md new file mode 100644 index 00000000000000..c3535d3d6ca2bc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-all_mpnet_base_newtriplets_v2_lr_2e_7_m_5_e_3_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English all_mpnet_base_newtriplets_v2_lr_2e_7_m_5_e_3 MPNetEmbeddings from luiz-and-robert-thesis +author: John Snow Labs +name: all_mpnet_base_newtriplets_v2_lr_2e_7_m_5_e_3 +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`all_mpnet_base_newtriplets_v2_lr_2e_7_m_5_e_3` is a English model originally trained by luiz-and-robert-thesis. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_base_newtriplets_v2_lr_2e_7_m_5_e_3_en_5.5.0_3.0_1725703162474.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_base_newtriplets_v2_lr_2e_7_m_5_e_3_en_5.5.0_3.0_1725703162474.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_newtriplets_v2_lr_2e_7_m_5_e_3","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_newtriplets_v2_lr_2e_7_m_5_e_3","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
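+The MPNet annotator emits one sentence-level vector per document in the `embeddings` column. A hedged sketch for comparing two sentences with cosine similarity; the sentences are illustrative and the variable names follow the snippet above:
+
+```python
+import numpy as np
+
+pairs = spark.createDataFrame([["I love spark-nlp"], ["Spark NLP is great"]]).toDF("text")
+result = pipeline.fit(pairs).transform(pairs)
+
+# Each annotation struct exposes its vector through the `embeddings` field
+vectors = [np.array(r["embeddings"][0]["embeddings"]) for r in result.select("embeddings").collect()]
+
+cosine = float(np.dot(vectors[0], vectors[1]) /
+               (np.linalg.norm(vectors[0]) * np.linalg.norm(vectors[1])))
+print(f"cosine similarity: {cosine:.3f}")
+```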
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_base_newtriplets_v2_lr_2e_7_m_5_e_3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/luiz-and-robert-thesis/all-mpnet-base-newtriplets-v2-lr-2e-7-m-5-e-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-all_mpnet_base_v2_20240102_en.md b/docs/_posts/ahmedlone127/2024-09-07-all_mpnet_base_v2_20240102_en.md new file mode 100644 index 00000000000000..93740e0f9dd431 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-all_mpnet_base_v2_20240102_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English all_mpnet_base_v2_20240102 MPNetForSequenceClassification from Kevinger +author: John Snow Labs +name: all_mpnet_base_v2_20240102 +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, mpnet] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`all_mpnet_base_v2_20240102` is a English model originally trained by Kevinger. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_20240102_en_5.5.0_3.0_1725733223504.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_20240102_en_5.5.0_3.0_1725733223504.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = MPNetForSequenceClassification.pretrained("all_mpnet_base_v2_20240102","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = MPNetForSequenceClassification.pretrained("all_mpnet_base_v2_20240102", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_base_v2_20240102| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.2 MB| + +## References + +https://huggingface.co/Kevinger/all-mpnet-base-v2-20240102 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-all_mpnet_base_v2_ledgar_full_contrastive_en.md b/docs/_posts/ahmedlone127/2024-09-07-all_mpnet_base_v2_ledgar_full_contrastive_en.md new file mode 100644 index 00000000000000..d2bcbc80600449 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-all_mpnet_base_v2_ledgar_full_contrastive_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English all_mpnet_base_v2_ledgar_full_contrastive MPNetEmbeddings from beautifulpichai +author: John Snow Labs +name: all_mpnet_base_v2_ledgar_full_contrastive +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`all_mpnet_base_v2_ledgar_full_contrastive` is a English model originally trained by beautifulpichai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_ledgar_full_contrastive_en_5.5.0_3.0_1725703448541.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_ledgar_full_contrastive_en_5.5.0_3.0_1725703448541.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_v2_ledgar_full_contrastive","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_v2_ledgar_full_contrastive","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_base_v2_ledgar_full_contrastive| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/beautifulpichai/all-mpnet-base-v2-ledgar-full-contrastive \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-all_mpnet_base_v2_lr_5e_7_margin_1_epoch_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-all_mpnet_base_v2_lr_5e_7_margin_1_epoch_1_pipeline_en.md new file mode 100644 index 00000000000000..53420d1ca3fa23 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-all_mpnet_base_v2_lr_5e_7_margin_1_epoch_1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English all_mpnet_base_v2_lr_5e_7_margin_1_epoch_1_pipeline pipeline MPNetEmbeddings from luiz-and-robert-thesis +author: John Snow Labs +name: all_mpnet_base_v2_lr_5e_7_margin_1_epoch_1_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`all_mpnet_base_v2_lr_5e_7_margin_1_epoch_1_pipeline` is a English model originally trained by luiz-and-robert-thesis. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_lr_5e_7_margin_1_epoch_1_pipeline_en_5.5.0_3.0_1725702895777.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_lr_5e_7_margin_1_epoch_1_pipeline_en_5.5.0_3.0_1725702895777.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("all_mpnet_base_v2_lr_5e_7_margin_1_epoch_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("all_mpnet_base_v2_lr_5e_7_margin_1_epoch_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_base_v2_lr_5e_7_margin_1_epoch_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/luiz-and-robert-thesis/all-mpnet-base-v2-lr-5e-7-margin-1-epoch-1 + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-all_mpnet_lr5e_8_margin_1_ep_3_bosnian_64_en.md b/docs/_posts/ahmedlone127/2024-09-07-all_mpnet_lr5e_8_margin_1_ep_3_bosnian_64_en.md new file mode 100644 index 00000000000000..3fcf98e57a2afa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-all_mpnet_lr5e_8_margin_1_ep_3_bosnian_64_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English all_mpnet_lr5e_8_margin_1_ep_3_bosnian_64 MPNetEmbeddings from luiz-and-robert-thesis +author: John Snow Labs +name: all_mpnet_lr5e_8_margin_1_ep_3_bosnian_64 +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`all_mpnet_lr5e_8_margin_1_ep_3_bosnian_64` is a English model originally trained by luiz-and-robert-thesis. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_lr5e_8_margin_1_ep_3_bosnian_64_en_5.5.0_3.0_1725703184088.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_lr5e_8_margin_1_ep_3_bosnian_64_en_5.5.0_3.0_1725703184088.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("all_mpnet_lr5e_8_margin_1_ep_3_bosnian_64","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("all_mpnet_lr5e_8_margin_1_ep_3_bosnian_64","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_lr5e_8_margin_1_ep_3_bosnian_64| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/luiz-and-robert-thesis/all-mpnet-lr5e-8-margin-1-ep-3-bs-64 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-all_mpnet_lr5e_8_margin_1_ep_3_bosnian_64_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-all_mpnet_lr5e_8_margin_1_ep_3_bosnian_64_pipeline_en.md new file mode 100644 index 00000000000000..b077ad2b0926a2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-all_mpnet_lr5e_8_margin_1_ep_3_bosnian_64_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English all_mpnet_lr5e_8_margin_1_ep_3_bosnian_64_pipeline pipeline MPNetEmbeddings from luiz-and-robert-thesis +author: John Snow Labs +name: all_mpnet_lr5e_8_margin_1_ep_3_bosnian_64_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`all_mpnet_lr5e_8_margin_1_ep_3_bosnian_64_pipeline` is a English model originally trained by luiz-and-robert-thesis. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_lr5e_8_margin_1_ep_3_bosnian_64_pipeline_en_5.5.0_3.0_1725703207514.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_lr5e_8_margin_1_ep_3_bosnian_64_pipeline_en_5.5.0_3.0_1725703207514.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("all_mpnet_lr5e_8_margin_1_ep_3_bosnian_64_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("all_mpnet_lr5e_8_margin_1_ep_3_bosnian_64_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_lr5e_8_margin_1_ep_3_bosnian_64_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/luiz-and-robert-thesis/all-mpnet-lr5e-8-margin-1-ep-3-bs-64 + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-amazon_helpfulness_classification_on_tapt_pretrained_norwegian_condencing_en.md b/docs/_posts/ahmedlone127/2024-09-07-amazon_helpfulness_classification_on_tapt_pretrained_norwegian_condencing_en.md new file mode 100644 index 00000000000000..bb5c54356ed441 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-amazon_helpfulness_classification_on_tapt_pretrained_norwegian_condencing_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English amazon_helpfulness_classification_on_tapt_pretrained_norwegian_condencing RoBertaForSequenceClassification from ltuzova +author: John Snow Labs +name: amazon_helpfulness_classification_on_tapt_pretrained_norwegian_condencing +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`amazon_helpfulness_classification_on_tapt_pretrained_norwegian_condencing` is a English model originally trained by ltuzova. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/amazon_helpfulness_classification_on_tapt_pretrained_norwegian_condencing_en_5.5.0_3.0_1725680413589.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/amazon_helpfulness_classification_on_tapt_pretrained_norwegian_condencing_en_5.5.0_3.0_1725680413589.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("amazon_helpfulness_classification_on_tapt_pretrained_norwegian_condencing","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("amazon_helpfulness_classification_on_tapt_pretrained_norwegian_condencing", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|amazon_helpfulness_classification_on_tapt_pretrained_norwegian_condencing| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|468.6 MB| + +## References + +https://huggingface.co/ltuzova/amazon_helpfulness_classification_on_TAPT_pretrained_no_condencing \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-arabic_bert_model_ar.md b/docs/_posts/ahmedlone127/2024-09-07-arabic_bert_model_ar.md new file mode 100644 index 00000000000000..73aca649e0f0ef --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-arabic_bert_model_ar.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Arabic arabic_bert_model RoBertaEmbeddings from Rohan-Kurdekar +author: John Snow Labs +name: arabic_bert_model +date: 2024-09-07 +tags: [ar, open_source, onnx, embeddings, roberta] +task: Embeddings +language: ar +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`arabic_bert_model` is a Arabic model originally trained by Rohan-Kurdekar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabic_bert_model_ar_5.5.0_3.0_1725672819661.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabic_bert_model_ar_5.5.0_3.0_1725672819661.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("arabic_bert_model","ar") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("arabic_bert_model","ar") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
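+Unlike the sentence-embedding models above, RoBertaEmbeddings returns one vector per token. A short sketch for inspecting that output; it reuses `pipelineModel` from the snippet above, and the Arabic sample sentence and the base-size dimension assumption are illustrative:
+
+```python
+# Apply the fitted pipeline to a new Arabic sentence
+sample = spark.createDataFrame([["مرحبا بالعالم"]]).toDF("text")
+rows = pipelineModel.transform(sample).select("embeddings").collect()
+
+for ann in rows[0]["embeddings"]:
+    # `result` is the token text, `embeddings` its vector (likely 768 floats for a base-size model)
+    print(ann["result"], len(ann["embeddings"]))
+```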
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arabic_bert_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|ar| +|Size:|311.4 MB| + +## References + +https://huggingface.co/Rohan-Kurdekar/Arabic_Bert_Model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-arabic_bert_model_pipeline_ar.md b/docs/_posts/ahmedlone127/2024-09-07-arabic_bert_model_pipeline_ar.md new file mode 100644 index 00000000000000..ac3e8a2af26406 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-arabic_bert_model_pipeline_ar.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Arabic arabic_bert_model_pipeline pipeline RoBertaEmbeddings from Rohan-Kurdekar +author: John Snow Labs +name: arabic_bert_model_pipeline +date: 2024-09-07 +tags: [ar, open_source, pipeline, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`arabic_bert_model_pipeline` is a Arabic model originally trained by Rohan-Kurdekar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabic_bert_model_pipeline_ar_5.5.0_3.0_1725672835576.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabic_bert_model_pipeline_ar_5.5.0_3.0_1725672835576.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("arabic_bert_model_pipeline", lang = "ar") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("arabic_bert_model_pipeline", lang = "ar") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arabic_bert_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ar| +|Size:|311.4 MB| + +## References + +https://huggingface.co/Rohan-Kurdekar/Arabic_Bert_Model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-arbertv2_ar.md b/docs/_posts/ahmedlone127/2024-09-07-arbertv2_ar.md new file mode 100644 index 00000000000000..8120fe63012f3a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-arbertv2_ar.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Arabic arbertv2 BertEmbeddings from UBC-NLP +author: John Snow Labs +name: arbertv2 +date: 2024-09-07 +tags: [ar, open_source, onnx, embeddings, bert] +task: Embeddings +language: ar +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`arbertv2` is a Arabic model originally trained by UBC-NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arbertv2_ar_5.5.0_3.0_1725696445474.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arbertv2_ar_5.5.0_3.0_1725696445474.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("arbertv2","ar") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("arbertv2","ar") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arbertv2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|ar| +|Size:|607.1 MB| + +## References + +https://huggingface.co/UBC-NLP/ARBERTv2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-arbertv2_pipeline_ar.md b/docs/_posts/ahmedlone127/2024-09-07-arbertv2_pipeline_ar.md new file mode 100644 index 00000000000000..ad00a1b09f9d95 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-arbertv2_pipeline_ar.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Arabic arbertv2_pipeline pipeline BertEmbeddings from UBC-NLP +author: John Snow Labs +name: arbertv2_pipeline +date: 2024-09-07 +tags: [ar, open_source, pipeline, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`arbertv2_pipeline` is a Arabic model originally trained by UBC-NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arbertv2_pipeline_ar_5.5.0_3.0_1725696473103.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arbertv2_pipeline_ar_5.5.0_3.0_1725696473103.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("arbertv2_pipeline", lang = "ar") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("arbertv2_pipeline", lang = "ar") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arbertv2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ar| +|Size:|607.1 MB| + +## References + +https://huggingface.co/UBC-NLP/ARBERTv2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-argureviews_sentiment_roberta_v1_en.md b/docs/_posts/ahmedlone127/2024-09-07-argureviews_sentiment_roberta_v1_en.md new file mode 100644 index 00000000000000..666516b2b1d0dc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-argureviews_sentiment_roberta_v1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English argureviews_sentiment_roberta_v1 XlmRoBertaForSequenceClassification from nihiluis +author: John Snow Labs +name: argureviews_sentiment_roberta_v1 +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`argureviews_sentiment_roberta_v1` is a English model originally trained by nihiluis. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/argureviews_sentiment_roberta_v1_en_5.5.0_3.0_1725712973871.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/argureviews_sentiment_roberta_v1_en_5.5.0_3.0_1725712973871.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("argureviews_sentiment_roberta_v1","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("argureviews_sentiment_roberta_v1", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
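+When single texts need to be scored without building a DataFrame, Spark NLP's LightPipeline can wrap the fitted pipeline from the snippet above. A hedged sketch; the sample review text is illustrative:
+
+```python
+from sparknlp.base import LightPipeline
+
+# LightPipeline runs the same stages in-process, which is handy for low-latency scoring
+light = LightPipeline(pipelineModel)
+result = light.annotate("The packaging was damaged but the product itself works fine.")
+
+# The predicted label comes back under the classifier's output column name
+print(result["class"])
+```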
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|argureviews_sentiment_roberta_v1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|834.2 MB| + +## References + +https://huggingface.co/nihiluis/argureviews-sentiment-roberta_v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-argureviews_sentiment_roberta_v1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-argureviews_sentiment_roberta_v1_pipeline_en.md new file mode 100644 index 00000000000000..6225c2b76bea9d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-argureviews_sentiment_roberta_v1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English argureviews_sentiment_roberta_v1_pipeline pipeline XlmRoBertaForSequenceClassification from nihiluis +author: John Snow Labs +name: argureviews_sentiment_roberta_v1_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`argureviews_sentiment_roberta_v1_pipeline` is a English model originally trained by nihiluis. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/argureviews_sentiment_roberta_v1_pipeline_en_5.5.0_3.0_1725713061496.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/argureviews_sentiment_roberta_v1_pipeline_en_5.5.0_3.0_1725713061496.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("argureviews_sentiment_roberta_v1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("argureviews_sentiment_roberta_v1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|argureviews_sentiment_roberta_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|834.2 MB| + +## References + +https://huggingface.co/nihiluis/argureviews-sentiment-roberta_v1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-arywiki_20230101_roberta_mlm_bots_ar.md b/docs/_posts/ahmedlone127/2024-09-07-arywiki_20230101_roberta_mlm_bots_ar.md new file mode 100644 index 00000000000000..c4e2ca7a423636 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-arywiki_20230101_roberta_mlm_bots_ar.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Arabic arywiki_20230101_roberta_mlm_bots RoBertaEmbeddings from SaiedAlshahrani +author: John Snow Labs +name: arywiki_20230101_roberta_mlm_bots +date: 2024-09-07 +tags: [ar, open_source, onnx, embeddings, roberta] +task: Embeddings +language: ar +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`arywiki_20230101_roberta_mlm_bots` is a Arabic model originally trained by SaiedAlshahrani. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arywiki_20230101_roberta_mlm_bots_ar_5.5.0_3.0_1725716120617.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arywiki_20230101_roberta_mlm_bots_ar_5.5.0_3.0_1725716120617.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("arywiki_20230101_roberta_mlm_bots","ar") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("arywiki_20230101_roberta_mlm_bots","ar") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
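+
+Once the Python pipeline above has been run, the token vectors can be inspected from `pipelineDF`: each annotation in the `embeddings` column carries the token text in its `result` field and the vector in its nested `embeddings` field. A minimal sketch:
+
+```python
+# One row per token: the token text and its embedding vector
+pipelineDF.selectExpr("explode(embeddings) as emb") \
+    .selectExpr("emb.result as token", "emb.embeddings as vector") \
+    .show(truncate=80)
+```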
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arywiki_20230101_roberta_mlm_bots| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|ar| +|Size:|311.2 MB| + +## References + +https://huggingface.co/SaiedAlshahrani/arywiki_20230101_roberta_mlm_bots \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-arywiki_20230101_roberta_mlm_bots_pipeline_ar.md b/docs/_posts/ahmedlone127/2024-09-07-arywiki_20230101_roberta_mlm_bots_pipeline_ar.md new file mode 100644 index 00000000000000..e1053869c1a79c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-arywiki_20230101_roberta_mlm_bots_pipeline_ar.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Arabic arywiki_20230101_roberta_mlm_bots_pipeline pipeline RoBertaEmbeddings from SaiedAlshahrani +author: John Snow Labs +name: arywiki_20230101_roberta_mlm_bots_pipeline +date: 2024-09-07 +tags: [ar, open_source, pipeline, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`arywiki_20230101_roberta_mlm_bots_pipeline` is a Arabic model originally trained by SaiedAlshahrani. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arywiki_20230101_roberta_mlm_bots_pipeline_ar_5.5.0_3.0_1725716136261.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arywiki_20230101_roberta_mlm_bots_pipeline_ar_5.5.0_3.0_1725716136261.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("arywiki_20230101_roberta_mlm_bots_pipeline", lang = "ar") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("arywiki_20230101_roberta_mlm_bots_pipeline", lang = "ar") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arywiki_20230101_roberta_mlm_bots_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ar| +|Size:|311.3 MB| + +## References + +https://huggingface.co/SaiedAlshahrani/arywiki_20230101_roberta_mlm_bots + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-augmented_distillbert_en.md b/docs/_posts/ahmedlone127/2024-09-07-augmented_distillbert_en.md new file mode 100644 index 00000000000000..8132faed031560 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-augmented_distillbert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English augmented_distillbert DistilBertForSequenceClassification from erostrate9 +author: John Snow Labs +name: augmented_distillbert +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`augmented_distillbert` is a English model originally trained by erostrate9. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/augmented_distillbert_en_5.5.0_3.0_1725675148054.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/augmented_distillbert_en_5.5.0_3.0_1725675148054.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("augmented_distillbert","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("augmented_distillbert", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|augmented_distillbert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/erostrate9/augmented_distillbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-autotrain_qna_1170143354_en.md b/docs/_posts/ahmedlone127/2024-09-07-autotrain_qna_1170143354_en.md new file mode 100644 index 00000000000000..fcfc59be736797 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-autotrain_qna_1170143354_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English autotrain_qna_1170143354 DistilBertForQuestionAnswering from IDL +author: John Snow Labs +name: autotrain_qna_1170143354 +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`autotrain_qna_1170143354` is a English model originally trained by IDL. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/autotrain_qna_1170143354_en_5.5.0_3.0_1725745598437.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/autotrain_qna_1170143354_en_5.5.0_3.0_1725745598437.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("autotrain_qna_1170143354","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("autotrain_qna_1170143354", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
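+
+After fitting and transforming as in the Python snippet above, the predicted answer span ends up in the `answer` column; a quick way to look at it:
+
+```python
+# The extracted answer text for each question/context pair
+pipelineDF.select("answer.result").show(truncate=False)
+```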
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|autotrain_qna_1170143354| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/IDL/autotrain-qna-1170143354 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bce_reranker_base_v1_maidalun1020_en.md b/docs/_posts/ahmedlone127/2024-09-07-bce_reranker_base_v1_maidalun1020_en.md new file mode 100644 index 00000000000000..0db9f2a342681c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bce_reranker_base_v1_maidalun1020_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bce_reranker_base_v1_maidalun1020 XlmRoBertaForSequenceClassification from maidalun1020 +author: John Snow Labs +name: bce_reranker_base_v1_maidalun1020 +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bce_reranker_base_v1_maidalun1020` is a English model originally trained by maidalun1020. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bce_reranker_base_v1_maidalun1020_en_5.5.0_3.0_1725669824427.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bce_reranker_base_v1_maidalun1020_en_5.5.0_3.0_1725669824427.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("bce_reranker_base_v1_maidalun1020","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("bce_reranker_base_v1_maidalun1020", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bce_reranker_base_v1_maidalun1020| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|981.7 MB| + +## References + +https://huggingface.co/maidalun1020/bce-reranker-base_v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bce_reranker_base_v1_maidalun1020_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-bce_reranker_base_v1_maidalun1020_pipeline_en.md new file mode 100644 index 00000000000000..3d404df74526cc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bce_reranker_base_v1_maidalun1020_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bce_reranker_base_v1_maidalun1020_pipeline pipeline XlmRoBertaForSequenceClassification from maidalun1020 +author: John Snow Labs +name: bce_reranker_base_v1_maidalun1020_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bce_reranker_base_v1_maidalun1020_pipeline` is a English model originally trained by maidalun1020. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bce_reranker_base_v1_maidalun1020_pipeline_en_5.5.0_3.0_1725669884215.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bce_reranker_base_v1_maidalun1020_pipeline_en_5.5.0_3.0_1725669884215.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bce_reranker_base_v1_maidalun1020_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bce_reranker_base_v1_maidalun1020_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bce_reranker_base_v1_maidalun1020_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|981.7 MB| + +## References + +https://huggingface.co/maidalun1020/bce-reranker-base_v1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bcms_bertic_ner_hr.md b/docs/_posts/ahmedlone127/2024-09-07-bcms_bertic_ner_hr.md new file mode 100644 index 00000000000000..38aa9d986dbadf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bcms_bertic_ner_hr.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Croatian bcms_bertic_ner BertForTokenClassification from classla +author: John Snow Labs +name: bcms_bertic_ner +date: 2024-09-07 +tags: [hr, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: hr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bcms_bertic_ner` is a Croatian model originally trained by classla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bcms_bertic_ner_hr_5.5.0_3.0_1725726734600.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bcms_bertic_ner_hr_5.5.0_3.0_1725726734600.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("bcms_bertic_ner","hr") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bcms_bertic_ner", "hr")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
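+
+With the Python pipeline above, the predicted tag for each token lands in the `ner` column, aligned with the `token` column; a small sketch for inspecting them side by side:
+
+```python
+# Tokens and their predicted entity tags, one array per input row
+pipelineDF.select("token.result", "ner.result").show(truncate=False)
+```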
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bcms_bertic_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|hr| +|Size:|412.7 MB| + +## References + +https://huggingface.co/classla/bcms-bertic-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bcms_bertic_ner_pipeline_hr.md b/docs/_posts/ahmedlone127/2024-09-07-bcms_bertic_ner_pipeline_hr.md new file mode 100644 index 00000000000000..98fdbed3af957a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bcms_bertic_ner_pipeline_hr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Croatian bcms_bertic_ner_pipeline pipeline BertForTokenClassification from classla +author: John Snow Labs +name: bcms_bertic_ner_pipeline +date: 2024-09-07 +tags: [hr, open_source, pipeline, onnx] +task: Named Entity Recognition +language: hr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bcms_bertic_ner_pipeline` is a Croatian model originally trained by classla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bcms_bertic_ner_pipeline_hr_5.5.0_3.0_1725726753667.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bcms_bertic_ner_pipeline_hr_5.5.0_3.0_1725726753667.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bcms_bertic_ner_pipeline", lang = "hr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bcms_bertic_ner_pipeline", lang = "hr") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bcms_bertic_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|hr| +|Size:|412.8 MB| + +## References + +https://huggingface.co/classla/bcms-bertic-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-benjys_first_model_en.md b/docs/_posts/ahmedlone127/2024-09-07-benjys_first_model_en.md new file mode 100644 index 00000000000000..3417fc95dc6cf3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-benjys_first_model_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English benjys_first_model CamemBertEmbeddings from benyjaykay +author: John Snow Labs +name: benjys_first_model +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`benjys_first_model` is a English model originally trained by benyjaykay. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/benjys_first_model_en_5.5.0_3.0_1725691289395.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/benjys_first_model_en_5.5.0_3.0_1725691289395.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("benjys_first_model","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("benjys_first_model","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|benjys_first_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/benyjaykay/benjys-first-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-berel_finetuned_dss_maskedlm_en.md b/docs/_posts/ahmedlone127/2024-09-07-berel_finetuned_dss_maskedlm_en.md new file mode 100644 index 00000000000000..8fafcdadc4bf80 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-berel_finetuned_dss_maskedlm_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English berel_finetuned_dss_maskedlm BertEmbeddings from yonatanlou +author: John Snow Labs +name: berel_finetuned_dss_maskedlm +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`berel_finetuned_dss_maskedlm` is a English model originally trained by yonatanlou. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/berel_finetuned_dss_maskedlm_en_5.5.0_3.0_1725696329021.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/berel_finetuned_dss_maskedlm_en_5.5.0_3.0_1725696329021.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("berel_finetuned_dss_maskedlm","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("berel_finetuned_dss_maskedlm","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|berel_finetuned_dss_maskedlm| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|en| +|Size:|690.1 MB| + +## References + +https://huggingface.co/yonatanlou/BEREL-finetuned-DSS-maskedLM \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-berel_finetuned_dss_maskedlm_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-berel_finetuned_dss_maskedlm_pipeline_en.md new file mode 100644 index 00000000000000..6d99c45b80bf63 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-berel_finetuned_dss_maskedlm_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English berel_finetuned_dss_maskedlm_pipeline pipeline BertEmbeddings from yonatanlou +author: John Snow Labs +name: berel_finetuned_dss_maskedlm_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`berel_finetuned_dss_maskedlm_pipeline` is a English model originally trained by yonatanlou. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/berel_finetuned_dss_maskedlm_pipeline_en_5.5.0_3.0_1725696360874.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/berel_finetuned_dss_maskedlm_pipeline_en_5.5.0_3.0_1725696360874.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("berel_finetuned_dss_maskedlm_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("berel_finetuned_dss_maskedlm_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|berel_finetuned_dss_maskedlm_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|690.1 MB| + +## References + +https://huggingface.co/yonatanlou/BEREL-finetuned-DSS-maskedLM + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bert_b02_en.md b/docs/_posts/ahmedlone127/2024-09-07-bert_b02_en.md new file mode 100644 index 00000000000000..f2376257d65e04 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bert_b02_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_b02 DistilBertForTokenClassification from LazzeKappa +author: John Snow Labs +name: bert_b02 +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_b02` is a English model originally trained by LazzeKappa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_b02_en_5.5.0_3.0_1725739491027.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_b02_en_5.5.0_3.0_1725739491027.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("bert_b02","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("bert_b02", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_b02| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|505.5 MB| + +## References + +https://huggingface.co/LazzeKappa/BERT_B02 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bert_b02_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-bert_b02_pipeline_en.md new file mode 100644 index 00000000000000..d5d5002d31075d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bert_b02_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_b02_pipeline pipeline DistilBertForTokenClassification from LazzeKappa +author: John Snow Labs +name: bert_b02_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_b02_pipeline` is a English model originally trained by LazzeKappa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_b02_pipeline_en_5.5.0_3.0_1725739514272.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_b02_pipeline_en_5.5.0_3.0_1725739514272.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_b02_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_b02_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_b02_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|505.5 MB| + +## References + +https://huggingface.co/LazzeKappa/BERT_B02 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bert_base_cased_ner_conll2003_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-bert_base_cased_ner_conll2003_pipeline_en.md new file mode 100644 index 00000000000000..a675a1da5cf5c5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bert_base_cased_ner_conll2003_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_base_cased_ner_conll2003_pipeline pipeline BertForTokenClassification from andi611 +author: John Snow Labs +name: bert_base_cased_ner_conll2003_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_cased_ner_conll2003_pipeline` is a English model originally trained by andi611. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_ner_conll2003_pipeline_en_5.5.0_3.0_1725726795923.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_ner_conll2003_pipeline_en_5.5.0_3.0_1725726795923.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_base_cased_ner_conll2003_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_base_cased_ner_conll2003_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_ner_conll2003_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/andi611/bert-base-cased-ner-conll2003 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bert_base_dutch_cased_finetuned_mbert_finetuned_ner_en.md b/docs/_posts/ahmedlone127/2024-09-07-bert_base_dutch_cased_finetuned_mbert_finetuned_ner_en.md new file mode 100644 index 00000000000000..4b55104e94563c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bert_base_dutch_cased_finetuned_mbert_finetuned_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_base_dutch_cased_finetuned_mbert_finetuned_ner DistilBertForTokenClassification from sindhujag26 +author: John Snow Labs +name: bert_base_dutch_cased_finetuned_mbert_finetuned_ner +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_dutch_cased_finetuned_mbert_finetuned_ner` is a English model originally trained by sindhujag26. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_dutch_cased_finetuned_mbert_finetuned_ner_en_5.5.0_3.0_1725734158247.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_dutch_cased_finetuned_mbert_finetuned_ner_en_5.5.0_3.0_1725734158247.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("bert_base_dutch_cased_finetuned_mbert_finetuned_ner","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("bert_base_dutch_cased_finetuned_mbert_finetuned_ner", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_dutch_cased_finetuned_mbert_finetuned_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|505.4 MB| + +## References + +https://huggingface.co/sindhujag26/bert-base-dutch-cased-finetuned-mBERT-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bert_base_multilingual_uncased_finetuned_urdu_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-07-bert_base_multilingual_uncased_finetuned_urdu_pipeline_xx.md new file mode 100644 index 00000000000000..17ee29f4b472d1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bert_base_multilingual_uncased_finetuned_urdu_pipeline_xx.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Multilingual bert_base_multilingual_uncased_finetuned_urdu_pipeline pipeline BertEmbeddings from cxfajar197 +author: John Snow Labs +name: bert_base_multilingual_uncased_finetuned_urdu_pipeline +date: 2024-09-07 +tags: [xx, open_source, pipeline, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_multilingual_uncased_finetuned_urdu_pipeline` is a Multilingual model originally trained by cxfajar197. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_multilingual_uncased_finetuned_urdu_pipeline_xx_5.5.0_3.0_1725675746304.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_multilingual_uncased_finetuned_urdu_pipeline_xx_5.5.0_3.0_1725675746304.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_base_multilingual_uncased_finetuned_urdu_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_base_multilingual_uncased_finetuned_urdu_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_multilingual_uncased_finetuned_urdu_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|625.5 MB| + +## References + +https://huggingface.co/cxfajar197/bert-base-multilingual-uncased-finetuned-urdu + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bert_base_multilingual_uncased_finetuned_urdu_xx.md b/docs/_posts/ahmedlone127/2024-09-07-bert_base_multilingual_uncased_finetuned_urdu_xx.md new file mode 100644 index 00000000000000..4bbe7cec67187e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bert_base_multilingual_uncased_finetuned_urdu_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual bert_base_multilingual_uncased_finetuned_urdu BertEmbeddings from cxfajar197 +author: John Snow Labs +name: bert_base_multilingual_uncased_finetuned_urdu +date: 2024-09-07 +tags: [xx, open_source, onnx, embeddings, bert] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_multilingual_uncased_finetuned_urdu` is a Multilingual model originally trained by cxfajar197. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_multilingual_uncased_finetuned_urdu_xx_5.5.0_3.0_1725675717276.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_multilingual_uncased_finetuned_urdu_xx_5.5.0_3.0_1725675717276.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_base_multilingual_uncased_finetuned_urdu","xx") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("bert_base_multilingual_uncased_finetuned_urdu","xx") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_multilingual_uncased_finetuned_urdu| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|xx| +|Size:|625.5 MB| + +## References + +https://huggingface.co/cxfajar197/bert-base-multilingual-uncased-finetuned-urdu \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bert_base_ner_pii_fn_en.md b/docs/_posts/ahmedlone127/2024-09-07-bert_base_ner_pii_fn_en.md new file mode 100644 index 00000000000000..85c7b2ad552b10 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bert_base_ner_pii_fn_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_base_ner_pii_fn BertForTokenClassification from vuminhtue +author: John Snow Labs +name: bert_base_ner_pii_fn +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_ner_pii_fn` is a English model originally trained by vuminhtue. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_ner_pii_fn_en_5.5.0_3.0_1725725975105.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_ner_pii_fn_en_5.5.0_3.0_1725725975105.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("bert_base_ner_pii_fn","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bert_base_ner_pii_fn", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_ner_pii_fn| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|407.3 MB| + +## References + +https://huggingface.co/vuminhtue/Bert_base_NER_PII_Fn \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bert_base_turkish_uncased_ner_tr.md b/docs/_posts/ahmedlone127/2024-09-07-bert_base_turkish_uncased_ner_tr.md new file mode 100644 index 00000000000000..a92fb4e94e7269 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bert_base_turkish_uncased_ner_tr.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Turkish bert_base_turkish_uncased_ner BertForTokenClassification from saribasmetehan +author: John Snow Labs +name: bert_base_turkish_uncased_ner +date: 2024-09-07 +tags: [tr, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: tr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_turkish_uncased_ner` is a Turkish model originally trained by saribasmetehan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_turkish_uncased_ner_tr_5.5.0_3.0_1725726090627.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_turkish_uncased_ner_tr_5.5.0_3.0_1725726090627.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("bert_base_turkish_uncased_ner","tr") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bert_base_turkish_uncased_ner", "tr")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_turkish_uncased_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|tr| +|Size:|412.6 MB| + +## References + +https://huggingface.co/saribasmetehan/bert-base-turkish-uncased-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bert_finetuned_ner4_nathali99_en.md b/docs/_posts/ahmedlone127/2024-09-07-bert_finetuned_ner4_nathali99_en.md new file mode 100644 index 00000000000000..a3c13dfe3cf01c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bert_finetuned_ner4_nathali99_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_finetuned_ner4_nathali99 BertForTokenClassification from Nathali99 +author: John Snow Labs +name: bert_finetuned_ner4_nathali99 +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_finetuned_ner4_nathali99` is a English model originally trained by Nathali99. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner4_nathali99_en_5.5.0_3.0_1725690313334.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner4_nathali99_en_5.5.0_3.0_1725690313334.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("bert_finetuned_ner4_nathali99","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bert_finetuned_ner4_nathali99", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_ner4_nathali99| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/Nathali99/bert-finetuned-ner4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bert_finetuned_ner4_nathali99_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-bert_finetuned_ner4_nathali99_pipeline_en.md new file mode 100644 index 00000000000000..dc0c6f98937874 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bert_finetuned_ner4_nathali99_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_finetuned_ner4_nathali99_pipeline pipeline BertForTokenClassification from Nathali99 +author: John Snow Labs +name: bert_finetuned_ner4_nathali99_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_finetuned_ner4_nathali99_pipeline` is a English model originally trained by Nathali99. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner4_nathali99_pipeline_en_5.5.0_3.0_1725690331695.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner4_nathali99_pipeline_en_5.5.0_3.0_1725690331695.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_finetuned_ner4_nathali99_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_finetuned_ner4_nathali99_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
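+
+The `df` above is a placeholder DataFrame with a `text` column. For a quick end-to-end check, a minimal sketch (assuming a running Spark NLP session; the sentence and the `"ner"` output key are our own assumptions) can call `annotate` on a plain string:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("bert_finetuned_ner4_nathali99_pipeline", lang="en")
+# annotate() returns a dict keyed by the pipeline's output columns.
+result = pipeline.annotate("John Snow Labs is based in Delaware.")
+print(result["ner"])
+```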
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_ner4_nathali99_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/Nathali99/bert-finetuned-ner4 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bert_finetuned_ner_en.md b/docs/_posts/ahmedlone127/2024-09-07-bert_finetuned_ner_en.md new file mode 100644 index 00000000000000..f2d5a0e09f86e0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bert_finetuned_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_finetuned_ner RoBertaForTokenClassification from mdroth +author: John Snow Labs +name: bert_finetuned_ner +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_finetuned_ner` is a English model originally trained by mdroth. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_en_5.5.0_3.0_1725708457573.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_en_5.5.0_3.0_1725708457573.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("bert_finetuned_ner","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("bert_finetuned_ner", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/mdroth/bert-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bert_finetuned_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-bert_finetuned_ner_pipeline_en.md new file mode 100644 index 00000000000000..5b8c079840b126 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bert_finetuned_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_finetuned_ner_pipeline pipeline RoBertaForTokenClassification from mdroth +author: John Snow Labs +name: bert_finetuned_ner_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_finetuned_ner_pipeline` is a English model originally trained by mdroth. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_pipeline_en_5.5.0_3.0_1725708518398.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuned_ner_pipeline_en_5.5.0_3.0_1725708518398.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_finetuned_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_finetuned_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuned_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/mdroth/bert-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bert_fromscratch_galician_large_en.md b/docs/_posts/ahmedlone127/2024-09-07-bert_fromscratch_galician_large_en.md new file mode 100644 index 00000000000000..37171f67340808 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bert_fromscratch_galician_large_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_fromscratch_galician_large RoBertaEmbeddings from fpuentes +author: John Snow Labs +name: bert_fromscratch_galician_large +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_fromscratch_galician_large` is a English model originally trained by fpuentes. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_fromscratch_galician_large_en_5.5.0_3.0_1725698266173.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_fromscratch_galician_large_en_5.5.0_3.0_1725698266173.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("bert_fromscratch_galician_large","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("bert_fromscratch_galician_large","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
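+
+To pull the vectors out of the annotation structs, a short sketch (assuming the `pipelineDF` produced above) can explode the `embeddings` column:
+
+```python
+# Sketch only: one row per token, pairing each token with its embedding vector.
+pipelineDF.selectExpr("explode(embeddings) as emb") \
+    .selectExpr("emb.result as token", "emb.embeddings as vector") \
+    .show(truncate=80)
+```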
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_fromscratch_galician_large| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|466.1 MB| + +## References + +https://huggingface.co/fpuentes/bert-fromscratch-galician-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bert_fromscratch_galician_large_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-bert_fromscratch_galician_large_pipeline_en.md new file mode 100644 index 00000000000000..3da14dce9e744e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bert_fromscratch_galician_large_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_fromscratch_galician_large_pipeline pipeline RoBertaEmbeddings from fpuentes +author: John Snow Labs +name: bert_fromscratch_galician_large_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_fromscratch_galician_large_pipeline` is a English model originally trained by fpuentes. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_fromscratch_galician_large_pipeline_en_5.5.0_3.0_1725698287578.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_fromscratch_galician_large_pipeline_en_5.5.0_3.0_1725698287578.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_fromscratch_galician_large_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_fromscratch_galician_large_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_fromscratch_galician_large_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|466.2 MB| + +## References + +https://huggingface.co/fpuentes/bert-fromscratch-galician-large + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bert_indo_base_uncased_ner_en.md b/docs/_posts/ahmedlone127/2024-09-07-bert_indo_base_uncased_ner_en.md new file mode 100644 index 00000000000000..89ef46ddb72330 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bert_indo_base_uncased_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_indo_base_uncased_ner BertForTokenClassification from rdyzakya +author: John Snow Labs +name: bert_indo_base_uncased_ner +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_indo_base_uncased_ner` is a English model originally trained by rdyzakya. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_indo_base_uncased_ner_en_5.5.0_3.0_1725690851688.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_indo_base_uncased_ner_en_5.5.0_3.0_1725690851688.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("bert_indo_base_uncased_ner","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bert_indo_base_uncased_ner", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_indo_base_uncased_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|411.7 MB| + +## References + +https://huggingface.co/rdyzakya/bert-indo-base-uncased-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bert_indo_base_uncased_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-bert_indo_base_uncased_ner_pipeline_en.md new file mode 100644 index 00000000000000..dd46122c9b2a9c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bert_indo_base_uncased_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_indo_base_uncased_ner_pipeline pipeline BertForTokenClassification from rdyzakya +author: John Snow Labs +name: bert_indo_base_uncased_ner_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_indo_base_uncased_ner_pipeline` is a English model originally trained by rdyzakya. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_indo_base_uncased_ner_pipeline_en_5.5.0_3.0_1725690870696.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_indo_base_uncased_ner_pipeline_en_5.5.0_3.0_1725690870696.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_indo_base_uncased_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_indo_base_uncased_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_indo_base_uncased_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|411.8 MB| + +## References + +https://huggingface.co/rdyzakya/bert-indo-base-uncased-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bert_l6_h768_uncased_en.md b/docs/_posts/ahmedlone127/2024-09-07-bert_l6_h768_uncased_en.md new file mode 100644 index 00000000000000..cb596a3cc40285 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bert_l6_h768_uncased_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_l6_h768_uncased BertEmbeddings from gaunernst +author: John Snow Labs +name: bert_l6_h768_uncased +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_l6_h768_uncased` is a English model originally trained by gaunernst. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_l6_h768_uncased_en_5.5.0_3.0_1725697004234.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_l6_h768_uncased_en_5.5.0_3.0_1725697004234.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_l6_h768_uncased","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("bert_l6_h768_uncased","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
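+
+If the vectors are needed as Spark ML features rather than annotation structs, an `EmbeddingsFinisher` stage can be appended. The sketch below is an assumption-laden outline (column names are taken from the example above), not part of the generated card:
+
+```python
+from sparknlp.base import EmbeddingsFinisher
+
+# Sketch only: convert annotation embeddings into Spark ML vectors.
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["finished_embeddings"]) \
+    .setOutputAsVector(True)
+
+finisher.transform(pipelineDF).select("finished_embeddings").show(truncate=80)
+```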
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_l6_h768_uncased| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|en| +|Size:|248.1 MB| + +## References + +https://huggingface.co/gaunernst/bert-L6-H768-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bert_l6_h768_uncased_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-bert_l6_h768_uncased_pipeline_en.md new file mode 100644 index 00000000000000..a5e68c568e4928 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bert_l6_h768_uncased_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_l6_h768_uncased_pipeline pipeline BertEmbeddings from gaunernst +author: John Snow Labs +name: bert_l6_h768_uncased_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_l6_h768_uncased_pipeline` is a English model originally trained by gaunernst. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_l6_h768_uncased_pipeline_en_5.5.0_3.0_1725697016039.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_l6_h768_uncased_pipeline_en_5.5.0_3.0_1725697016039.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_l6_h768_uncased_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_l6_h768_uncased_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_l6_h768_uncased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|248.1 MB| + +## References + +https://huggingface.co/gaunernst/bert-L6-H768-uncased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bert_large_cased_whole_word_masking_finetuned_squad_google_bert_en.md b/docs/_posts/ahmedlone127/2024-09-07-bert_large_cased_whole_word_masking_finetuned_squad_google_bert_en.md new file mode 100644 index 00000000000000..4e262a203e1222 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bert_large_cased_whole_word_masking_finetuned_squad_google_bert_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English bert_large_cased_whole_word_masking_finetuned_squad_google_bert BertForQuestionAnswering from google-bert +author: John Snow Labs +name: bert_large_cased_whole_word_masking_finetuned_squad_google_bert +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_large_cased_whole_word_masking_finetuned_squad_google_bert` is a English model originally trained by google-bert. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_whole_word_masking_finetuned_squad_google_bert_en_5.5.0_3.0_1725709901343.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_whole_word_masking_finetuned_squad_google_bert_en_5.5.0_3.0_1725709901343.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = BertForQuestionAnswering.pretrained("bert_large_cased_whole_word_masking_finetuned_squad_google_bert","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = BertForQuestionAnswering.pretrained("bert_large_cased_whole_word_masking_finetuned_squad_google_bert", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
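+
+The predicted answer span can be read back from the `answer` column; a minimal sketch assuming the `pipelineDF` produced above:
+
+```python
+# Sketch only: the predicted answer text for each question/context pair.
+pipelineDF.select("question", "answer.result").show(truncate=False)
+```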
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_whole_word_masking_finetuned_squad_google_bert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/google-bert/bert-large-cased-whole-word-masking-finetuned-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bert_large_cased_whole_word_masking_finetuned_squad_google_bert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-bert_large_cased_whole_word_masking_finetuned_squad_google_bert_pipeline_en.md new file mode 100644 index 00000000000000..8901a964fb0932 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bert_large_cased_whole_word_masking_finetuned_squad_google_bert_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bert_large_cased_whole_word_masking_finetuned_squad_google_bert_pipeline pipeline BertForQuestionAnswering from google-bert +author: John Snow Labs +name: bert_large_cased_whole_word_masking_finetuned_squad_google_bert_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_large_cased_whole_word_masking_finetuned_squad_google_bert_pipeline` is a English model originally trained by google-bert. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_whole_word_masking_finetuned_squad_google_bert_pipeline_en_5.5.0_3.0_1725709954139.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_whole_word_masking_finetuned_squad_google_bert_pipeline_en_5.5.0_3.0_1725709954139.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_large_cased_whole_word_masking_finetuned_squad_google_bert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_large_cased_whole_word_masking_finetuned_squad_google_bert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_whole_word_masking_finetuned_squad_google_bert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/google-bert/bert-large-cased-whole-word-masking-finetuned-squad + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bert_large_uncased_lmd_en.md b/docs/_posts/ahmedlone127/2024-09-07-bert_large_uncased_lmd_en.md new file mode 100644 index 00000000000000..2c7bb8df95cf75 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bert_large_uncased_lmd_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_large_uncased_lmd BertForTokenClassification from Sifr-un +author: John Snow Labs +name: bert_large_uncased_lmd +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_large_uncased_lmd` is a English model originally trained by Sifr-un. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_lmd_en_5.5.0_3.0_1725726337091.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_lmd_en_5.5.0_3.0_1725726337091.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("bert_large_uncased_lmd","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bert_large_uncased_lmd", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_lmd| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/Sifr-un/bert-large-uncased-LMD \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bert_large_uncased_lmd_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-bert_large_uncased_lmd_pipeline_en.md new file mode 100644 index 00000000000000..904547ce83359d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bert_large_uncased_lmd_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_large_uncased_lmd_pipeline pipeline BertForTokenClassification from Sifr-un +author: John Snow Labs +name: bert_large_uncased_lmd_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_large_uncased_lmd_pipeline` is a English model originally trained by Sifr-un. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_lmd_pipeline_en_5.5.0_3.0_1725726394891.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_lmd_pipeline_en_5.5.0_3.0_1725726394891.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_large_uncased_lmd_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_large_uncased_lmd_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_lmd_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/Sifr-un/bert-large-uncased-LMD + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bert_portuguese_ner_en.md b/docs/_posts/ahmedlone127/2024-09-07-bert_portuguese_ner_en.md new file mode 100644 index 00000000000000..9cd61a0d935074 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bert_portuguese_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_portuguese_ner BertForTokenClassification from lfcc +author: John Snow Labs +name: bert_portuguese_ner +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_portuguese_ner` is a English model originally trained by lfcc. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_portuguese_ner_en_5.5.0_3.0_1725690806789.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_portuguese_ner_en_5.5.0_3.0_1725690806789.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("bert_portuguese_ner","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bert_portuguese_ner", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
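+
+For low-latency, single-sentence inference, the fitted model can be wrapped in a `LightPipeline`. This is a sketch under the assumption that `pipelineModel` was fitted as above; the Portuguese sentence is an arbitrary example:
+
+```python
+from sparknlp.base import LightPipeline
+
+# Sketch only: avoid DataFrame overhead for one-off predictions.
+light = LightPipeline(pipelineModel)
+print(light.annotate("O Museu Nacional fica no Rio de Janeiro.")["ner"])
+```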
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_portuguese_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|406.0 MB| + +## References + +https://huggingface.co/lfcc/bert-portuguese-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bert_portuguese_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-bert_portuguese_ner_pipeline_en.md new file mode 100644 index 00000000000000..71c13db0dfdf0b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bert_portuguese_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_portuguese_ner_pipeline pipeline BertForTokenClassification from lfcc +author: John Snow Labs +name: bert_portuguese_ner_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_portuguese_ner_pipeline` is a English model originally trained by lfcc. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_portuguese_ner_pipeline_en_5.5.0_3.0_1725690825180.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_portuguese_ner_pipeline_en_5.5.0_3.0_1725690825180.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_portuguese_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_portuguese_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_portuguese_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.0 MB| + +## References + +https://huggingface.co/lfcc/bert-portuguese-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bert_qa_bert_multi_uncased_finetuned_xquadv1_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-07-bert_qa_bert_multi_uncased_finetuned_xquadv1_pipeline_xx.md new file mode 100644 index 00000000000000..9dcd660a8f890f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bert_qa_bert_multi_uncased_finetuned_xquadv1_pipeline_xx.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Multilingual bert_qa_bert_multi_uncased_finetuned_xquadv1_pipeline pipeline BertForQuestionAnswering from mrm8488 +author: John Snow Labs +name: bert_qa_bert_multi_uncased_finetuned_xquadv1_pipeline +date: 2024-09-07 +tags: [xx, open_source, pipeline, onnx] +task: Question Answering +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_qa_bert_multi_uncased_finetuned_xquadv1_pipeline` is a Multilingual model originally trained by mrm8488. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_qa_bert_multi_uncased_finetuned_xquadv1_pipeline_xx_5.5.0_3.0_1725671981029.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_qa_bert_multi_uncased_finetuned_xquadv1_pipeline_xx_5.5.0_3.0_1725671981029.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_qa_bert_multi_uncased_finetuned_xquadv1_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_qa_bert_multi_uncased_finetuned_xquadv1_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_qa_bert_multi_uncased_finetuned_xquadv1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|625.5 MB| + +## References + +https://huggingface.co/mrm8488/bert-multi-uncased-finetuned-xquadv1 + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bert_qa_bert_multi_uncased_finetuned_xquadv1_xx.md b/docs/_posts/ahmedlone127/2024-09-07-bert_qa_bert_multi_uncased_finetuned_xquadv1_xx.md new file mode 100644 index 00000000000000..551dd6410b272b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bert_qa_bert_multi_uncased_finetuned_xquadv1_xx.md @@ -0,0 +1,86 @@ +--- +layout: model +title: Multilingual bert_qa_bert_multi_uncased_finetuned_xquadv1 BertForQuestionAnswering from mrm8488 +author: John Snow Labs +name: bert_qa_bert_multi_uncased_finetuned_xquadv1 +date: 2024-09-07 +tags: [xx, open_source, onnx, question_answering, bert] +task: Question Answering +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_qa_bert_multi_uncased_finetuned_xquadv1` is a Multilingual model originally trained by mrm8488. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_qa_bert_multi_uncased_finetuned_xquadv1_xx_5.5.0_3.0_1725671952281.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_qa_bert_multi_uncased_finetuned_xquadv1_xx_5.5.0_3.0_1725671952281.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = BertForQuestionAnswering.pretrained("bert_qa_bert_multi_uncased_finetuned_xquadv1","xx") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = BertForQuestionAnswering.pretrained("bert_qa_bert_multi_uncased_finetuned_xquadv1", "xx")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_qa_bert_multi_uncased_finetuned_xquadv1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|xx| +|Size:|625.5 MB| + +## References + +https://huggingface.co/mrm8488/bert-multi-uncased-finetuned-xquadv1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bert_qa_model_jahanzeb1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-bert_qa_model_jahanzeb1_pipeline_en.md new file mode 100644 index 00000000000000..ffbcd9bf14653e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bert_qa_model_jahanzeb1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bert_qa_model_jahanzeb1_pipeline pipeline DistilBertForQuestionAnswering from Jahanzeb1 +author: John Snow Labs +name: bert_qa_model_jahanzeb1_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_qa_model_jahanzeb1_pipeline` is a English model originally trained by Jahanzeb1. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_qa_model_jahanzeb1_pipeline_en_5.5.0_3.0_1725695564881.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_qa_model_jahanzeb1_pipeline_en_5.5.0_3.0_1725695564881.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_qa_model_jahanzeb1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_qa_model_jahanzeb1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_qa_model_jahanzeb1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Jahanzeb1/BERT_QA_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bert_static_malware_detection_en.md b/docs/_posts/ahmedlone127/2024-09-07-bert_static_malware_detection_en.md new file mode 100644 index 00000000000000..23c35703d7ab0c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bert_static_malware_detection_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_static_malware_detection DistilBertForSequenceClassification from sibumi +author: John Snow Labs +name: bert_static_malware_detection +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_static_malware_detection` is a English model originally trained by sibumi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_static_malware_detection_en_5.5.0_3.0_1725674525882.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_static_malware_detection_en_5.5.0_3.0_1725674525882.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("bert_static_malware_detection","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("bert_static_malware_detection", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
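+
+To read the predicted label back, a short sketch assuming the `pipelineDF` produced above:
+
+```python
+# Sketch only: the predicted class label for each input document.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```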
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_static_malware_detection| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/sibumi/BERT_static_malware-detection \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bert_turkish_fine_tuning_question_answering_en.md b/docs/_posts/ahmedlone127/2024-09-07-bert_turkish_fine_tuning_question_answering_en.md new file mode 100644 index 00000000000000..55d02923d7cdd7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bert_turkish_fine_tuning_question_answering_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English bert_turkish_fine_tuning_question_answering BertForQuestionAnswering from ProfHuseyin +author: John Snow Labs +name: bert_turkish_fine_tuning_question_answering +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_turkish_fine_tuning_question_answering` is a English model originally trained by ProfHuseyin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_turkish_fine_tuning_question_answering_en_5.5.0_3.0_1725709706844.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_turkish_fine_tuning_question_answering_en_5.5.0_3.0_1725709706844.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = BertForQuestionAnswering.pretrained("bert_turkish_fine_tuning_question_answering","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = BertForQuestionAnswering.pretrained("bert_turkish_fine_tuning_question_answering", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_turkish_fine_tuning_question_answering| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|412.3 MB| + +## References + +https://huggingface.co/ProfHuseyin/bert-turkish-fine-tuning-question-answering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bert_turkish_fine_tuning_question_answering_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-bert_turkish_fine_tuning_question_answering_pipeline_en.md new file mode 100644 index 00000000000000..75a3f5ba8aa2c1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bert_turkish_fine_tuning_question_answering_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bert_turkish_fine_tuning_question_answering_pipeline pipeline BertForQuestionAnswering from ProfHuseyin +author: John Snow Labs +name: bert_turkish_fine_tuning_question_answering_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_turkish_fine_tuning_question_answering_pipeline` is a English model originally trained by ProfHuseyin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_turkish_fine_tuning_question_answering_pipeline_en_5.5.0_3.0_1725709725482.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_turkish_fine_tuning_question_answering_pipeline_en_5.5.0_3.0_1725709725482.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bert_turkish_fine_tuning_question_answering_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bert_turkish_fine_tuning_question_answering_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_turkish_fine_tuning_question_answering_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|412.3 MB| + +## References + +https://huggingface.co/ProfHuseyin/bert-turkish-fine-tuning-question-answering + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bertspan4ner_base_chinese_pipeline_zh.md b/docs/_posts/ahmedlone127/2024-09-07-bertspan4ner_base_chinese_pipeline_zh.md new file mode 100644 index 00000000000000..8bbdd5574e1be0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bertspan4ner_base_chinese_pipeline_zh.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Chinese bertspan4ner_base_chinese_pipeline pipeline BertForTokenClassification from shibing624 +author: John Snow Labs +name: bertspan4ner_base_chinese_pipeline +date: 2024-09-07 +tags: [zh, open_source, pipeline, onnx] +task: Named Entity Recognition +language: zh +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bertspan4ner_base_chinese_pipeline` is a Chinese model originally trained by shibing624. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertspan4ner_base_chinese_pipeline_zh_5.5.0_3.0_1725690652283.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertspan4ner_base_chinese_pipeline_zh_5.5.0_3.0_1725690652283.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bertspan4ner_base_chinese_pipeline", lang = "zh") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bertspan4ner_base_chinese_pipeline", lang = "zh") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertspan4ner_base_chinese_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|zh| +|Size:|381.1 MB| + +## References + +https://huggingface.co/shibing624/bertspan4ner-base-chinese + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bertspan4ner_base_chinese_zh.md b/docs/_posts/ahmedlone127/2024-09-07-bertspan4ner_base_chinese_zh.md new file mode 100644 index 00000000000000..270926043dd1ed --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bertspan4ner_base_chinese_zh.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Chinese bertspan4ner_base_chinese BertForTokenClassification from shibing624 +author: John Snow Labs +name: bertspan4ner_base_chinese +date: 2024-09-07 +tags: [zh, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: zh +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bertspan4ner_base_chinese` is a Chinese model originally trained by shibing624. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertspan4ner_base_chinese_zh_5.5.0_3.0_1725690635064.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertspan4ner_base_chinese_zh_5.5.0_3.0_1725690635064.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# Input columns must match the assembler's and tokenizer's output columns.
+tokenClassifier = BertForTokenClassification.pretrained("bertspan4ner_base_chinese","zh") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("bertspan4ner_base_chinese", "zh")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
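+
+Because this is a Chinese NER model, a Chinese sentence is a more representative input than the English placeholder used above. The sketch below is only illustrative: the example sentence is made up, and the output column follows the `ner` column defined in the snippet above.
+
+```python
+# Assumes the pipeline built in the Python snippet above.
+data = spark.createDataFrame([["王小明在北京的清华大学读书。"]]).toDF("text")
+result = pipeline.fit(data).transform(data)
+
+# One predicted tag per token.
+result.select("ner.result").show(truncate=False)
+```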
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertspan4ner_base_chinese| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|zh| +|Size:|381.1 MB| + +## References + +https://huggingface.co/shibing624/bertspan4ner-base-chinese \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-berturk_social_5m_en.md b/docs/_posts/ahmedlone127/2024-09-07-berturk_social_5m_en.md new file mode 100644 index 00000000000000..b7ca5ba7aa3891 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-berturk_social_5m_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English berturk_social_5m RoBertaEmbeddings from YSKartal +author: John Snow Labs +name: berturk_social_5m +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`berturk_social_5m` is a English model originally trained by YSKartal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/berturk_social_5m_en_5.5.0_3.0_1725716621410.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/berturk_social_5m_en_5.5.0_3.0_1725716621410.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("berturk_social_5m","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("berturk_social_5m","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
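+
+If plain Spark ML vectors are needed downstream (for clustering, similarity, or a classifier), the embedding annotations produced above can be converted with `EmbeddingsFinisher`. This is a minimal sketch built on the `pipelineDF` from the snippet above; the output column name is an assumption.
+
+```python
+from sparknlp.base import EmbeddingsFinisher
+
+# Turns the token-level embedding annotations into Spark ML vectors, one vector per token.
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["finished_embeddings"]) \
+    .setOutputAsVector(True)
+
+finished = finisher.transform(pipelineDF)
+finished.selectExpr("explode(finished_embeddings) as vector").show(5, truncate=80)
+```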
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|berturk_social_5m| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|311.6 MB| + +## References + +https://huggingface.co/YSKartal/berturk-social-5m \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-berturk_social_5m_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-berturk_social_5m_pipeline_en.md new file mode 100644 index 00000000000000..ead923f54875e8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-berturk_social_5m_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English berturk_social_5m_pipeline pipeline RoBertaEmbeddings from YSKartal +author: John Snow Labs +name: berturk_social_5m_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`berturk_social_5m_pipeline` is a English model originally trained by YSKartal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/berturk_social_5m_pipeline_en_5.5.0_3.0_1725716636167.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/berturk_social_5m_pipeline_en_5.5.0_3.0_1725716636167.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("berturk_social_5m_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("berturk_social_5m_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|berturk_social_5m_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|311.6 MB| + +## References + +https://huggingface.co/YSKartal/berturk-social-5m + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-biodivbert_en.md b/docs/_posts/ahmedlone127/2024-09-07-biodivbert_en.md new file mode 100644 index 00000000000000..4b3e051b0c7950 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-biodivbert_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English biodivbert BertEmbeddings from NoYo25 +author: John Snow Labs +name: biodivbert +date: 2024-09-07 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`biodivbert` is a English model originally trained by NoYo25. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biodivbert_en_5.5.0_3.0_1725735173112.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biodivbert_en_5.5.0_3.0_1725735173112.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+# A Tokenizer stage is required because BertEmbeddings consumes both document and token annotations.
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("biodivbert","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("biodivbert", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val data = Seq("I love spark-nlp").toDF("text")
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+```
+</div>
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|biodivbert|
+|Compatibility:|Spark NLP 5.5.0+|
+|License:|Open Source|
+|Edition:|Official|
+|Input Labels:|[document, token]|
+|Output Labels:|[ner]|
+|Language:|en|
+|Size:|403.6 MB|
+
+## References
+
+https://huggingface.co/NoYo25/BiodivBERT
\ No newline at end of file
diff --git a/docs/_posts/ahmedlone127/2024-09-07-biomedroberta_finetuned_valid_testing_0_00005_16_en.md b/docs/_posts/ahmedlone127/2024-09-07-biomedroberta_finetuned_valid_testing_0_00005_16_en.md
new file mode 100644
index 00000000000000..82e99679926ec4
--- /dev/null
+++ b/docs/_posts/ahmedlone127/2024-09-07-biomedroberta_finetuned_valid_testing_0_00005_16_en.md
@@ -0,0 +1,94 @@
+---
+layout: model
+title: English biomedroberta_finetuned_valid_testing_0_00005_16 RoBertaForTokenClassification from pabRomero
+author: John Snow Labs
+name: biomedroberta_finetuned_valid_testing_0_00005_16
+date: 2024-09-07
+tags: [en, open_source, onnx, token_classification, roberta, ner]
+task: Named Entity Recognition
+language: en
+edition: Spark NLP 5.5.0
+spark_version: 3.0
+supported: true
+engine: onnx
+annotator: RoBertaForTokenClassification
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `biomedroberta_finetuned_valid_testing_0_00005_16` is an English model originally trained by pabRomero.
+
+{:.btn-box}
+
+
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biomedroberta_finetuned_valid_testing_0_00005_16_en_5.5.0_3.0_1725706502289.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biomedroberta_finetuned_valid_testing_0_00005_16_en_5.5.0_3.0_1725706502289.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("biomedroberta_finetuned_valid_testing_0_00005_16","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("biomedroberta_finetuned_valid_testing_0_00005_16", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biomedroberta_finetuned_valid_testing_0_00005_16| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|466.3 MB| + +## References + +https://huggingface.co/pabRomero/BioMedRoBERTa-finetuned-valid-testing-0.00005-16 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-biomedroberta_finetuned_valid_testing_0_0001_16_en.md b/docs/_posts/ahmedlone127/2024-09-07-biomedroberta_finetuned_valid_testing_0_0001_16_en.md new file mode 100644 index 00000000000000..9848a913efc70d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-biomedroberta_finetuned_valid_testing_0_0001_16_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English biomedroberta_finetuned_valid_testing_0_0001_16 RoBertaForTokenClassification from pabRomero +author: John Snow Labs +name: biomedroberta_finetuned_valid_testing_0_0001_16 +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`biomedroberta_finetuned_valid_testing_0_0001_16` is a English model originally trained by pabRomero. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biomedroberta_finetuned_valid_testing_0_0001_16_en_5.5.0_3.0_1725707317606.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biomedroberta_finetuned_valid_testing_0_0001_16_en_5.5.0_3.0_1725707317606.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("biomedroberta_finetuned_valid_testing_0_0001_16","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("biomedroberta_finetuned_valid_testing_0_0001_16", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biomedroberta_finetuned_valid_testing_0_0001_16| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|466.3 MB| + +## References + +https://huggingface.co/pabRomero/BioMedRoBERTa-finetuned-valid-testing-0.0001-16 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-biomedroberta_finetuned_valid_testing_0_0001_16_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-biomedroberta_finetuned_valid_testing_0_0001_16_pipeline_en.md new file mode 100644 index 00000000000000..8dc4565456dcce --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-biomedroberta_finetuned_valid_testing_0_0001_16_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English biomedroberta_finetuned_valid_testing_0_0001_16_pipeline pipeline RoBertaForTokenClassification from pabRomero +author: John Snow Labs +name: biomedroberta_finetuned_valid_testing_0_0001_16_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`biomedroberta_finetuned_valid_testing_0_0001_16_pipeline` is a English model originally trained by pabRomero. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biomedroberta_finetuned_valid_testing_0_0001_16_pipeline_en_5.5.0_3.0_1725707337939.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biomedroberta_finetuned_valid_testing_0_0001_16_pipeline_en_5.5.0_3.0_1725707337939.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("biomedroberta_finetuned_valid_testing_0_0001_16_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("biomedroberta_finetuned_valid_testing_0_0001_16_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biomedroberta_finetuned_valid_testing_0_0001_16_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|466.3 MB| + +## References + +https://huggingface.co/pabRomero/BioMedRoBERTa-finetuned-valid-testing-0.0001-16 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bislama_all_bs160_allneg_finetuned_webnlg2020_correctness_en.md b/docs/_posts/ahmedlone127/2024-09-07-bislama_all_bs160_allneg_finetuned_webnlg2020_correctness_en.md new file mode 100644 index 00000000000000..e1b82ebc05631c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bislama_all_bs160_allneg_finetuned_webnlg2020_correctness_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English bislama_all_bs160_allneg_finetuned_webnlg2020_correctness MPNetEmbeddings from teven +author: John Snow Labs +name: bislama_all_bs160_allneg_finetuned_webnlg2020_correctness +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bislama_all_bs160_allneg_finetuned_webnlg2020_correctness` is a English model originally trained by teven. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bislama_all_bs160_allneg_finetuned_webnlg2020_correctness_en_5.5.0_3.0_1725703568771.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bislama_all_bs160_allneg_finetuned_webnlg2020_correctness_en_5.5.0_3.0_1725703568771.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("bislama_all_bs160_allneg_finetuned_webnlg2020_correctness","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("bislama_all_bs160_allneg_finetuned_webnlg2020_correctness","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
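+
+Sentence-level MPNet embeddings such as these are typically used for semantic similarity. The sketch below compares two sentences with cosine similarity; the example texts and the NumPy post-processing are illustrative additions, not part of the original card.
+
+```python
+import numpy as np
+
+# Assumes the `pipeline` defined in the Python snippet above.
+texts = spark.createDataFrame([["I love Spark NLP"], ["Spark NLP is great"]]).toDF("text")
+rows = pipeline.fit(texts).transform(texts).select("embeddings.embeddings").collect()
+
+# Each row holds one sentence embedding produced by MPNetEmbeddings.
+vec_a = np.array(rows[0][0][0])
+vec_b = np.array(rows[1][0][0])
+cosine = float(vec_a @ vec_b / (np.linalg.norm(vec_a) * np.linalg.norm(vec_b)))
+print(f"cosine similarity: {cosine:.3f}")
+```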
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bislama_all_bs160_allneg_finetuned_webnlg2020_correctness| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.3 MB| + +## References + +https://huggingface.co/teven/bi_all_bs160_allneg_finetuned_WebNLG2020_correctness \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bislama_all_mpnet_base_v2_finetuned_webnlg2017_en.md b/docs/_posts/ahmedlone127/2024-09-07-bislama_all_mpnet_base_v2_finetuned_webnlg2017_en.md new file mode 100644 index 00000000000000..6d472757473057 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bislama_all_mpnet_base_v2_finetuned_webnlg2017_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English bislama_all_mpnet_base_v2_finetuned_webnlg2017 MPNetEmbeddings from teven +author: John Snow Labs +name: bislama_all_mpnet_base_v2_finetuned_webnlg2017 +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bislama_all_mpnet_base_v2_finetuned_webnlg2017` is a English model originally trained by teven. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bislama_all_mpnet_base_v2_finetuned_webnlg2017_en_5.5.0_3.0_1725702868903.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bislama_all_mpnet_base_v2_finetuned_webnlg2017_en_5.5.0_3.0_1725702868903.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("bislama_all_mpnet_base_v2_finetuned_webnlg2017","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("bislama_all_mpnet_base_v2_finetuned_webnlg2017","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bislama_all_mpnet_base_v2_finetuned_webnlg2017| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/teven/bi_all-mpnet-base-v2_finetuned_WebNLG2017 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bislama_all_mpnet_base_v2_finetuned_webnlg2017_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-bislama_all_mpnet_base_v2_finetuned_webnlg2017_pipeline_en.md new file mode 100644 index 00000000000000..86f046d742fa1a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bislama_all_mpnet_base_v2_finetuned_webnlg2017_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English bislama_all_mpnet_base_v2_finetuned_webnlg2017_pipeline pipeline MPNetEmbeddings from teven +author: John Snow Labs +name: bislama_all_mpnet_base_v2_finetuned_webnlg2017_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bislama_all_mpnet_base_v2_finetuned_webnlg2017_pipeline` is a English model originally trained by teven. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bislama_all_mpnet_base_v2_finetuned_webnlg2017_pipeline_en_5.5.0_3.0_1725702886901.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bislama_all_mpnet_base_v2_finetuned_webnlg2017_pipeline_en_5.5.0_3.0_1725702886901.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bislama_all_mpnet_base_v2_finetuned_webnlg2017_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bislama_all_mpnet_base_v2_finetuned_webnlg2017_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bislama_all_mpnet_base_v2_finetuned_webnlg2017_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/teven/bi_all-mpnet-base-v2_finetuned_WebNLG2017 + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bmg_translation_lug_english_v1_en.md b/docs/_posts/ahmedlone127/2024-09-07-bmg_translation_lug_english_v1_en.md new file mode 100644 index 00000000000000..4aab897cb86eef --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bmg_translation_lug_english_v1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bmg_translation_lug_english_v1 MarianTransformer from atwine +author: John Snow Labs +name: bmg_translation_lug_english_v1 +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bmg_translation_lug_english_v1` is a English model originally trained by atwine. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bmg_translation_lug_english_v1_en_5.5.0_3.0_1725740520743.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bmg_translation_lug_english_v1_en_5.5.0_3.0_1725740520743.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("bmg_translation_lug_english_v1","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("bmg_translation_lug_english_v1","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
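+
+Once the pipeline above has run, the English output can be read from the `translation` column. The Luganda example sentence below is only a placeholder to illustrate the call.
+
+```python
+# Assumes the pipeline built in the Python snippet above.
+data = spark.createDataFrame([["Oli otya?"]]).toDF("text")
+result = pipeline.fit(data).transform(data)
+
+# One translated string per detected sentence.
+result.select("translation.result").show(truncate=False)
+```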
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bmg_translation_lug_english_v1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|513.5 MB| + +## References + +https://huggingface.co/atwine/bmg-translation-lug-en-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bmg_translation_lug_english_v1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-bmg_translation_lug_english_v1_pipeline_en.md new file mode 100644 index 00000000000000..5f80ac443d7a7f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bmg_translation_lug_english_v1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bmg_translation_lug_english_v1_pipeline pipeline MarianTransformer from atwine +author: John Snow Labs +name: bmg_translation_lug_english_v1_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bmg_translation_lug_english_v1_pipeline` is a English model originally trained by atwine. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bmg_translation_lug_english_v1_pipeline_en_5.5.0_3.0_1725740544550.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bmg_translation_lug_english_v1_pipeline_en_5.5.0_3.0_1725740544550.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bmg_translation_lug_english_v1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bmg_translation_lug_english_v1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bmg_translation_lug_english_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|514.0 MB| + +## References + +https://huggingface.co/atwine/bmg-translation-lug-en-v1 + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bpe_selfies_pubchem_shard00_120k_en.md b/docs/_posts/ahmedlone127/2024-09-07-bpe_selfies_pubchem_shard00_120k_en.md new file mode 100644 index 00000000000000..a321f5944eeed4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bpe_selfies_pubchem_shard00_120k_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bpe_selfies_pubchem_shard00_120k RoBertaEmbeddings from seyonec +author: John Snow Labs +name: bpe_selfies_pubchem_shard00_120k +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bpe_selfies_pubchem_shard00_120k` is a English model originally trained by seyonec. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bpe_selfies_pubchem_shard00_120k_en_5.5.0_3.0_1725716255611.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bpe_selfies_pubchem_shard00_120k_en_5.5.0_3.0_1725716255611.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("bpe_selfies_pubchem_shard00_120k","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("bpe_selfies_pubchem_shard00_120k","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bpe_selfies_pubchem_shard00_120k| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|309.4 MB| + +## References + +https://huggingface.co/seyonec/BPE_SELFIES_PubChem_shard00_120k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bpe_selfies_pubchem_shard00_120k_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-bpe_selfies_pubchem_shard00_120k_pipeline_en.md new file mode 100644 index 00000000000000..435976d1cd2d5f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bpe_selfies_pubchem_shard00_120k_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bpe_selfies_pubchem_shard00_120k_pipeline pipeline RoBertaEmbeddings from seyonec +author: John Snow Labs +name: bpe_selfies_pubchem_shard00_120k_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bpe_selfies_pubchem_shard00_120k_pipeline` is a English model originally trained by seyonec. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bpe_selfies_pubchem_shard00_120k_pipeline_en_5.5.0_3.0_1725716269882.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bpe_selfies_pubchem_shard00_120k_pipeline_en_5.5.0_3.0_1725716269882.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bpe_selfies_pubchem_shard00_120k_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bpe_selfies_pubchem_shard00_120k_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bpe_selfies_pubchem_shard00_120k_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|309.4 MB| + +## References + +https://huggingface.co/seyonec/BPE_SELFIES_PubChem_shard00_120k + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bpe_selfies_pubchem_shard00_150k_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-bpe_selfies_pubchem_shard00_150k_pipeline_en.md new file mode 100644 index 00000000000000..6ee43fe0487da4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bpe_selfies_pubchem_shard00_150k_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bpe_selfies_pubchem_shard00_150k_pipeline pipeline RoBertaEmbeddings from seyonec +author: John Snow Labs +name: bpe_selfies_pubchem_shard00_150k_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bpe_selfies_pubchem_shard00_150k_pipeline` is a English model originally trained by seyonec. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bpe_selfies_pubchem_shard00_150k_pipeline_en_5.5.0_3.0_1725672743969.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bpe_selfies_pubchem_shard00_150k_pipeline_en_5.5.0_3.0_1725672743969.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bpe_selfies_pubchem_shard00_150k_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bpe_selfies_pubchem_shard00_150k_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bpe_selfies_pubchem_shard00_150k_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|309.3 MB| + +## References + +https://huggingface.co/seyonec/BPE_SELFIES_PubChem_shard00_150k + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-brwac_v1_2__checkpoint_last_en.md b/docs/_posts/ahmedlone127/2024-09-07-brwac_v1_2__checkpoint_last_en.md new file mode 100644 index 00000000000000..4c89a718efb243 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-brwac_v1_2__checkpoint_last_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English brwac_v1_2__checkpoint_last RoBertaEmbeddings from eduagarcia-temp +author: John Snow Labs +name: brwac_v1_2__checkpoint_last +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`brwac_v1_2__checkpoint_last` is a English model originally trained by eduagarcia-temp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/brwac_v1_2__checkpoint_last_en_5.5.0_3.0_1725716451468.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/brwac_v1_2__checkpoint_last_en_5.5.0_3.0_1725716451468.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("brwac_v1_2__checkpoint_last","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("brwac_v1_2__checkpoint_last","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|brwac_v1_2__checkpoint_last| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|298.3 MB| + +## References + +https://huggingface.co/eduagarcia-temp/brwac_v1_2__checkpoint_last \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bsc_bio_ehr_spanish_cantemist_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-bsc_bio_ehr_spanish_cantemist_ner_pipeline_en.md new file mode 100644 index 00000000000000..48231e1b08df4b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bsc_bio_ehr_spanish_cantemist_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bsc_bio_ehr_spanish_cantemist_ner_pipeline pipeline RoBertaForTokenClassification from Rodrigo1771 +author: John Snow Labs +name: bsc_bio_ehr_spanish_cantemist_ner_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bsc_bio_ehr_spanish_cantemist_ner_pipeline` is a English model originally trained by Rodrigo1771. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_cantemist_ner_pipeline_en_5.5.0_3.0_1725708008611.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_cantemist_ner_pipeline_en_5.5.0_3.0_1725708008611.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bsc_bio_ehr_spanish_cantemist_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bsc_bio_ehr_spanish_cantemist_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
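+
+For quick experiments without building a DataFrame, `PretrainedPipeline` also exposes `annotate()` for single strings. The Spanish example sentence and the `ner` output key below are assumptions for illustration.
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("bsc_bio_ehr_spanish_cantemist_ner_pipeline", lang = "en")
+
+# annotate() returns a dict keyed by the pipeline's output columns.
+result = pipeline.annotate("Paciente diagnosticada de carcinoma ductal infiltrante de mama.")
+print(result["ner"])
+```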
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bsc_bio_ehr_spanish_cantemist_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|437.2 MB| + +## References + +https://huggingface.co/Rodrigo1771/bsc-bio-ehr-es-cantemist-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bsc_bio_ehr_spanish_carmen_distemist_es.md b/docs/_posts/ahmedlone127/2024-09-07-bsc_bio_ehr_spanish_carmen_distemist_es.md new file mode 100644 index 00000000000000..ec135f43b28edb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bsc_bio_ehr_spanish_carmen_distemist_es.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Castilian, Spanish bsc_bio_ehr_spanish_carmen_distemist RoBertaForTokenClassification from BSC-NLP4BIA +author: John Snow Labs +name: bsc_bio_ehr_spanish_carmen_distemist +date: 2024-09-07 +tags: [es, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bsc_bio_ehr_spanish_carmen_distemist` is a Castilian, Spanish model originally trained by BSC-NLP4BIA. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_carmen_distemist_es_5.5.0_3.0_1725667883931.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_carmen_distemist_es_5.5.0_3.0_1725667883931.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("bsc_bio_ehr_spanish_carmen_distemist","es") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("bsc_bio_ehr_spanish_carmen_distemist", "es")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bsc_bio_ehr_spanish_carmen_distemist| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|es| +|Size:|449.4 MB| + +## References + +https://huggingface.co/BSC-NLP4BIA/bsc-bio-ehr-es-carmen-distemist \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bsc_bio_ehr_spanish_carmen_distemist_pipeline_es.md b/docs/_posts/ahmedlone127/2024-09-07-bsc_bio_ehr_spanish_carmen_distemist_pipeline_es.md new file mode 100644 index 00000000000000..5c8ba199ef990f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bsc_bio_ehr_spanish_carmen_distemist_pipeline_es.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Castilian, Spanish bsc_bio_ehr_spanish_carmen_distemist_pipeline pipeline RoBertaForTokenClassification from BSC-NLP4BIA +author: John Snow Labs +name: bsc_bio_ehr_spanish_carmen_distemist_pipeline +date: 2024-09-07 +tags: [es, open_source, pipeline, onnx] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bsc_bio_ehr_spanish_carmen_distemist_pipeline` is a Castilian, Spanish model originally trained by BSC-NLP4BIA. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_carmen_distemist_pipeline_es_5.5.0_3.0_1725667906501.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_carmen_distemist_pipeline_es_5.5.0_3.0_1725667906501.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bsc_bio_ehr_spanish_carmen_distemist_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bsc_bio_ehr_spanish_carmen_distemist_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bsc_bio_ehr_spanish_carmen_distemist_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|449.5 MB| + +## References + +https://huggingface.co/BSC-NLP4BIA/bsc-bio-ehr-es-carmen-distemist + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bsc_bio_ehr_spanish_carmen_meddocan_es.md b/docs/_posts/ahmedlone127/2024-09-07-bsc_bio_ehr_spanish_carmen_meddocan_es.md new file mode 100644 index 00000000000000..9dc6447559f23d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bsc_bio_ehr_spanish_carmen_meddocan_es.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Castilian, Spanish bsc_bio_ehr_spanish_carmen_meddocan RoBertaForTokenClassification from BSC-NLP4BIA +author: John Snow Labs +name: bsc_bio_ehr_spanish_carmen_meddocan +date: 2024-09-07 +tags: [es, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bsc_bio_ehr_spanish_carmen_meddocan` is a Castilian, Spanish model originally trained by BSC-NLP4BIA. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_carmen_meddocan_es_5.5.0_3.0_1725721601916.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_carmen_meddocan_es_5.5.0_3.0_1725721601916.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("bsc_bio_ehr_spanish_carmen_meddocan","es") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("bsc_bio_ehr_spanish_carmen_meddocan", "es")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bsc_bio_ehr_spanish_carmen_meddocan| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|es| +|Size:|452.7 MB| + +## References + +https://huggingface.co/BSC-NLP4BIA/bsc-bio-ehr-es-carmen-meddocan \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bsc_bio_ehr_spanish_combined_train_drugtemist_dev_85_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-bsc_bio_ehr_spanish_combined_train_drugtemist_dev_85_ner_pipeline_en.md new file mode 100644 index 00000000000000..f8b59991630bde --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bsc_bio_ehr_spanish_combined_train_drugtemist_dev_85_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bsc_bio_ehr_spanish_combined_train_drugtemist_dev_85_ner_pipeline pipeline RoBertaForTokenClassification from Rodrigo1771 +author: John Snow Labs +name: bsc_bio_ehr_spanish_combined_train_drugtemist_dev_85_ner_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bsc_bio_ehr_spanish_combined_train_drugtemist_dev_85_ner_pipeline` is a English model originally trained by Rodrigo1771. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_combined_train_drugtemist_dev_85_ner_pipeline_en_5.5.0_3.0_1725721028586.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_combined_train_drugtemist_dev_85_ner_pipeline_en_5.5.0_3.0_1725721028586.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bsc_bio_ehr_spanish_combined_train_drugtemist_dev_85_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bsc_bio_ehr_spanish_combined_train_drugtemist_dev_85_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bsc_bio_ehr_spanish_combined_train_drugtemist_dev_85_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|440.6 MB| + +## References + +https://huggingface.co/Rodrigo1771/bsc-bio-ehr-es-combined-train-drugtemist-dev-85-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bsc_bio_ehr_spanish_livingner_humano_es.md b/docs/_posts/ahmedlone127/2024-09-07-bsc_bio_ehr_spanish_livingner_humano_es.md new file mode 100644 index 00000000000000..b7f62c7e04ac6a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bsc_bio_ehr_spanish_livingner_humano_es.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Castilian, Spanish bsc_bio_ehr_spanish_livingner_humano RoBertaForTokenClassification from BSC-NLP4BIA +author: John Snow Labs +name: bsc_bio_ehr_spanish_livingner_humano +date: 2024-09-07 +tags: [es, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bsc_bio_ehr_spanish_livingner_humano` is a Castilian, Spanish model originally trained by BSC-NLP4BIA. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_livingner_humano_es_5.5.0_3.0_1725724145471.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_livingner_humano_es_5.5.0_3.0_1725724145471.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, RoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("bsc_bio_ehr_spanish_livingner_humano","es") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("bsc_bio_ehr_spanish_livingner_humano", "es")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bsc_bio_ehr_spanish_livingner_humano| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|es| +|Size:|449.9 MB| + +## References + +https://huggingface.co/BSC-NLP4BIA/bsc-bio-ehr-es-livingner-humano \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-bsc_bio_ehr_spanish_symptemist_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-bsc_bio_ehr_spanish_symptemist_ner_pipeline_en.md new file mode 100644 index 00000000000000..4f939b6f6394b0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-bsc_bio_ehr_spanish_symptemist_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bsc_bio_ehr_spanish_symptemist_ner_pipeline pipeline RoBertaForTokenClassification from Rodrigo1771 +author: John Snow Labs +name: bsc_bio_ehr_spanish_symptemist_ner_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bsc_bio_ehr_spanish_symptemist_ner_pipeline` is a English model originally trained by Rodrigo1771. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_symptemist_ner_pipeline_en_5.5.0_3.0_1725668485022.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bsc_bio_ehr_spanish_symptemist_ner_pipeline_en_5.5.0_3.0_1725668485022.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("bsc_bio_ehr_spanish_symptemist_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("bsc_bio_ehr_spanish_symptemist_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bsc_bio_ehr_spanish_symptemist_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|434.9 MB| + +## References + +https://huggingface.co/Rodrigo1771/bsc-bio-ehr-es-symptemist-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_model_akash24_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_model_akash24_en.md new file mode 100644 index 00000000000000..11f99eb5bd2f8e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_model_akash24_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_model_akash24 AlbertForSequenceClassification from Akash24 +author: John Snow Labs +name: burmese_awesome_model_akash24 +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_model_akash24` is a English model originally trained by Akash24. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_model_akash24_en_5.5.0_3.0_1725732405878.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_model_akash24_en_5.5.0_3.0_1725732405878.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, AlbertForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = AlbertForSequenceClassification.pretrained("burmese_awesome_model_akash24","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = AlbertForSequenceClassification.pretrained("burmese_awesome_model_akash24", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
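+For quick ad-hoc scoring of a few strings, the fitted model can also be wrapped in a LightPipeline; a sketch assuming the `pipelineModel` fitted in the example above:
+
+```python
+# In-memory inference without building a DataFrame for every request.
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+print(light.annotate("I love spark-nlp"))  # dict mapping output columns (e.g. "class") to their results
+```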
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_model_akash24| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.3 MB| + +## References + +https://huggingface.co/Akash24/my_awesome_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_model_mitra_uta_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_model_mitra_uta_en.md new file mode 100644 index 00000000000000..2e69d529638026 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_model_mitra_uta_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_model_mitra_uta DistilBertForSequenceClassification from Mitra-uta +author: John Snow Labs +name: burmese_awesome_model_mitra_uta +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_model_mitra_uta` is a English model originally trained by Mitra-uta. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_model_mitra_uta_en_5.5.0_3.0_1725674436253.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_model_mitra_uta_en_5.5.0_3.0_1725674436253.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("burmese_awesome_model_mitra_uta","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("burmese_awesome_model_mitra_uta", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_model_mitra_uta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Mitra-uta/my_awesome_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_model_mitra_uta_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_model_mitra_uta_pipeline_en.md new file mode 100644 index 00000000000000..04a151b27ad46c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_model_mitra_uta_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_model_mitra_uta_pipeline pipeline DistilBertForSequenceClassification from Mitra-uta +author: John Snow Labs +name: burmese_awesome_model_mitra_uta_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_model_mitra_uta_pipeline` is a English model originally trained by Mitra-uta. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_model_mitra_uta_pipeline_en_5.5.0_3.0_1725674448086.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_model_mitra_uta_pipeline_en_5.5.0_3.0_1725674448086.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_model_mitra_uta_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_model_mitra_uta_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_model_mitra_uta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Mitra-uta/my_awesome_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_abhinavreddy17_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_abhinavreddy17_en.md new file mode 100644 index 00000000000000..4adf9c79fa2683 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_abhinavreddy17_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_qa_model_abhinavreddy17 DistilBertForQuestionAnswering from abhinavreddy17 +author: John Snow Labs +name: burmese_awesome_qa_model_abhinavreddy17 +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_abhinavreddy17` is a English model originally trained by abhinavreddy17. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_abhinavreddy17_en_5.5.0_3.0_1725735951824.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_abhinavreddy17_en_5.5.0_3.0_1725735951824.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import MultiDocumentAssembler
+from sparknlp.annotator import DistilBertForQuestionAnswering
+from pyspark.ml import Pipeline
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_abhinavreddy17","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.MultiDocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_abhinavreddy17", "en")
+    .setInputCols(Array("document_question", "document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
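+The predicted answer span can then be read from the "answer" column produced above; a sketch assuming the column names from the example:
+
+```python
+# Show each question next to the extracted answer text
+# (pipelineDF is the DataFrame produced by the QA example above).
+pipelineDF.select("document_question.result", "answer.result").show(truncate=False)
+```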
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_abhinavreddy17| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/abhinavreddy17/my_awesome_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_abhinavreddy17_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_abhinavreddy17_pipeline_en.md new file mode 100644 index 00000000000000..c02585a3469248 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_abhinavreddy17_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_abhinavreddy17_pipeline pipeline DistilBertForQuestionAnswering from abhinavreddy17 +author: John Snow Labs +name: burmese_awesome_qa_model_abhinavreddy17_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_abhinavreddy17_pipeline` is a English model originally trained by abhinavreddy17. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_abhinavreddy17_pipeline_en_5.5.0_3.0_1725735964788.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_abhinavreddy17_pipeline_en_5.5.0_3.0_1725735964788.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_qa_model_abhinavreddy17_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_abhinavreddy17_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_abhinavreddy17_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/abhinavreddy17/my_awesome_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_ayushij074_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_ayushij074_pipeline_en.md new file mode 100644 index 00000000000000..1c5b9c3987cf07 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_ayushij074_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_ayushij074_pipeline pipeline DistilBertForQuestionAnswering from Ayushij074 +author: John Snow Labs +name: burmese_awesome_qa_model_ayushij074_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_ayushij074_pipeline` is a English model originally trained by Ayushij074. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_ayushij074_pipeline_en_5.5.0_3.0_1725745822477.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_ayushij074_pipeline_en_5.5.0_3.0_1725745822477.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_qa_model_ayushij074_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_ayushij074_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_ayushij074_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Ayushij074/my_awesome_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_b43646_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_b43646_pipeline_en.md new file mode 100644 index 00000000000000..195b11e9deeb1b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_b43646_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_b43646_pipeline pipeline DistilBertForQuestionAnswering from b43646 +author: John Snow Labs +name: burmese_awesome_qa_model_b43646_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_b43646_pipeline` is a English model originally trained by b43646. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_b43646_pipeline_en_5.5.0_3.0_1725695606685.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_b43646_pipeline_en_5.5.0_3.0_1725695606685.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_qa_model_b43646_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_b43646_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_b43646_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/b43646/my_awesome_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_bbrenes_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_bbrenes_en.md new file mode 100644 index 00000000000000..f21eea02ac192a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_bbrenes_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_qa_model_bbrenes DistilBertForQuestionAnswering from bbrenes +author: John Snow Labs +name: burmese_awesome_qa_model_bbrenes +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_bbrenes` is a English model originally trained by bbrenes. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_bbrenes_en_5.5.0_3.0_1725695813981.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_bbrenes_en_5.5.0_3.0_1725695813981.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import MultiDocumentAssembler
+from sparknlp.annotator import DistilBertForQuestionAnswering
+from pyspark.ml import Pipeline
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_bbrenes","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.MultiDocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_bbrenes", "en")
+    .setInputCols(Array("document_question", "document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_bbrenes| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/bbrenes/my_awesome_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_bbrenes_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_bbrenes_pipeline_en.md new file mode 100644 index 00000000000000..b8c2f6f3344fef --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_bbrenes_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_bbrenes_pipeline pipeline DistilBertForQuestionAnswering from bbrenes +author: John Snow Labs +name: burmese_awesome_qa_model_bbrenes_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_bbrenes_pipeline` is a English model originally trained by bbrenes. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_bbrenes_pipeline_en_5.5.0_3.0_1725695825395.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_bbrenes_pipeline_en_5.5.0_3.0_1725695825395.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_qa_model_bbrenes_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_bbrenes_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_bbrenes_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/bbrenes/my_awesome_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_bilalkhan2024_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_bilalkhan2024_pipeline_en.md new file mode 100644 index 00000000000000..22410437e590a1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_bilalkhan2024_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_bilalkhan2024_pipeline pipeline DistilBertForQuestionAnswering from bilalkhan2024 +author: John Snow Labs +name: burmese_awesome_qa_model_bilalkhan2024_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_bilalkhan2024_pipeline` is a English model originally trained by bilalkhan2024. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_bilalkhan2024_pipeline_en_5.5.0_3.0_1725735812943.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_bilalkhan2024_pipeline_en_5.5.0_3.0_1725735812943.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_qa_model_bilalkhan2024_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_bilalkhan2024_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_bilalkhan2024_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/bilalkhan2024/my_awesome_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_dedemilano_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_dedemilano_en.md new file mode 100644 index 00000000000000..edfd43ac1f5676 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_dedemilano_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_qa_model_dedemilano DistilBertForQuestionAnswering from dedemilano +author: John Snow Labs +name: burmese_awesome_qa_model_dedemilano +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_dedemilano` is a English model originally trained by dedemilano. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_dedemilano_en_5.5.0_3.0_1725746063107.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_dedemilano_en_5.5.0_3.0_1725746063107.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import MultiDocumentAssembler
+from sparknlp.annotator import DistilBertForQuestionAnswering
+from pyspark.ml import Pipeline
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_dedemilano","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.MultiDocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_dedemilano", "en")
+    .setInputCols(Array("document_question", "document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_dedemilano| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/dedemilano/my_awesome_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_freongas_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_freongas_en.md new file mode 100644 index 00000000000000..e4cd0817b2d0db --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_freongas_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_qa_model_freongas DistilBertForQuestionAnswering from freongas +author: John Snow Labs +name: burmese_awesome_qa_model_freongas +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_freongas` is a English model originally trained by freongas. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_freongas_en_5.5.0_3.0_1725736130677.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_freongas_en_5.5.0_3.0_1725736130677.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import MultiDocumentAssembler
+from sparknlp.annotator import DistilBertForQuestionAnswering
+from pyspark.ml import Pipeline
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_freongas","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.MultiDocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_freongas", "en")
+    .setInputCols(Array("document_question", "document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_freongas| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/freongas/my_awesome_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_freongas_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_freongas_pipeline_en.md new file mode 100644 index 00000000000000..00ad0a84759c6b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_freongas_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_freongas_pipeline pipeline DistilBertForQuestionAnswering from freongas +author: John Snow Labs +name: burmese_awesome_qa_model_freongas_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_freongas_pipeline` is a English model originally trained by freongas. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_freongas_pipeline_en_5.5.0_3.0_1725736143248.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_freongas_pipeline_en_5.5.0_3.0_1725736143248.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_qa_model_freongas_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_freongas_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_freongas_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/freongas/my_awesome_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_gaogao8_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_gaogao8_pipeline_en.md new file mode 100644 index 00000000000000..73065cbe9fdad7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_gaogao8_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_gaogao8_pipeline pipeline DistilBertForQuestionAnswering from gaogao8 +author: John Snow Labs +name: burmese_awesome_qa_model_gaogao8_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_gaogao8_pipeline` is a English model originally trained by gaogao8. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_gaogao8_pipeline_en_5.5.0_3.0_1725746174731.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_gaogao8_pipeline_en_5.5.0_3.0_1725746174731.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_qa_model_gaogao8_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_gaogao8_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_gaogao8_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/gaogao8/my_awesome_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_jackyfung00358_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_jackyfung00358_en.md new file mode 100644 index 00000000000000..585e894ee3895b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_jackyfung00358_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_qa_model_jackyfung00358 DistilBertForQuestionAnswering from jackyfung00358 +author: John Snow Labs +name: burmese_awesome_qa_model_jackyfung00358 +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_jackyfung00358` is a English model originally trained by jackyfung00358. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_jackyfung00358_en_5.5.0_3.0_1725727372744.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_jackyfung00358_en_5.5.0_3.0_1725727372744.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import MultiDocumentAssembler
+from sparknlp.annotator import DistilBertForQuestionAnswering
+from pyspark.ml import Pipeline
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_jackyfung00358","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.MultiDocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_jackyfung00358", "en")
+    .setInputCols(Array("document_question", "document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_jackyfung00358| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/jackyfung00358/my_awesome_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_jamjacob_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_jamjacob_en.md new file mode 100644 index 00000000000000..6f9631b30d411a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_jamjacob_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_qa_model_jamjacob DistilBertForQuestionAnswering from jamjacob +author: John Snow Labs +name: burmese_awesome_qa_model_jamjacob +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_jamjacob` is a English model originally trained by jamjacob. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_jamjacob_en_5.5.0_3.0_1725727095600.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_jamjacob_en_5.5.0_3.0_1725727095600.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import MultiDocumentAssembler
+from sparknlp.annotator import DistilBertForQuestionAnswering
+from pyspark.ml import Pipeline
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_jamjacob","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.MultiDocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_jamjacob", "en")
+    .setInputCols(Array("document_question", "document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_jamjacob| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/jamjacob/my_awesome_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_jamjacob_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_jamjacob_pipeline_en.md new file mode 100644 index 00000000000000..2d256798835d00 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_jamjacob_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_jamjacob_pipeline pipeline DistilBertForQuestionAnswering from jamjacob +author: John Snow Labs +name: burmese_awesome_qa_model_jamjacob_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_jamjacob_pipeline` is a English model originally trained by jamjacob. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_jamjacob_pipeline_en_5.5.0_3.0_1725727106908.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_jamjacob_pipeline_en_5.5.0_3.0_1725727106908.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_qa_model_jamjacob_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_jamjacob_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_jamjacob_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/jamjacob/my_awesome_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_jyl480_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_jyl480_en.md new file mode 100644 index 00000000000000..8a44972badf03f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_jyl480_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_qa_model_jyl480 DistilBertForQuestionAnswering from JYL480 +author: John Snow Labs +name: burmese_awesome_qa_model_jyl480 +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_jyl480` is a English model originally trained by JYL480. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_jyl480_en_5.5.0_3.0_1725746031004.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_jyl480_en_5.5.0_3.0_1725746031004.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+# The raw question/context column names must match the assembler's input columns.
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_jyl480","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.MultiDocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_jyl480", "en")
+  .setInputCols(Array("document_question", "document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
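+
+Once `pipelineDF` is computed, the predicted span can be read from the `answer` column defined above; a short sketch, assuming the standard Spark NLP annotation schema, is:
+
+```python
+# Each row carries an array of annotations; `result` holds the predicted answer text.
+pipelineDF.selectExpr("explode(answer.result) AS predicted_answer").show(truncate=False)
+```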
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_jyl480| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/JYL480/my_awesome_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_kalyanmaram_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_kalyanmaram_pipeline_en.md new file mode 100644 index 00000000000000..e2775a451c849d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_kalyanmaram_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_kalyanmaram_pipeline pipeline DistilBertForQuestionAnswering from kalyanmaram +author: John Snow Labs +name: burmese_awesome_qa_model_kalyanmaram_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_kalyanmaram_pipeline` is a English model originally trained by kalyanmaram. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_kalyanmaram_pipeline_en_5.5.0_3.0_1725695180477.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_kalyanmaram_pipeline_en_5.5.0_3.0_1725695180477.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is an existing Spark DataFrame holding the input column(s) this pipeline expects.
+pipeline = PretrainedPipeline("burmese_awesome_qa_model_kalyanmaram_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is an existing Spark DataFrame holding the input column(s) this pipeline expects.
+val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_kalyanmaram_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_kalyanmaram_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/kalyanmaram/my_awesome_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_markchiing_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_markchiing_en.md new file mode 100644 index 00000000000000..88a017e8f22081 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_markchiing_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_qa_model_markchiing DistilBertForQuestionAnswering from MarkChiing +author: John Snow Labs +name: burmese_awesome_qa_model_markchiing +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_markchiing` is a English model originally trained by MarkChiing. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_markchiing_en_5.5.0_3.0_1725736326054.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_markchiing_en_5.5.0_3.0_1725736326054.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+# The raw question/context column names must match the assembler's input columns.
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_markchiing","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.MultiDocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_markchiing", "en")
+  .setInputCols(Array("document_question", "document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_markchiing| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.1 MB| + +## References + +https://huggingface.co/MarkChiing/my_awesome_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_mattdyor_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_mattdyor_en.md new file mode 100644 index 00000000000000..a13cbe87e4563e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_mattdyor_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_qa_model_mattdyor DistilBertForQuestionAnswering from mattdyor +author: John Snow Labs +name: burmese_awesome_qa_model_mattdyor +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_mattdyor` is a English model originally trained by mattdyor. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_mattdyor_en_5.5.0_3.0_1725736305294.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_mattdyor_en_5.5.0_3.0_1725736305294.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+# The raw question/context column names must match the assembler's input columns.
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_mattdyor","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.MultiDocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_mattdyor", "en")
+  .setInputCols(Array("document_question", "document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_mattdyor| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/mattdyor/my_awesome_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_mattdyor_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_mattdyor_pipeline_en.md new file mode 100644 index 00000000000000..36ae36287c1756 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_mattdyor_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_mattdyor_pipeline pipeline DistilBertForQuestionAnswering from mattdyor +author: John Snow Labs +name: burmese_awesome_qa_model_mattdyor_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_mattdyor_pipeline` is a English model originally trained by mattdyor. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_mattdyor_pipeline_en_5.5.0_3.0_1725736317957.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_mattdyor_pipeline_en_5.5.0_3.0_1725736317957.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is an existing Spark DataFrame holding the input column(s) this pipeline expects.
+pipeline = PretrainedPipeline("burmese_awesome_qa_model_mattdyor_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is an existing Spark DataFrame holding the input column(s) this pipeline expects.
+val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_mattdyor_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_mattdyor_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/mattdyor/my_awesome_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_myajun_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_myajun_en.md new file mode 100644 index 00000000000000..27048269e077da --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_myajun_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_qa_model_myajun DistilBertForQuestionAnswering from myajun +author: John Snow Labs +name: burmese_awesome_qa_model_myajun +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_myajun` is a English model originally trained by myajun. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_myajun_en_5.5.0_3.0_1725746150752.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_myajun_en_5.5.0_3.0_1725746150752.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+# The raw question/context column names must match the assembler's input columns.
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_myajun","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.MultiDocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_myajun", "en")
+  .setInputCols(Array("document_question", "document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_myajun| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/myajun/my_awesome_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_pavi156_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_pavi156_pipeline_en.md new file mode 100644 index 00000000000000..ca1aca1e738d89 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_pavi156_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_pavi156_pipeline pipeline DistilBertForQuestionAnswering from pavi156 +author: John Snow Labs +name: burmese_awesome_qa_model_pavi156_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_pavi156_pipeline` is a English model originally trained by pavi156. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_pavi156_pipeline_en_5.5.0_3.0_1725736350819.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_pavi156_pipeline_en_5.5.0_3.0_1725736350819.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is an existing Spark DataFrame holding the input column(s) this pipeline expects.
+pipeline = PretrainedPipeline("burmese_awesome_qa_model_pavi156_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is an existing Spark DataFrame holding the input column(s) this pipeline expects.
+val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_pavi156_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_pavi156_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/pavi156/my_awesome_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_rahulcdeo_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_rahulcdeo_en.md new file mode 100644 index 00000000000000..2675a8c4136335 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_rahulcdeo_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_qa_model_rahulcdeo DistilBertForQuestionAnswering from rahulcdeo +author: John Snow Labs +name: burmese_awesome_qa_model_rahulcdeo +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_rahulcdeo` is a English model originally trained by rahulcdeo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_rahulcdeo_en_5.5.0_3.0_1725727205082.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_rahulcdeo_en_5.5.0_3.0_1725727205082.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+# The raw question/context column names must match the assembler's input columns.
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_rahulcdeo","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.MultiDocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_rahulcdeo", "en")
+  .setInputCols(Array("document_question", "document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_rahulcdeo| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/rahulcdeo/my_awesome_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_rahulcdeo_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_rahulcdeo_pipeline_en.md new file mode 100644 index 00000000000000..a3724b062ed54c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_rahulcdeo_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_rahulcdeo_pipeline pipeline DistilBertForQuestionAnswering from rahulcdeo +author: John Snow Labs +name: burmese_awesome_qa_model_rahulcdeo_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_rahulcdeo_pipeline` is a English model originally trained by rahulcdeo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_rahulcdeo_pipeline_en_5.5.0_3.0_1725727217244.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_rahulcdeo_pipeline_en_5.5.0_3.0_1725727217244.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is an existing Spark DataFrame holding the input column(s) this pipeline expects.
+pipeline = PretrainedPipeline("burmese_awesome_qa_model_rahulcdeo_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is an existing Spark DataFrame holding the input column(s) this pipeline expects.
+val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_rahulcdeo_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_rahulcdeo_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/rahulcdeo/my_awesome_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_ravinderbrai_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_ravinderbrai_en.md new file mode 100644 index 00000000000000..71d35a51b7b5c8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_ravinderbrai_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_qa_model_ravinderbrai DistilBertForQuestionAnswering from ravinderbrai +author: John Snow Labs +name: burmese_awesome_qa_model_ravinderbrai +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_ravinderbrai` is a English model originally trained by ravinderbrai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_ravinderbrai_en_5.5.0_3.0_1725722871791.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_ravinderbrai_en_5.5.0_3.0_1725722871791.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+# The raw question/context column names must match the assembler's input columns.
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_ravinderbrai","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.MultiDocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_ravinderbrai", "en")
+  .setInputCols(Array("document_question", "document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_ravinderbrai| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ravinderbrai/my_awesome_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_reza2002_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_reza2002_pipeline_en.md new file mode 100644 index 00000000000000..aaaed586ee742f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_reza2002_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_reza2002_pipeline pipeline DistilBertForQuestionAnswering from Reza2002 +author: John Snow Labs +name: burmese_awesome_qa_model_reza2002_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_reza2002_pipeline` is a English model originally trained by Reza2002. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_reza2002_pipeline_en_5.5.0_3.0_1725736293016.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_reza2002_pipeline_en_5.5.0_3.0_1725736293016.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is an existing Spark DataFrame holding the input column(s) this pipeline expects.
+pipeline = PretrainedPipeline("burmese_awesome_qa_model_reza2002_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is an existing Spark DataFrame holding the input column(s) this pipeline expects.
+val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_reza2002_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_reza2002_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Reza2002/my_awesome_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_shrutina_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_shrutina_pipeline_en.md new file mode 100644 index 00000000000000..c2133d9fd8b13d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_shrutina_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_shrutina_pipeline pipeline DistilBertForQuestionAnswering from Shrutina +author: John Snow Labs +name: burmese_awesome_qa_model_shrutina_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_shrutina_pipeline` is a English model originally trained by Shrutina. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_shrutina_pipeline_en_5.5.0_3.0_1725727588282.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_shrutina_pipeline_en_5.5.0_3.0_1725727588282.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is an existing Spark DataFrame holding the input column(s) this pipeline expects.
+pipeline = PretrainedPipeline("burmese_awesome_qa_model_shrutina_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is an existing Spark DataFrame holding the input column(s) this pipeline expects.
+val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_shrutina_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_shrutina_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Shrutina/my_awesome_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_venkatarajendra_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_venkatarajendra_en.md new file mode 100644 index 00000000000000..9cef73ebc8441a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_venkatarajendra_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_qa_model_venkatarajendra DistilBertForQuestionAnswering from venkatarajendra +author: John Snow Labs +name: burmese_awesome_qa_model_venkatarajendra +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_venkatarajendra` is a English model originally trained by venkatarajendra. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_venkatarajendra_en_5.5.0_3.0_1725726988832.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_venkatarajendra_en_5.5.0_3.0_1725726988832.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+# The raw question/context column names must match the assembler's input columns.
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_venkatarajendra","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.MultiDocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_venkatarajendra", "en")
+  .setInputCols(Array("document_question", "document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_venkatarajendra| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/venkatarajendra/my_awesome_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_vikas12061995_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_vikas12061995_pipeline_en.md new file mode 100644 index 00000000000000..7e49916512cc22 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_vikas12061995_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_awesome_qa_model_vikas12061995_pipeline pipeline DistilBertForQuestionAnswering from vikas12061995 +author: John Snow Labs +name: burmese_awesome_qa_model_vikas12061995_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_vikas12061995_pipeline` is a English model originally trained by vikas12061995. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_vikas12061995_pipeline_en_5.5.0_3.0_1725746063348.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_vikas12061995_pipeline_en_5.5.0_3.0_1725746063348.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is an existing Spark DataFrame holding the input column(s) this pipeline expects.
+pipeline = PretrainedPipeline("burmese_awesome_qa_model_vikas12061995_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is an existing Spark DataFrame holding the input column(s) this pipeline expects.
+val pipeline = new PretrainedPipeline("burmese_awesome_qa_model_vikas12061995_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_vikas12061995_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/vikas12061995/my_awesome_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_wandaabudiono_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_wandaabudiono_en.md new file mode 100644 index 00000000000000..432a4921e90dbc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_qa_model_wandaabudiono_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_qa_model_wandaabudiono DistilBertForQuestionAnswering from WandaaBudiono +author: John Snow Labs +name: burmese_awesome_qa_model_wandaabudiono +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_qa_model_wandaabudiono` is a English model originally trained by WandaaBudiono. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_wandaabudiono_en_5.5.0_3.0_1725745625784.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_qa_model_wandaabudiono_en_5.5.0_3.0_1725745625784.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+# The raw question/context column names must match the assembler's input columns.
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_wandaabudiono","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.MultiDocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_awesome_qa_model_wandaabudiono", "en")
+  .setInputCols(Array("document_question", "document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_qa_model_wandaabudiono| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/WandaaBudiono/my_awesome_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_setfit_model_ivanzidov_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_setfit_model_ivanzidov_en.md new file mode 100644 index 00000000000000..8a312fa3ce8f85 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_setfit_model_ivanzidov_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_awesome_setfit_model_ivanzidov MPNetEmbeddings from ivanzidov +author: John Snow Labs +name: burmese_awesome_setfit_model_ivanzidov +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_setfit_model_ivanzidov` is a English model originally trained by ivanzidov. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_setfit_model_ivanzidov_en_5.5.0_3.0_1725703132360.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_setfit_model_ivanzidov_en_5.5.0_3.0_1725703132360.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+embeddings = MPNetEmbeddings.pretrained("burmese_awesome_setfit_model_ivanzidov","en") \
+    .setInputCols(["document"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([documentAssembler, embeddings])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val embeddings = MPNetEmbeddings.pretrained("burmese_awesome_setfit_model_ivanzidov","en")
+  .setInputCols(Array("document"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
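+
+To work with the raw vectors, the document embedding can be pulled out of the annotation structs; a small sketch, assuming the pipeline above and the standard Spark NLP annotation schema (and using NumPy only for convenience), is:
+
+```python
+import numpy as np
+
+# Each annotation exposes its float vector in the `embeddings` field of the struct.
+rows = pipelineDF.selectExpr("explode(embeddings) AS ann") \
+                 .selectExpr("ann.embeddings AS vector") \
+                 .collect()
+
+vector = np.array(rows[0].vector)
+print(vector.shape)  # 768 dimensions for MPNet-base models (assumption, not stated on this card)
+```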
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_setfit_model_ivanzidov| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/ivanzidov/my-awesome-setfit-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_text_classification_v2_1_0_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_text_classification_v2_1_0_en.md new file mode 100644 index 00000000000000..8aa32bbc497d49 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_text_classification_v2_1_0_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_text_classification_v2_1_0 XlmRoBertaForSequenceClassification from lilyyellow +author: John Snow Labs +name: burmese_awesome_text_classification_v2_1_0 +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_text_classification_v2_1_0` is a English model originally trained by lilyyellow. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_text_classification_v2_1_0_en_5.5.0_3.0_1725713183786.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_text_classification_v2_1_0_en_5.5.0_3.0_1725713183786.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+# The classifier's input columns must match the names produced by the previous stages.
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("burmese_awesome_text_classification_v2_1_0","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("burmese_awesome_text_classification_v2_1_0", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
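+
+The predicted label for each input row lands in the `class` output column defined above; a short sketch, assuming the standard Spark NLP annotation schema, is:
+
+```python
+# `result` holds the predicted label string for each document.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```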
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_text_classification_v2_1_0| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|825.8 MB| + +## References + +https://huggingface.co/lilyyellow/my_awesome_text_classification_v2.1.0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_text_classification_v2_1_0_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_text_classification_v2_1_0_pipeline_en.md new file mode 100644 index 00000000000000..3dfcffaacafa67 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_text_classification_v2_1_0_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_text_classification_v2_1_0_pipeline pipeline XlmRoBertaForSequenceClassification from lilyyellow +author: John Snow Labs +name: burmese_awesome_text_classification_v2_1_0_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_text_classification_v2_1_0_pipeline` is a English model originally trained by lilyyellow. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_text_classification_v2_1_0_pipeline_en_5.5.0_3.0_1725713288807.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_text_classification_v2_1_0_pipeline_en_5.5.0_3.0_1725713288807.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is an existing Spark DataFrame holding the input column(s) this pipeline expects.
+pipeline = PretrainedPipeline("burmese_awesome_text_classification_v2_1_0_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is an existing Spark DataFrame holding the input column(s) this pipeline expects.
+val pipeline = new PretrainedPipeline("burmese_awesome_text_classification_v2_1_0_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_text_classification_v2_1_0_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|825.8 MB| + +## References + +https://huggingface.co/lilyyellow/my_awesome_text_classification_v2.1.0 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_token_classification_v2_1_3_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_token_classification_v2_1_3_en.md new file mode 100644 index 00000000000000..8286142e7335dc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_token_classification_v2_1_3_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_token_classification_v2_1_3 XlmRoBertaForTokenClassification from lilyyellow +author: John Snow Labs +name: burmese_awesome_token_classification_v2_1_3 +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_token_classification_v2_1_3` is a English model originally trained by lilyyellow. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_token_classification_v2_1_3_en_5.5.0_3.0_1725705425889.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_token_classification_v2_1_3_en_5.5.0_3.0_1725705425889.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+# The classifier's input columns must match the names produced by the previous stages.
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("burmese_awesome_token_classification_v2_1_3","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("burmese_awesome_token_classification_v2_1_3", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
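+
+Token-level predictions can be inspected alongside their tokens by selecting the `token` and `ner` outputs defined above; a short sketch, assuming the standard Spark NLP annotation schema, is:
+
+```python
+from pyspark.sql import functions as F
+
+# Tokens and their predicted tags, as parallel arrays per input row.
+pipelineDF.select(
+    F.col("token.result").alias("tokens"),
+    F.col("ner.result").alias("ner_tags"),
+).show(truncate=False)
+```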
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_token_classification_v2_1_3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|830.2 MB| + +## References + +https://huggingface.co/lilyyellow/my_awesome_token_classification_v2.1.3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_token_classification_v2_1_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_token_classification_v2_1_3_pipeline_en.md new file mode 100644 index 00000000000000..b1f8bedc65db1e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_token_classification_v2_1_3_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_token_classification_v2_1_3_pipeline pipeline XlmRoBertaForTokenClassification from lilyyellow +author: John Snow Labs +name: burmese_awesome_token_classification_v2_1_3_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_token_classification_v2_1_3_pipeline` is a English model originally trained by lilyyellow. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_token_classification_v2_1_3_pipeline_en_5.5.0_3.0_1725705531280.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_token_classification_v2_1_3_pipeline_en_5.5.0_3.0_1725705531280.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df is an existing Spark DataFrame holding the input column(s) this pipeline expects.
+pipeline = PretrainedPipeline("burmese_awesome_token_classification_v2_1_3_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df is an existing Spark DataFrame holding the input column(s) this pipeline expects.
+val pipeline = new PretrainedPipeline("burmese_awesome_token_classification_v2_1_3_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_token_classification_v2_1_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|830.2 MB| + +## References + +https://huggingface.co/lilyyellow/my_awesome_token_classification_v2.1.3 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_all_jhs_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_all_jhs_pipeline_en.md new file mode 100644 index 00000000000000..cb7b9fc9ec0149 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_all_jhs_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_all_jhs_pipeline pipeline DistilBertForTokenClassification from gonzalezrostani +author: John Snow Labs +name: burmese_awesome_wnut_all_jhs_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_all_jhs_pipeline` is a English model originally trained by gonzalezrostani. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_all_jhs_pipeline_en_5.5.0_3.0_1725739488193.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_all_jhs_pipeline_en_5.5.0_3.0_1725739488193.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_all_jhs_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_all_jhs_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
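+
+For quick, ad-hoc inputs the pipeline can also be run on plain strings with `annotate`; a small sketch (the sample sentence is illustrative, and the `token`/`ner` keys assume the pipeline's default output column names):
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("burmese_awesome_wnut_all_jhs_pipeline", lang = "en")
+
+result = pipeline.annotate("John Snow Labs was founded in Delaware.")
+print(result["token"])  # tokens
+print(result["ner"])    # matching tag predictions
+```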
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_all_jhs_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/gonzalezrostani/my_awesome_wnut_all_JHs + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_all_place_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_all_place_en.md new file mode 100644 index 00000000000000..ce2140984d39a3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_all_place_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_all_place DistilBertForTokenClassification from gonzalezrostani +author: John Snow Labs +name: burmese_awesome_wnut_all_place +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_all_place` is a English model originally trained by gonzalezrostani. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_all_place_en_5.5.0_3.0_1725739276809.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_all_place_en_5.5.0_3.0_1725739276809.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_all_place","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_all_place", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
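+
+If the model emits IOB-style tags (as is typical for these WNUT fine-tunes, though not guaranteed), a `NerConverter` stage can be appended to merge token tags into entity chunks; a sketch building on the Python example above:
+
+```python
+from sparknlp.annotator import NerConverter
+
+# groups consecutive B-/I- token tags into whole entity spans
+converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier, converter])
+pipeline.fit(data).transform(data).select("ner_chunk.result").show(truncate=False)
+```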
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_all_place| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/gonzalezrostani/my_awesome_wnut_all_Place \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_anirudhramoo_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_anirudhramoo_en.md new file mode 100644 index 00000000000000..6999f14a535390 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_anirudhramoo_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_anirudhramoo DistilBertForTokenClassification from anirudhramoo +author: John Snow Labs +name: burmese_awesome_wnut_model_anirudhramoo +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_anirudhramoo` is a English model originally trained by anirudhramoo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_anirudhramoo_en_5.5.0_3.0_1725739220227.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_anirudhramoo_en_5.5.0_3.0_1725739220227.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_anirudhramoo","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_anirudhramoo", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_anirudhramoo| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/anirudhramoo/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_carlonos_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_carlonos_en.md new file mode 100644 index 00000000000000..49cf567625b6b0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_carlonos_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_carlonos DistilBertForTokenClassification from Carlonos +author: John Snow Labs +name: burmese_awesome_wnut_model_carlonos +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_carlonos` is a English model originally trained by Carlonos. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_carlonos_en_5.5.0_3.0_1725739464870.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_carlonos_en_5.5.0_3.0_1725739464870.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_carlonos","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_carlonos", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
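+
+The fitted pipeline is a regular Spark ML `PipelineModel`, so it can be persisted and reloaded without refitting; a sketch (the path below is a placeholder):
+
+```python
+from pyspark.ml import PipelineModel
+
+pipelineModel.write().overwrite().save("/tmp/burmese_awesome_wnut_model_carlonos_pipeline")
+
+reloaded = PipelineModel.load("/tmp/burmese_awesome_wnut_model_carlonos_pipeline")
+reloaded.transform(data).select("ner.result").show(truncate=False)
+```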
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_carlonos| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Carlonos/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_halikuralde2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_halikuralde2_pipeline_en.md new file mode 100644 index 00000000000000..1a47eb56623d8c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_halikuralde2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_halikuralde2_pipeline pipeline DistilBertForTokenClassification from halikuralde2 +author: John Snow Labs +name: burmese_awesome_wnut_model_halikuralde2_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_halikuralde2_pipeline` is a English model originally trained by halikuralde2. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_halikuralde2_pipeline_en_5.5.0_3.0_1725730272743.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_halikuralde2_pipeline_en_5.5.0_3.0_1725730272743.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_halikuralde2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_halikuralde2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_halikuralde2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/halikuralde2/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_pavement_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_pavement_pipeline_en.md new file mode 100644 index 00000000000000..74ba03c7ecd651 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_pavement_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_pavement_pipeline pipeline DistilBertForTokenClassification from pavement +author: John Snow Labs +name: burmese_awesome_wnut_model_pavement_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_pavement_pipeline` is a English model originally trained by pavement. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_pavement_pipeline_en_5.5.0_3.0_1725730768941.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_pavement_pipeline_en_5.5.0_3.0_1725730768941.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_pavement_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_pavement_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_pavement_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/pavement/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_priyanshug0405_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_priyanshug0405_pipeline_en.md new file mode 100644 index 00000000000000..4d5c6489953ecc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_priyanshug0405_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_priyanshug0405_pipeline pipeline DistilBertForTokenClassification from priyanshug0405 +author: John Snow Labs +name: burmese_awesome_wnut_model_priyanshug0405_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_priyanshug0405_pipeline` is a English model originally trained by priyanshug0405. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_priyanshug0405_pipeline_en_5.5.0_3.0_1725739130104.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_priyanshug0405_pipeline_en_5.5.0_3.0_1725739130104.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_priyanshug0405_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_priyanshug0405_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
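+
+To check what the downloaded pipeline contains before wiring it into a job, the stages of the underlying Spark ML model can be listed; a sketch (it assumes the Python `PretrainedPipeline` wrapper exposes the underlying `PipelineModel` as `.model`, as recent Spark NLP releases do):
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("burmese_awesome_wnut_model_priyanshug0405_pipeline", lang = "en")
+
+# should roughly mirror the Included Models list below
+for stage in pipeline.model.stages:
+    print(type(stage).__name__)
+```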
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_priyanshug0405_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/priyanshug0405/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_robertiulian10_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_robertiulian10_en.md new file mode 100644 index 00000000000000..061b91e1045000 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_robertiulian10_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_robertiulian10 DistilBertForTokenClassification from RobertIulian10 +author: John Snow Labs +name: burmese_awesome_wnut_model_robertiulian10 +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_robertiulian10` is a English model originally trained by RobertIulian10. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_robertiulian10_en_5.5.0_3.0_1725733923380.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_robertiulian10_en_5.5.0_3.0_1725733923380.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_robertiulian10","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_robertiulian10", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_robertiulian10| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/RobertIulian10/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_sreeharipv_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_sreeharipv_en.md new file mode 100644 index 00000000000000..2ac205d6fb8c25 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_sreeharipv_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_sreeharipv DistilBertForTokenClassification from sreeharipv +author: John Snow Labs +name: burmese_awesome_wnut_model_sreeharipv +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_sreeharipv` is a English model originally trained by sreeharipv. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_sreeharipv_en_5.5.0_3.0_1725733923802.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_sreeharipv_en_5.5.0_3.0_1725733923802.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_sreeharipv","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_sreeharipv", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
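+
+For low-latency, single-document inference the fitted model can be wrapped in a `LightPipeline`, which skips the DataFrame round trip; a sketch reusing `pipelineModel` from the example above (the sample sentence is illustrative):
+
+```python
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+print(light.annotate("I love spark-nlp")["ner"])
+```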
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_sreeharipv| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/sreeharipv/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_stephen_osullivan_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_stephen_osullivan_pipeline_en.md new file mode 100644 index 00000000000000..568cd389260da9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_stephen_osullivan_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_stephen_osullivan_pipeline pipeline DistilBertForTokenClassification from stephen-osullivan +author: John Snow Labs +name: burmese_awesome_wnut_model_stephen_osullivan_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_stephen_osullivan_pipeline` is a English model originally trained by stephen-osullivan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_stephen_osullivan_pipeline_en_5.5.0_3.0_1725739130110.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_stephen_osullivan_pipeline_en_5.5.0_3.0_1725739130110.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_awesome_wnut_model_stephen_osullivan_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_awesome_wnut_model_stephen_osullivan_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_stephen_osullivan_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/stephen-osullivan/my_awesome_wnut_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_wstcpyt1988_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_wstcpyt1988_en.md new file mode 100644 index 00000000000000..6e05caa207d735 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_model_wstcpyt1988_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_model_wstcpyt1988 DistilBertForTokenClassification from wstcpyt1988 +author: John Snow Labs +name: burmese_awesome_wnut_model_wstcpyt1988 +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_model_wstcpyt1988` is a English model originally trained by wstcpyt1988. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_wstcpyt1988_en_5.5.0_3.0_1725739366147.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_model_wstcpyt1988_en_5.5.0_3.0_1725739366147.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_wstcpyt1988","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_model_wstcpyt1988", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_model_wstcpyt1988| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/wstcpyt1988/my_awesome_wnut_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_neg_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_neg_en.md new file mode 100644 index 00000000000000..7a3b7350953087 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_awesome_wnut_neg_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_awesome_wnut_neg DistilBertForTokenClassification from gonzalezrostani +author: John Snow Labs +name: burmese_awesome_wnut_neg +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_awesome_wnut_neg` is a English model originally trained by gonzalezrostani. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_neg_en_5.5.0_3.0_1725734318063.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_wnut_neg_en_5.5.0_3.0_1725734318063.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_neg","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("burmese_awesome_wnut_neg", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
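+
+Once transformed, predictions can be read straight off the output columns; a short sketch using `pipelineDF` from the example above:
+
+```python
+# one predicted tag per token
+pipelineDF.select("token.result", "ner.result").show(truncate=False)
+
+# full annotations keep character offsets and metadata
+pipelineDF.selectExpr("explode(ner) as ann") \
+    .select("ann.begin", "ann.end", "ann.result") \
+    .show(truncate=False)
+```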
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_wnut_neg| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/gonzalezrostani/my_awesome_wnut_NEG \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_distilbert_model_qaicodes_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_distilbert_model_qaicodes_en.md new file mode 100644 index 00000000000000..4fa9c11895703c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_distilbert_model_qaicodes_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English burmese_distilbert_model_qaicodes DistilBertForSequenceClassification from qaicodes +author: John Snow Labs +name: burmese_distilbert_model_qaicodes +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_distilbert_model_qaicodes` is a English model originally trained by qaicodes. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_distilbert_model_qaicodes_en_5.5.0_3.0_1725674388536.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_distilbert_model_qaicodes_en_5.5.0_3.0_1725674388536.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("burmese_distilbert_model_qaicodes","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("burmese_distilbert_model_qaicodes", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
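+
+Each input row receives a single sequence-level prediction in the `class` column; a short sketch using `pipelineDF` and `sequenceClassifier` from the example above (the `getClasses` call assumes the current Spark NLP API for transformer classifiers):
+
+```python
+# labels the model was exported with
+print(sequenceClassifier.getClasses())
+
+# predicted label per row
+pipelineDF.select("text", "class.result").show(truncate=False)
+```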
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_distilbert_model_qaicodes| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/qaicodes/my_distilbert_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_first_qa_model_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_first_qa_model_en.md new file mode 100644 index 00000000000000..e4b0fdbc8122be --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_first_qa_model_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_first_qa_model DistilBertForQuestionAnswering from idah4 +author: John Snow Labs +name: burmese_first_qa_model +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_first_qa_model` is a English model originally trained by idah4. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_first_qa_model_en_5.5.0_3.0_1725745712151.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_first_qa_model_en_5.5.0_3.0_1725745712151.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_first_qa_model","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_first_qa_model", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
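+
+Several question/context pairs can be scored in one pass by adding rows to the input DataFrame; a sketch reusing `pipeline` from the example above (the pairs are illustrative):
+
+```python
+pairs = [
+    ("Who maintains Spark NLP?", "Spark NLP is maintained by John Snow Labs."),
+    ("What license does it use?", "The open-source library is Apache 2.0 licensed."),
+]
+qa_df = spark.createDataFrame(pairs).toDF("question", "context")
+
+pipeline.fit(qa_df).transform(qa_df).select("question", "answer.result").show(truncate=False)
+```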
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_first_qa_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/idah4/my_first_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_qa_model_martacaldero_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_qa_model_martacaldero_en.md new file mode 100644 index 00000000000000..ceae812b6af257 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_qa_model_martacaldero_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_qa_model_martacaldero DistilBertForQuestionAnswering from MartaCaldero +author: John Snow Labs +name: burmese_qa_model_martacaldero +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_qa_model_martacaldero` is a English model originally trained by MartaCaldero. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_qa_model_martacaldero_en_5.5.0_3.0_1725745741555.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_qa_model_martacaldero_en_5.5.0_3.0_1725745741555.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_qa_model_martacaldero","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_qa_model_martacaldero", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_qa_model_martacaldero| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/MartaCaldero/my_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_qa_model_martacaldero_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_qa_model_martacaldero_pipeline_en.md new file mode 100644 index 00000000000000..faf64852fffeff --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_qa_model_martacaldero_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_qa_model_martacaldero_pipeline pipeline DistilBertForQuestionAnswering from MartaCaldero +author: John Snow Labs +name: burmese_qa_model_martacaldero_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_qa_model_martacaldero_pipeline` is a English model originally trained by MartaCaldero. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_qa_model_martacaldero_pipeline_en_5.5.0_3.0_1725745755150.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_qa_model_martacaldero_pipeline_en_5.5.0_3.0_1725745755150.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_qa_model_martacaldero_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_qa_model_martacaldero_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_qa_model_martacaldero_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/MartaCaldero/my_qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_spanish_model_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_spanish_model_en.md new file mode 100644 index 00000000000000..204cbe2ba2a18b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_spanish_model_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_spanish_model DistilBertForQuestionAnswering from jeguinoa +author: John Snow Labs +name: burmese_spanish_model +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_spanish_model` is a English model originally trained by jeguinoa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_spanish_model_en_5.5.0_3.0_1725736136180.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_spanish_model_en_5.5.0_3.0_1725736136180.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_spanish_model","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("burmese_spanish_model", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_spanish_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|250.2 MB| + +## References + +https://huggingface.co/jeguinoa/my_spanish_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_test_setfit_model_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_test_setfit_model_en.md new file mode 100644 index 00000000000000..43315efefeedb3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_test_setfit_model_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English burmese_test_setfit_model MPNetEmbeddings from celise88 +author: John Snow Labs +name: burmese_test_setfit_model +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_test_setfit_model` is a English model originally trained by celise88. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_test_setfit_model_en_5.5.0_3.0_1725703211944.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_test_setfit_model_en_5.5.0_3.0_1725703211944.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("burmese_test_setfit_model","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("burmese_test_setfit_model","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
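+
+The `embeddings` column carries one sentence-level vector per document, which can be pulled into NumPy for similarity comparisons; a rough sketch reusing `pipeline` and `spark` from the example above (the sentences and the cosine computation are illustrative):
+
+```python
+import numpy as np
+
+texts = spark.createDataFrame([["I love spark-nlp"], ["Spark NLP is great"]]).toDF("text")
+rows = pipeline.fit(texts).transform(texts).collect()
+
+# each annotation exposes its vector in the embeddings field
+a, b = (np.array(r.embeddings[0].embeddings) for r in rows)
+print(float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b))))
+```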
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_test_setfit_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/celise88/my-test-setfit-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-burmese_test_setfit_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-burmese_test_setfit_model_pipeline_en.md new file mode 100644 index 00000000000000..10ea7a027f1296 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-burmese_test_setfit_model_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English burmese_test_setfit_model_pipeline pipeline MPNetEmbeddings from celise88 +author: John Snow Labs +name: burmese_test_setfit_model_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_test_setfit_model_pipeline` is a English model originally trained by celise88. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_test_setfit_model_pipeline_en_5.5.0_3.0_1725703230639.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_test_setfit_model_pipeline_en_5.5.0_3.0_1725703230639.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("burmese_test_setfit_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("burmese_test_setfit_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_test_setfit_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/celise88/my-test-setfit-model + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-byline_detection_en.md b/docs/_posts/ahmedlone127/2024-09-07-byline_detection_en.md new file mode 100644 index 00000000000000..a125399a389a57 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-byline_detection_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English byline_detection DistilBertForTokenClassification from dell-research-harvard +author: John Snow Labs +name: byline_detection +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`byline_detection` is a English model originally trained by dell-research-harvard. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/byline_detection_en_5.5.0_3.0_1725731117798.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/byline_detection_en_5.5.0_3.0_1725731117798.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("byline_detection","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("byline_detection", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|byline_detection| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/dell-research-harvard/byline-detection \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-byline_detection_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-byline_detection_pipeline_en.md new file mode 100644 index 00000000000000..592e5bf5fd79d5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-byline_detection_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English byline_detection_pipeline pipeline DistilBertForTokenClassification from dell-research-harvard +author: John Snow Labs +name: byline_detection_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`byline_detection_pipeline` is a English model originally trained by dell-research-harvard. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/byline_detection_pipeline_en_5.5.0_3.0_1725731128893.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/byline_detection_pipeline_en_5.5.0_3.0_1725731128893.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("byline_detection_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("byline_detection_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
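+
+A short usage sketch (the `df` above must contain a `text` column; the example below assumes a session started with `sparknlp.start()` and an illustrative input sentence):
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+pipeline = PretrainedPipeline("byline_detection_pipeline", lang="en")
+
+# DataFrame-based usage: the input column must be named "text"
+df = spark.createDataFrame([["BY JANE DOE, Correspondent"]], ["text"])
+pipeline.transform(df).select("ner.result").show(truncate=False)
+
+# Single-string usage; dictionary keys follow the output columns of the included stages
+annotations = pipeline.annotate("BY JANE DOE, Correspondent")
+print(annotations["ner"])
+```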
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|byline_detection_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/dell-research-harvard/byline-detection + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-camembert_base_finetuned_on_runaways_french_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-camembert_base_finetuned_on_runaways_french_pipeline_en.md new file mode 100644 index 00000000000000..a9d13e98373159 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-camembert_base_finetuned_on_runaways_french_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English camembert_base_finetuned_on_runaways_french_pipeline pipeline CamemBertEmbeddings from Nadav +author: John Snow Labs +name: camembert_base_finetuned_on_runaways_french_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`camembert_base_finetuned_on_runaways_french_pipeline` is a English model originally trained by Nadav. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/camembert_base_finetuned_on_runaways_french_pipeline_en_5.5.0_3.0_1725691697196.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/camembert_base_finetuned_on_runaways_french_pipeline_en_5.5.0_3.0_1725691697196.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("camembert_base_finetuned_on_runaways_french_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("camembert_base_finetuned_on_runaways_french_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|camembert_base_finetuned_on_runaways_french_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|412.7 MB| + +## References + +https://huggingface.co/Nadav/camembert-base-finetuned-on-runaways-fr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-cat_ner_xlmr_2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-cat_ner_xlmr_2_pipeline_en.md new file mode 100644 index 00000000000000..0d2d908f361e09 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-cat_ner_xlmr_2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English cat_ner_xlmr_2_pipeline pipeline XlmRoBertaForTokenClassification from homersimpson +author: John Snow Labs +name: cat_ner_xlmr_2_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cat_ner_xlmr_2_pipeline` is a English model originally trained by homersimpson. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cat_ner_xlmr_2_pipeline_en_5.5.0_3.0_1725705553214.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cat_ner_xlmr_2_pipeline_en_5.5.0_3.0_1725705553214.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("cat_ner_xlmr_2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("cat_ner_xlmr_2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cat_ner_xlmr_2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|814.3 MB| + +## References + +https://huggingface.co/homersimpson/cat-ner-xlmr-2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-cat_sayula_popoluca_iwcg_3_en.md b/docs/_posts/ahmedlone127/2024-09-07-cat_sayula_popoluca_iwcg_3_en.md new file mode 100644 index 00000000000000..76001cbe6cb219 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-cat_sayula_popoluca_iwcg_3_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English cat_sayula_popoluca_iwcg_3 XlmRoBertaForTokenClassification from homersimpson +author: John Snow Labs +name: cat_sayula_popoluca_iwcg_3 +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cat_sayula_popoluca_iwcg_3` is a English model originally trained by homersimpson. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cat_sayula_popoluca_iwcg_3_en_5.5.0_3.0_1725744802679.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cat_sayula_popoluca_iwcg_3_en_5.5.0_3.0_1725744802679.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("cat_sayula_popoluca_iwcg_3","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("cat_sayula_popoluca_iwcg_3", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cat_sayula_popoluca_iwcg_3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|432.1 MB| + +## References + +https://huggingface.co/homersimpson/cat-pos-iwcg-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-chemberta_pubchem1m_shard00_140k_en.md b/docs/_posts/ahmedlone127/2024-09-07-chemberta_pubchem1m_shard00_140k_en.md new file mode 100644 index 00000000000000..843cfefefa1852 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-chemberta_pubchem1m_shard00_140k_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English chemberta_pubchem1m_shard00_140k RoBertaEmbeddings from seyonec +author: John Snow Labs +name: chemberta_pubchem1m_shard00_140k +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`chemberta_pubchem1m_shard00_140k` is a English model originally trained by seyonec. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/chemberta_pubchem1m_shard00_140k_en_5.5.0_3.0_1725678649528.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/chemberta_pubchem1m_shard00_140k_en_5.5.0_3.0_1725678649528.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("chemberta_pubchem1m_shard00_140k","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("chemberta_pubchem1m_shard00_140k","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
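+
+To turn the `embeddings` annotations into plain Spark vectors (one per token), an `EmbeddingsFinisher` can be appended to the pipeline. The sketch below is illustrative; the SMILES-style input string is an assumption, not part of this model card:
+
+```python
+import sparknlp
+from sparknlp.base import DocumentAssembler, EmbeddingsFinisher
+from sparknlp.annotator import Tokenizer, RoBertaEmbeddings
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()
+
+documentAssembler = DocumentAssembler().setInputCol("text").setOutputCol("document")
+tokenizer = Tokenizer().setInputCols(["document"]).setOutputCol("token")
+
+embeddings = RoBertaEmbeddings.pretrained("chemberta_pubchem1m_shard00_140k", "en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("embeddings")
+
+# Converts the embedding annotations into Spark ML vectors
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["finished_embeddings"]) \
+    .setOutputAsVector(True)
+
+pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings, finisher])
+
+data = spark.createDataFrame([["CCO"]], ["text"])  # illustrative SMILES string
+result = pipeline.fit(data).transform(data)
+result.selectExpr("explode(finished_embeddings) as token_vector").show(3, truncate=80)
+```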
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|chemberta_pubchem1m_shard00_140k| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|310.6 MB| + +## References + +https://huggingface.co/seyonec/ChemBERTA_PubChem1M_shard00_140k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-chinese_medical_ner_en.md b/docs/_posts/ahmedlone127/2024-09-07-chinese_medical_ner_en.md new file mode 100644 index 00000000000000..f559fa708044ac --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-chinese_medical_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English chinese_medical_ner BertForTokenClassification from lixin12345 +author: John Snow Labs +name: chinese_medical_ner +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`chinese_medical_ner` is a English model originally trained by lixin12345. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/chinese_medical_ner_en_5.5.0_3.0_1725690599204.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/chinese_medical_ner_en_5.5.0_3.0_1725690599204.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("chinese_medical_ner","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("chinese_medical_ner", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
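+
+A hedged sketch of grouping the token-level predictions into entity chunks with `NerConverter` (this assumes the model emits IOB-style labels; the Chinese sentence is illustrative, and the default `Tokenizer` is only a rough fit for unsegmented Chinese text):
+
+```python
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, BertForTokenClassification, NerConverter
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()
+
+documentAssembler = DocumentAssembler().setInputCol("text").setOutputCol("document")
+tokenizer = Tokenizer().setInputCols(["document"]).setOutputCol("token")
+
+tokenClassifier = BertForTokenClassification.pretrained("chinese_medical_ner", "en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+# Merges consecutive B-/I- tags into entity chunks (assumes IOB labels)
+nerConverter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+pipeline = Pipeline(stages=[documentAssembler, tokenizer, tokenClassifier, nerConverter])
+
+data = spark.createDataFrame([["患者出现头痛和发热症状。"]], ["text"])  # illustrative clinical sentence
+result = pipeline.fit(data).transform(data)
+result.select("ner_chunk.result").show(truncate=False)
+```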
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|chinese_medical_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|381.1 MB| + +## References + +https://huggingface.co/lixin12345/chinese-medical-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-chinese_medical_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-chinese_medical_ner_pipeline_en.md new file mode 100644 index 00000000000000..54cfbc03a9c885 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-chinese_medical_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English chinese_medical_ner_pipeline pipeline BertForTokenClassification from lixin12345 +author: John Snow Labs +name: chinese_medical_ner_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`chinese_medical_ner_pipeline` is a English model originally trained by lixin12345. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/chinese_medical_ner_pipeline_en_5.5.0_3.0_1725690617262.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/chinese_medical_ner_pipeline_en_5.5.0_3.0_1725690617262.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("chinese_medical_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("chinese_medical_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|chinese_medical_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|381.2 MB| + +## References + +https://huggingface.co/lixin12345/chinese-medical-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-cnec_1_1_supertypes_czert_b_base_cased_en.md b/docs/_posts/ahmedlone127/2024-09-07-cnec_1_1_supertypes_czert_b_base_cased_en.md new file mode 100644 index 00000000000000..801a23dfe469df --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-cnec_1_1_supertypes_czert_b_base_cased_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English cnec_1_1_supertypes_czert_b_base_cased BertForTokenClassification from stulcrad +author: John Snow Labs +name: cnec_1_1_supertypes_czert_b_base_cased +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cnec_1_1_supertypes_czert_b_base_cased` is a English model originally trained by stulcrad. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cnec_1_1_supertypes_czert_b_base_cased_en_5.5.0_3.0_1725690744867.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cnec_1_1_supertypes_czert_b_base_cased_en_5.5.0_3.0_1725690744867.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("cnec_1_1_supertypes_czert_b_base_cased","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("cnec_1_1_supertypes_czert_b_base_cased", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cnec_1_1_supertypes_czert_b_base_cased| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|408.4 MB| + +## References + +https://huggingface.co/stulcrad/CNEC_1_1_Supertypes_Czert-B-base-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-cnec_1_1_supertypes_czert_b_base_cased_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-cnec_1_1_supertypes_czert_b_base_cased_pipeline_en.md new file mode 100644 index 00000000000000..e0ca4eb7d4148a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-cnec_1_1_supertypes_czert_b_base_cased_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English cnec_1_1_supertypes_czert_b_base_cased_pipeline pipeline BertForTokenClassification from stulcrad +author: John Snow Labs +name: cnec_1_1_supertypes_czert_b_base_cased_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cnec_1_1_supertypes_czert_b_base_cased_pipeline` is a English model originally trained by stulcrad. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cnec_1_1_supertypes_czert_b_base_cased_pipeline_en_5.5.0_3.0_1725690763295.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cnec_1_1_supertypes_czert_b_base_cased_pipeline_en_5.5.0_3.0_1725690763295.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("cnec_1_1_supertypes_czert_b_base_cased_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("cnec_1_1_supertypes_czert_b_base_cased_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cnec_1_1_supertypes_czert_b_base_cased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|408.4 MB| + +## References + +https://huggingface.co/stulcrad/CNEC_1_1_Supertypes_Czert-B-base-cased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-codebert_small_v2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-codebert_small_v2_pipeline_en.md new file mode 100644 index 00000000000000..68431a51215d0a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-codebert_small_v2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English codebert_small_v2_pipeline pipeline RoBertaEmbeddings from codistai +author: John Snow Labs +name: codebert_small_v2_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`codebert_small_v2_pipeline` is a English model originally trained by codistai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/codebert_small_v2_pipeline_en_5.5.0_3.0_1725673175218.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/codebert_small_v2_pipeline_en_5.5.0_3.0_1725673175218.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("codebert_small_v2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("codebert_small_v2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|codebert_small_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|705.3 MB| + +## References + +https://huggingface.co/codistai/codeBERT-small-v2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-coha2000s_en.md b/docs/_posts/ahmedlone127/2024-09-07-coha2000s_en.md new file mode 100644 index 00000000000000..910721268e7212 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-coha2000s_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English coha2000s RoBertaEmbeddings from simonmun +author: John Snow Labs +name: coha2000s +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`coha2000s` is a English model originally trained by simonmun. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/coha2000s_en_5.5.0_3.0_1725716238186.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/coha2000s_en_5.5.0_3.0_1725716238186.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("coha2000s","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("coha2000s","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|coha2000s| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|311.4 MB| + +## References + +https://huggingface.co/simonmun/COHA2000s \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-cold_fusion_itr10_seed3_en.md b/docs/_posts/ahmedlone127/2024-09-07-cold_fusion_itr10_seed3_en.md new file mode 100644 index 00000000000000..ed8e12cecdc645 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-cold_fusion_itr10_seed3_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English cold_fusion_itr10_seed3 RoBertaForSequenceClassification from ibm +author: John Snow Labs +name: cold_fusion_itr10_seed3 +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cold_fusion_itr10_seed3` is a English model originally trained by ibm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cold_fusion_itr10_seed3_en_5.5.0_3.0_1725679489108.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cold_fusion_itr10_seed3_en_5.5.0_3.0_1725679489108.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("cold_fusion_itr10_seed3","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("cold_fusion_itr10_seed3", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
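+
+The predicted label lands in the `class` column; `getClasses()` lists the labels the checkpoint was exported with (label names depend on the upstream model, so treat them as informative rather than documented). A minimal sketch, assuming a session started with `sparknlp.start()`:
+
+```python
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, RoBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()
+
+documentAssembler = DocumentAssembler().setInputCol("text").setOutputCol("document")
+tokenizer = Tokenizer().setInputCols(["document"]).setOutputCol("token")
+
+classifier = RoBertaForSequenceClassification.pretrained("cold_fusion_itr10_seed3", "en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+print(classifier.getClasses())  # labels exported with the checkpoint
+
+pipeline = Pipeline(stages=[documentAssembler, tokenizer, classifier])
+data = spark.createDataFrame([["I love spark-nlp"]], ["text"])
+pipeline.fit(data).transform(data).select("text", "class.result").show(truncate=False)
+```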
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cold_fusion_itr10_seed3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|467.9 MB| + +## References + +https://huggingface.co/ibm/ColD-Fusion-itr10-seed3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-cold_fusion_itr10_seed3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-cold_fusion_itr10_seed3_pipeline_en.md new file mode 100644 index 00000000000000..3d46ac7851c61f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-cold_fusion_itr10_seed3_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English cold_fusion_itr10_seed3_pipeline pipeline RoBertaForSequenceClassification from ibm +author: John Snow Labs +name: cold_fusion_itr10_seed3_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cold_fusion_itr10_seed3_pipeline` is a English model originally trained by ibm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cold_fusion_itr10_seed3_pipeline_en_5.5.0_3.0_1725679512052.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cold_fusion_itr10_seed3_pipeline_en_5.5.0_3.0_1725679512052.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("cold_fusion_itr10_seed3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("cold_fusion_itr10_seed3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cold_fusion_itr10_seed3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|468.0 MB| + +## References + +https://huggingface.co/ibm/ColD-Fusion-itr10-seed3 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-constructive_model_es.md b/docs/_posts/ahmedlone127/2024-09-07-constructive_model_es.md new file mode 100644 index 00000000000000..163188064c1f60 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-constructive_model_es.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Castilian, Spanish constructive_model RoBertaForSequenceClassification from rsepulvedat +author: John Snow Labs +name: constructive_model +date: 2024-09-07 +tags: [es, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`constructive_model` is a Castilian, Spanish model originally trained by rsepulvedat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/constructive_model_es_5.5.0_3.0_1725679996723.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/constructive_model_es_5.5.0_3.0_1725679996723.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("constructive_model","es") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("constructive_model", "es")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
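+
+For ad-hoc scoring of single Spanish sentences, a `LightPipeline` avoids launching a distributed job. A minimal sketch (the example sentence is illustrative):
+
+```python
+import sparknlp
+from sparknlp.base import DocumentAssembler, LightPipeline
+from sparknlp.annotator import Tokenizer, RoBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()
+
+documentAssembler = DocumentAssembler().setInputCol("text").setOutputCol("document")
+tokenizer = Tokenizer().setInputCols(["document"]).setOutputCol("token")
+
+classifier = RoBertaForSequenceClassification.pretrained("constructive_model", "es") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline(stages=[documentAssembler, tokenizer, classifier])
+model = pipeline.fit(spark.createDataFrame([[""]], ["text"]))
+
+# In-memory inference on a single string
+light = LightPipeline(model)
+print(light.annotate("Este comentario aporta argumentos y propone una solución.")["class"])
+```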
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|constructive_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|es| +|Size:|431.8 MB| + +## References + +https://huggingface.co/rsepulvedat/Constructive_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-constructive_model_pipeline_es.md b/docs/_posts/ahmedlone127/2024-09-07-constructive_model_pipeline_es.md new file mode 100644 index 00000000000000..a317c878296589 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-constructive_model_pipeline_es.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Castilian, Spanish constructive_model_pipeline pipeline RoBertaForSequenceClassification from rsepulvedat +author: John Snow Labs +name: constructive_model_pipeline +date: 2024-09-07 +tags: [es, open_source, pipeline, onnx] +task: Text Classification +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`constructive_model_pipeline` is a Castilian, Spanish model originally trained by rsepulvedat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/constructive_model_pipeline_es_5.5.0_3.0_1725680029248.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/constructive_model_pipeline_es_5.5.0_3.0_1725680029248.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("constructive_model_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("constructive_model_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|constructive_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|431.8 MB| + +## References + +https://huggingface.co/rsepulvedat/Constructive_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-contrabert_c_en.md b/docs/_posts/ahmedlone127/2024-09-07-contrabert_c_en.md new file mode 100644 index 00000000000000..5d79509a1c6e47 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-contrabert_c_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English contrabert_c RoBertaEmbeddings from claudios +author: John Snow Labs +name: contrabert_c +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`contrabert_c` is a English model originally trained by claudios. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/contrabert_c_en_5.5.0_3.0_1725716354147.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/contrabert_c_en_5.5.0_3.0_1725716354147.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("contrabert_c","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("contrabert_c","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|contrabert_c| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|466.1 MB| + +## References + +https://huggingface.co/claudios/ContraBERT_C \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-contrabert_c_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-contrabert_c_pipeline_en.md new file mode 100644 index 00000000000000..90eb8ec4abbcc9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-contrabert_c_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English contrabert_c_pipeline pipeline RoBertaEmbeddings from claudios +author: John Snow Labs +name: contrabert_c_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`contrabert_c_pipeline` is a English model originally trained by claudios. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/contrabert_c_pipeline_en_5.5.0_3.0_1725716376241.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/contrabert_c_pipeline_en_5.5.0_3.0_1725716376241.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("contrabert_c_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("contrabert_c_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|contrabert_c_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|466.1 MB| + +## References + +https://huggingface.co/claudios/ContraBERT_C + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-convbert_base_turkish_cased_ner_pipeline_tr.md b/docs/_posts/ahmedlone127/2024-09-07-convbert_base_turkish_cased_ner_pipeline_tr.md new file mode 100644 index 00000000000000..4977a4524e1b91 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-convbert_base_turkish_cased_ner_pipeline_tr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Turkish convbert_base_turkish_cased_ner_pipeline pipeline BertForTokenClassification from akdeniz27 +author: John Snow Labs +name: convbert_base_turkish_cased_ner_pipeline +date: 2024-09-07 +tags: [tr, open_source, pipeline, onnx] +task: Named Entity Recognition +language: tr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`convbert_base_turkish_cased_ner_pipeline` is a Turkish model originally trained by akdeniz27. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/convbert_base_turkish_cased_ner_pipeline_tr_5.5.0_3.0_1725726216900.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/convbert_base_turkish_cased_ner_pipeline_tr_5.5.0_3.0_1725726216900.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("convbert_base_turkish_cased_ner_pipeline", lang = "tr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("convbert_base_turkish_cased_ner_pipeline", lang = "tr") +val annotations = pipeline.transform(df) + +``` +
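+
+A short usage sketch for single Turkish sentences (the sentence is illustrative, and the dictionary keys assume the default output columns of the included stages):
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+pipeline = PretrainedPipeline("convbert_base_turkish_cased_ner_pipeline", lang="tr")
+
+annotations = pipeline.annotate("Mustafa Kemal Atatürk 1881 yılında Selanik'te doğdu.")
+print(list(zip(annotations["token"], annotations["ner"])))  # token/tag pairs
+```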
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|convbert_base_turkish_cased_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|tr| +|Size:|400.1 MB| + +## References + +https://huggingface.co/akdeniz27/convbert-base-turkish-cased-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-cot_ep3_1122_en.md b/docs/_posts/ahmedlone127/2024-09-07-cot_ep3_1122_en.md new file mode 100644 index 00000000000000..3aebe4ea8dfc63 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-cot_ep3_1122_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English cot_ep3_1122 MPNetEmbeddings from ingeol +author: John Snow Labs +name: cot_ep3_1122 +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cot_ep3_1122` is a English model originally trained by ingeol. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cot_ep3_1122_en_5.5.0_3.0_1725703579846.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cot_ep3_1122_en_5.5.0_3.0_1725703579846.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("cot_ep3_1122","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("cot_ep3_1122","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
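+
+Since the model produces one sentence embedding per document, a common follow-up is to compare two texts by cosine similarity. The sketch below is illustrative (the two sentences are assumptions) and uses `EmbeddingsFinisher` to expose plain vectors:
+
+```python
+import sparknlp
+import numpy as np
+from sparknlp.base import DocumentAssembler, EmbeddingsFinisher
+from sparknlp.annotator import MPNetEmbeddings
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()
+
+documentAssembler = DocumentAssembler().setInputCol("text").setOutputCol("document")
+
+embeddings = MPNetEmbeddings.pretrained("cot_ep3_1122", "en") \
+    .setInputCols(["document"]) \
+    .setOutputCol("embeddings")
+
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["sentence_embedding"]) \
+    .setOutputAsVector(True)
+
+pipeline = Pipeline(stages=[documentAssembler, embeddings, finisher])
+
+texts = [["How do I reset my password?"], ["Steps to change a forgotten password"]]
+data = spark.createDataFrame(texts, ["text"])
+rows = pipeline.fit(data).transform(data).select("sentence_embedding").collect()
+
+a = np.array(rows[0]["sentence_embedding"][0].toArray())
+b = np.array(rows[1]["sentence_embedding"][0].toArray())
+print(float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b))))  # cosine similarity
+```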
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cot_ep3_1122| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/ingeol/cot_ep3_1122 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-covid_qa_distillbert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-covid_qa_distillbert_pipeline_en.md new file mode 100644 index 00000000000000..ee1f3eb3bd4087 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-covid_qa_distillbert_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English covid_qa_distillbert_pipeline pipeline DistilBertForQuestionAnswering from shaina +author: John Snow Labs +name: covid_qa_distillbert_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`covid_qa_distillbert_pipeline` is a English model originally trained by shaina. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/covid_qa_distillbert_pipeline_en_5.5.0_3.0_1725722652728.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/covid_qa_distillbert_pipeline_en_5.5.0_3.0_1725722652728.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("covid_qa_distillbert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("covid_qa_distillbert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
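+
+For question answering, the underlying model can also be assembled manually with a `MultiDocumentAssembler`. The sketch below is hedged: the standalone model name `covid_qa_distillbert` is inferred from the pipeline name and should be verified on the Models Hub, and the question/context pair is illustrative:
+
+```python
+import sparknlp
+from sparknlp.base import MultiDocumentAssembler
+from sparknlp.annotator import DistilBertForQuestionAnswering
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+# Model name inferred from the pipeline name; verify before use
+qa = DistilBertForQuestionAnswering.pretrained("covid_qa_distillbert", "en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline(stages=[documentAssembler, qa])
+
+data = spark.createDataFrame(
+    [["What does the virus primarily affect?",
+      "The virus primarily affects the respiratory system."]],
+    ["question", "context"])
+
+pipeline.fit(data).transform(data).select("answer.result").show(truncate=False)
+```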
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|covid_qa_distillbert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/shaina/covid_qa_distillBert + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-cross_all_bs320_vanilla_finetuned_webnlg2020_metric_average_en.md b/docs/_posts/ahmedlone127/2024-09-07-cross_all_bs320_vanilla_finetuned_webnlg2020_metric_average_en.md new file mode 100644 index 00000000000000..d5b8e9edc2ad19 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-cross_all_bs320_vanilla_finetuned_webnlg2020_metric_average_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English cross_all_bs320_vanilla_finetuned_webnlg2020_metric_average MPNetEmbeddings from teven +author: John Snow Labs +name: cross_all_bs320_vanilla_finetuned_webnlg2020_metric_average +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cross_all_bs320_vanilla_finetuned_webnlg2020_metric_average` is a English model originally trained by teven. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cross_all_bs320_vanilla_finetuned_webnlg2020_metric_average_en_5.5.0_3.0_1725703724072.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cross_all_bs320_vanilla_finetuned_webnlg2020_metric_average_en_5.5.0_3.0_1725703724072.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("cross_all_bs320_vanilla_finetuned_webnlg2020_metric_average","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("cross_all_bs320_vanilla_finetuned_webnlg2020_metric_average","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cross_all_bs320_vanilla_finetuned_webnlg2020_metric_average| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.3 MB| + +## References + +https://huggingface.co/teven/cross_all_bs320_vanilla_finetuned_WebNLG2020_metric_average \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-cross_all_bs320_vanilla_finetuned_webnlg2020_metric_average_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-cross_all_bs320_vanilla_finetuned_webnlg2020_metric_average_pipeline_en.md new file mode 100644 index 00000000000000..a06d24f9fe104a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-cross_all_bs320_vanilla_finetuned_webnlg2020_metric_average_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English cross_all_bs320_vanilla_finetuned_webnlg2020_metric_average_pipeline pipeline MPNetEmbeddings from teven +author: John Snow Labs +name: cross_all_bs320_vanilla_finetuned_webnlg2020_metric_average_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cross_all_bs320_vanilla_finetuned_webnlg2020_metric_average_pipeline` is a English model originally trained by teven. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cross_all_bs320_vanilla_finetuned_webnlg2020_metric_average_pipeline_en_5.5.0_3.0_1725703744749.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cross_all_bs320_vanilla_finetuned_webnlg2020_metric_average_pipeline_en_5.5.0_3.0_1725703744749.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("cross_all_bs320_vanilla_finetuned_webnlg2020_metric_average_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("cross_all_bs320_vanilla_finetuned_webnlg2020_metric_average_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cross_all_bs320_vanilla_finetuned_webnlg2020_metric_average_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.3 MB| + +## References + +https://huggingface.co/teven/cross_all_bs320_vanilla_finetuned_WebNLG2020_metric_average + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-cross_all_mpnet_base_v2_finetuned_webnlg2020_correctness_en.md b/docs/_posts/ahmedlone127/2024-09-07-cross_all_mpnet_base_v2_finetuned_webnlg2020_correctness_en.md new file mode 100644 index 00000000000000..fe44c7f4d4385d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-cross_all_mpnet_base_v2_finetuned_webnlg2020_correctness_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English cross_all_mpnet_base_v2_finetuned_webnlg2020_correctness MPNetEmbeddings from teven +author: John Snow Labs +name: cross_all_mpnet_base_v2_finetuned_webnlg2020_correctness +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cross_all_mpnet_base_v2_finetuned_webnlg2020_correctness` is a English model originally trained by teven. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cross_all_mpnet_base_v2_finetuned_webnlg2020_correctness_en_5.5.0_3.0_1725703008054.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cross_all_mpnet_base_v2_finetuned_webnlg2020_correctness_en_5.5.0_3.0_1725703008054.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("cross_all_mpnet_base_v2_finetuned_webnlg2020_correctness","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("cross_all_mpnet_base_v2_finetuned_webnlg2020_correctness","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
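+
+After `transform`, the sentence vectors sit in the `embeddings` column as Spark NLP annotations. A small sketch (not part of the official card) of how they might be inspected with plain Spark SQL, assuming the standard annotation schema with `result` and `embeddings` fields:
+
+```python
+# Explode the annotation array and look at the vector for each input sentence.
+pipelineDF.selectExpr("explode(embeddings) as ann") \
+    .selectExpr("ann.result as sentence", "ann.embeddings as vector") \
+    .show(truncate=80)
+```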
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cross_all_mpnet_base_v2_finetuned_webnlg2020_correctness| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.3 MB| + +## References + +https://huggingface.co/teven/cross_all-mpnet-base-v2_finetuned_WebNLG2020_correctness \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-cross_encoder_mmarco_mminilmv2_l12_h384_v1_en.md b/docs/_posts/ahmedlone127/2024-09-07-cross_encoder_mmarco_mminilmv2_l12_h384_v1_en.md new file mode 100644 index 00000000000000..3c4a248a7d115b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-cross_encoder_mmarco_mminilmv2_l12_h384_v1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English cross_encoder_mmarco_mminilmv2_l12_h384_v1 XlmRoBertaForSequenceClassification from corrius +author: John Snow Labs +name: cross_encoder_mmarco_mminilmv2_l12_h384_v1 +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cross_encoder_mmarco_mminilmv2_l12_h384_v1` is a English model originally trained by corrius. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cross_encoder_mmarco_mminilmv2_l12_h384_v1_en_5.5.0_3.0_1725671099587.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cross_encoder_mmarco_mminilmv2_l12_h384_v1_en_5.5.0_3.0_1725671099587.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("cross_encoder_mmarco_mminilmv2_l12_h384_v1","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("cross_encoder_mmarco_mminilmv2_l12_h384_v1", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
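+
+To read off the predicted label per row, the `class` output column can be queried directly; a hedged sketch using the column names from the example above:
+
+```python
+# Each row carries an array of category annotations; `result` holds the label string.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```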
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cross_encoder_mmarco_mminilmv2_l12_h384_v1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|399.6 MB| + +## References + +https://huggingface.co/corrius/cross-encoder-mmarco-mMiniLMv2-L12-H384-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-cuad_distil_governing_law_08_28_v1_en.md b/docs/_posts/ahmedlone127/2024-09-07-cuad_distil_governing_law_08_28_v1_en.md new file mode 100644 index 00000000000000..88e4d786dba30c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-cuad_distil_governing_law_08_28_v1_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English cuad_distil_governing_law_08_28_v1 DistilBertForQuestionAnswering from saraks +author: John Snow Labs +name: cuad_distil_governing_law_08_28_v1 +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cuad_distil_governing_law_08_28_v1` is a English model originally trained by saraks. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cuad_distil_governing_law_08_28_v1_en_5.5.0_3.0_1725727340428.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cuad_distil_governing_law_08_28_v1_en_5.5.0_3.0_1725727340428.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("cuad_distil_governing_law_08_28_v1","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("cuad_distil_governing_law_08_28_v1", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
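+
+The predicted answer span ends up in the `answer` column; a short sketch of reading it back (column names as in the example above):
+
+```python
+# `result` contains the extracted answer text for each question/context pair.
+pipelineDF.select("answer.result").show(truncate=False)
+```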
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cuad_distil_governing_law_08_28_v1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/saraks/cuad-distil-governing_law-08-28-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-cybert_cyner_en.md b/docs/_posts/ahmedlone127/2024-09-07-cybert_cyner_en.md new file mode 100644 index 00000000000000..ce3dcce960a33f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-cybert_cyner_en.md @@ -0,0 +1,100 @@ +--- +layout: model +title: English cybert_cyner RoBertaForTokenClassification from anonymouspd +author: John Snow Labs +name: cybert_cyner +date: 2024-09-07 +tags: [roberta, en, open_source, token_classification, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cybert_cyner` is a English model originally trained by anonymouspd. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cybert_cyner_en_5.5.0_3.0_1725668537805.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cybert_cyner_en_5.5.0_3.0_1725668537805.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("cybert_cyner","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification
+    .pretrained("cybert_cyner", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+
+val data = Seq("I love spark-nlp").toDF("text")
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+```
+</div>
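+
+The classifier above emits one IOB tag per token. If entity chunks are needed instead, Spark NLP's `NerConverter` can be appended; a sketch under the assumption that the `document`, `token` and `ner` columns are named as in the example:
+
+```python
+from sparknlp.annotator import NerConverter
+
+# Group token-level tags into entity chunks.
+converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("entities")
+
+converter.transform(pipelineDF).select("entities.result").show(truncate=False)
+```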
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cybert_cyner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|311.3 MB| + +## References + +References + +https://huggingface.co/anonymouspd/CyBERT-CyNER \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-deberta_large_twitter_pop_binary_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-deberta_large_twitter_pop_binary_pipeline_en.md new file mode 100644 index 00000000000000..867bb1de64f864 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-deberta_large_twitter_pop_binary_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deberta_large_twitter_pop_binary_pipeline pipeline RoBertaForSequenceClassification from guyhadad01 +author: John Snow Labs +name: deberta_large_twitter_pop_binary_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deberta_large_twitter_pop_binary_pipeline` is a English model originally trained by guyhadad01. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_large_twitter_pop_binary_pipeline_en_5.5.0_3.0_1725718314951.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_large_twitter_pop_binary_pipeline_en_5.5.0_3.0_1725718314951.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("deberta_large_twitter_pop_binary_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("deberta_large_twitter_pop_binary_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
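+
+For a quick interactive check, the loaded pipeline can also be called on a single string; a sketch (the sample tweet is only an illustration, and the exact output keys depend on how the pipeline was exported):
+
+```python
+# Inspect the returned dictionary to see which keys the pipeline produces.
+result = pipeline.annotate("Politicians never listen to ordinary people!")
+print(result)
+```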
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_large_twitter_pop_binary_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|468.3 MB| + +## References + +https://huggingface.co/guyhadad01/deberta-large-twitter-pop-binary + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-demomodel_en.md b/docs/_posts/ahmedlone127/2024-09-07-demomodel_en.md new file mode 100644 index 00000000000000..2c4b5184a3e8e7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-demomodel_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English demomodel RoBertaForSequenceClassification from afroz14 +author: John Snow Labs +name: demomodel +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`demomodel` is a English model originally trained by afroz14. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/demomodel_en_5.5.0_3.0_1725680134581.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/demomodel_en_5.5.0_3.0_1725680134581.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = RoBertaForSequenceClassification.pretrained("demomodel","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = RoBertaForSequenceClassification.pretrained("demomodel", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|demomodel| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|465.4 MB| + +## References + +https://huggingface.co/afroz14/demomodel \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-demomodel_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-demomodel_pipeline_en.md new file mode 100644 index 00000000000000..391abffe1ed410 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-demomodel_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English demomodel_pipeline pipeline RoBertaForSequenceClassification from afroz14 +author: John Snow Labs +name: demomodel_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`demomodel_pipeline` is a English model originally trained by afroz14. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/demomodel_pipeline_en_5.5.0_3.0_1725680156444.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/demomodel_pipeline_en_5.5.0_3.0_1725680156444.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("demomodel_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("demomodel_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|demomodel_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|465.4 MB| + +## References + +https://huggingface.co/afroz14/demomodel + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-deproberta_en.md b/docs/_posts/ahmedlone127/2024-09-07-deproberta_en.md new file mode 100644 index 00000000000000..942aa0355e6067 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-deproberta_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English deproberta RoBertaEmbeddings from Qichen123 +author: John Snow Labs +name: deproberta +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deproberta` is a English model originally trained by Qichen123. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deproberta_en_5.5.0_3.0_1725716022841.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deproberta_en_5.5.0_3.0_1725716022841.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("deproberta","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("deproberta","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
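+
+Each token receives its own vector in the `embeddings` column. A hedged sketch of pairing tokens with their embedding dimensionality via Spark SQL, assuming the standard annotation schema:
+
+```python
+# One row per token: the token text and the size of its vector.
+pipelineDF.selectExpr("explode(embeddings) as ann") \
+    .selectExpr("ann.result as token", "size(ann.embeddings) as dim") \
+    .show(truncate=False)
+```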
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deproberta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/Qichen123/DepRoBERTa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-deproberta_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-deproberta_pipeline_en.md new file mode 100644 index 00000000000000..7d545efe46db39 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-deproberta_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English deproberta_pipeline pipeline RoBertaEmbeddings from Qichen123 +author: John Snow Labs +name: deproberta_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`deproberta_pipeline` is a English model originally trained by Qichen123. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deproberta_pipeline_en_5.5.0_3.0_1725716080011.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deproberta_pipeline_en_5.5.0_3.0_1725716080011.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("deproberta_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("deproberta_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deproberta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/Qichen123/DepRoBERTa + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-disaster_cutom_ner_v1_en.md b/docs/_posts/ahmedlone127/2024-09-07-disaster_cutom_ner_v1_en.md new file mode 100644 index 00000000000000..c95135196a47c3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-disaster_cutom_ner_v1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English disaster_cutom_ner_v1 DistilBertForTokenClassification from DipeshY +author: John Snow Labs +name: disaster_cutom_ner_v1 +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`disaster_cutom_ner_v1` is a English model originally trained by DipeshY. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/disaster_cutom_ner_v1_en_5.5.0_3.0_1725733954186.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/disaster_cutom_ner_v1_en_5.5.0_3.0_1725733954186.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = DistilBertForTokenClassification.pretrained("disaster_cutom_ner_v1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = DistilBertForTokenClassification.pretrained("disaster_cutom_ner_v1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|disaster_cutom_ner_v1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/DipeshY/disaster_cutom_ner_v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-disaster_cutom_ner_v1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-disaster_cutom_ner_v1_pipeline_en.md new file mode 100644 index 00000000000000..4bfdfdeebc9e44 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-disaster_cutom_ner_v1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English disaster_cutom_ner_v1_pipeline pipeline DistilBertForTokenClassification from DipeshY +author: John Snow Labs +name: disaster_cutom_ner_v1_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`disaster_cutom_ner_v1_pipeline` is a English model originally trained by DipeshY. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/disaster_cutom_ner_v1_pipeline_en_5.5.0_3.0_1725733965620.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/disaster_cutom_ner_v1_pipeline_en_5.5.0_3.0_1725733965620.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("disaster_cutom_ner_v1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("disaster_cutom_ner_v1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
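+
+Besides `transform` on a DataFrame, the pipeline object can be probed on a single string; a sketch (the sample sentence is purely illustrative):
+
+```python
+# fullAnnotate keeps begin/end offsets and metadata for each annotation.
+result = pipeline.fullAnnotate("A magnitude 6.1 earthquake struck the region on Friday.")
+print(result)
+```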
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|disaster_cutom_ner_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/DipeshY/disaster_cutom_ner_v1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distil_train_token_classification_nepal_bhasa_en.md b/docs/_posts/ahmedlone127/2024-09-07-distil_train_token_classification_nepal_bhasa_en.md new file mode 100644 index 00000000000000..f5e5e9df219ab5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distil_train_token_classification_nepal_bhasa_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distil_train_token_classification_nepal_bhasa DistilBertForTokenClassification from ymgong +author: John Snow Labs +name: distil_train_token_classification_nepal_bhasa +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distil_train_token_classification_nepal_bhasa` is a English model originally trained by ymgong. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distil_train_token_classification_nepal_bhasa_en_5.5.0_3.0_1725734209948.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distil_train_token_classification_nepal_bhasa_en_5.5.0_3.0_1725734209948.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +tokenClassifier = DistilBertForTokenClassification.pretrained("distil_train_token_classification_nepal_bhasa","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val tokenClassifier = DistilBertForTokenClassification.pretrained("distil_train_token_classification_nepal_bhasa", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
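+
+To line tokens up with their predicted tags, the `token` and `ner` annotation arrays can be zipped together; a sketch using plain Spark SQL (column names as in the example above):
+
+```python
+# Pair each token with its predicted tag.
+pipelineDF.selectExpr("arrays_zip(token.result, ner.result) as tagged") \
+    .selectExpr("explode(tagged) as pair") \
+    .show(truncate=False)
+```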
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distil_train_token_classification_nepal_bhasa| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/ymgong/distil_train_token_classification_new \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_cased_distilled_squad_distilbert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_cased_distilled_squad_distilbert_pipeline_en.md new file mode 100644 index 00000000000000..7e84c44a3e66da --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_cased_distilled_squad_distilbert_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_cased_distilled_squad_distilbert_pipeline pipeline DistilBertForQuestionAnswering from distilbert +author: John Snow Labs +name: distilbert_base_cased_distilled_squad_distilbert_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_cased_distilled_squad_distilbert_pipeline` is a English model originally trained by distilbert. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_distilled_squad_distilbert_pipeline_en_5.5.0_3.0_1725695214350.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_distilled_squad_distilbert_pipeline_en_5.5.0_3.0_1725695214350.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_cased_distilled_squad_distilbert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_cased_distilled_squad_distilbert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_cased_distilled_squad_distilbert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/distilbert/distilbert-base-cased-distilled-squad + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_cased_distilled_squad_v2_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_cased_distilled_squad_v2_en.md new file mode 100644 index 00000000000000..eb8eaae8ed84ae --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_cased_distilled_squad_v2_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_base_cased_distilled_squad_v2 DistilBertForQuestionAnswering from jackfriedson +author: John Snow Labs +name: distilbert_base_cased_distilled_squad_v2 +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_cased_distilled_squad_v2` is a English model originally trained by jackfriedson. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_distilled_squad_v2_en_5.5.0_3.0_1725726988749.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_distilled_squad_v2_en_5.5.0_3.0_1725726988749.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCol(["question", "context"]) \ + .setOutputCol(["document_question", "document_context"]) + +spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_cased_distilled_squad_v2","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCol(Array("question", "context")) + .setOutputCol(Array("document_question", "document_context")) + +val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_cased_distilled_squad_v2", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq("What framework do I use?","I use spark-nlp.").toDS.toDF("document_question", "document_context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_cased_distilled_squad_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/jackfriedson/distilbert-base-cased-distilled-squad-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_cased_distilled_squad_v2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_cased_distilled_squad_v2_pipeline_en.md new file mode 100644 index 00000000000000..09b097eda8ed53 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_cased_distilled_squad_v2_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_cased_distilled_squad_v2_pipeline pipeline DistilBertForQuestionAnswering from jackfriedson +author: John Snow Labs +name: distilbert_base_cased_distilled_squad_v2_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_cased_distilled_squad_v2_pipeline` is a English model originally trained by jackfriedson. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_distilled_squad_v2_pipeline_en_5.5.0_3.0_1725727001429.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_distilled_squad_v2_pipeline_en_5.5.0_3.0_1725727001429.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_cased_distilled_squad_v2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_cased_distilled_squad_v2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_cased_distilled_squad_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/jackfriedson/distilbert-base-cased-distilled-squad-v2 + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_cased_finetuned_pfe_projectt_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_cased_finetuned_pfe_projectt_en.md new file mode 100644 index 00000000000000..109d45a32a2660 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_cased_finetuned_pfe_projectt_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_base_cased_finetuned_pfe_projectt DistilBertForQuestionAnswering from onsba +author: John Snow Labs +name: distilbert_base_cased_finetuned_pfe_projectt +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_cased_finetuned_pfe_projectt` is a English model originally trained by onsba. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_finetuned_pfe_projectt_en_5.5.0_3.0_1725727230586.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_finetuned_pfe_projectt_en_5.5.0_3.0_1725727230586.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCol(["question", "context"]) \ + .setOutputCol(["document_question", "document_context"]) + +spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_cased_finetuned_pfe_projectt","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCol(Array("question", "context")) + .setOutputCol(Array("document_question", "document_context")) + +val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_cased_finetuned_pfe_projectt", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq("What framework do I use?","I use spark-nlp.").toDS.toDF("document_question", "document_context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_cased_finetuned_pfe_projectt| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/onsba/distilbert-base-cased-finetuned-pfe-projectt \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_spanish_uncased_finetuned_rock_argentino_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_spanish_uncased_finetuned_rock_argentino_en.md new file mode 100644 index 00000000000000..3bf9512e9be73b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_spanish_uncased_finetuned_rock_argentino_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_spanish_uncased_finetuned_rock_argentino DistilBertEmbeddings from SantiRimedio +author: John Snow Labs +name: distilbert_base_spanish_uncased_finetuned_rock_argentino +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_spanish_uncased_finetuned_rock_argentino` is a English model originally trained by SantiRimedio. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_spanish_uncased_finetuned_rock_argentino_en_5.5.0_3.0_1725742313628.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_spanish_uncased_finetuned_rock_argentino_en_5.5.0_3.0_1725742313628.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_spanish_uncased_finetuned_rock_argentino","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_spanish_uncased_finetuned_rock_argentino","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
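+
+If a single vector per text is preferred over per-token vectors, Spark NLP's `SentenceEmbeddings` annotator can average-pool the output; a sketch assuming the column names from the example above:
+
+```python
+from sparknlp.annotator import SentenceEmbeddings
+
+# Average-pool token vectors into one sentence vector.
+sentenceEmbeddings = SentenceEmbeddings() \
+    .setInputCols(["document", "embeddings"]) \
+    .setOutputCol("sentence_embeddings") \
+    .setPoolingStrategy("AVERAGE")
+
+sentenceEmbeddings.transform(pipelineDF).select("sentence_embeddings.embeddings").show(truncate=80)
+```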
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_spanish_uncased_finetuned_rock_argentino| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|250.2 MB| + +## References + +https://huggingface.co/SantiRimedio/distilbert-base-spanish-uncased-finetuned-rock-argentino \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_spanish_uncased_finetuned_rock_argentino_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_spanish_uncased_finetuned_rock_argentino_pipeline_en.md new file mode 100644 index 00000000000000..14138040de0791 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_spanish_uncased_finetuned_rock_argentino_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_spanish_uncased_finetuned_rock_argentino_pipeline pipeline DistilBertEmbeddings from SantiRimedio +author: John Snow Labs +name: distilbert_base_spanish_uncased_finetuned_rock_argentino_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_spanish_uncased_finetuned_rock_argentino_pipeline` is a English model originally trained by SantiRimedio. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_spanish_uncased_finetuned_rock_argentino_pipeline_en_5.5.0_3.0_1725742325285.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_spanish_uncased_finetuned_rock_argentino_pipeline_en_5.5.0_3.0_1725742325285.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_spanish_uncased_finetuned_rock_argentino_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_spanish_uncased_finetuned_rock_argentino_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_spanish_uncased_finetuned_rock_argentino_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|250.2 MB| + +## References + +https://huggingface.co/SantiRimedio/distilbert-base-spanish-uncased-finetuned-rock-argentino + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_spanish_uncased_finetuned_text_intelligence_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_spanish_uncased_finetuned_text_intelligence_en.md new file mode 100644 index 00000000000000..e16b021b11df1f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_spanish_uncased_finetuned_text_intelligence_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_spanish_uncased_finetuned_text_intelligence DistilBertForSequenceClassification from chris32 +author: John Snow Labs +name: distilbert_base_spanish_uncased_finetuned_text_intelligence +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_spanish_uncased_finetuned_text_intelligence` is a English model originally trained by chris32. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_spanish_uncased_finetuned_text_intelligence_en_5.5.0_3.0_1725674664887.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_spanish_uncased_finetuned_text_intelligence_en_5.5.0_3.0_1725674664887.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_spanish_uncased_finetuned_text_intelligence","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCols("text") + .setOutputCols("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_spanish_uncased_finetuned_text_intelligence", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_spanish_uncased_finetuned_text_intelligence| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|252.4 MB| + +## References + +https://huggingface.co/chris32/distilbert-base-spanish-uncased-finetuned-text-intelligence \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_detected_jailbreak_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_detected_jailbreak_pipeline_en.md new file mode 100644 index 00000000000000..7f491471902bae --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_detected_jailbreak_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_detected_jailbreak_pipeline pipeline DistilBertForSequenceClassification from Necent +author: John Snow Labs +name: distilbert_base_uncased_detected_jailbreak_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_detected_jailbreak_pipeline` is a English model originally trained by Necent. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_detected_jailbreak_pipeline_en_5.5.0_3.0_1725674633452.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_detected_jailbreak_pipeline_en_5.5.0_3.0_1725674633452.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_detected_jailbreak_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_detected_jailbreak_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_detected_jailbreak_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Necent/distilbert-base-uncased-detected-jailbreak + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_distilled_clinc_schnatz65_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_distilled_clinc_schnatz65_en.md new file mode 100644 index 00000000000000..8ebd04a1bb959f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_distilled_clinc_schnatz65_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_distilled_clinc_schnatz65 DistilBertForSequenceClassification from Schnatz65 +author: John Snow Labs +name: distilbert_base_uncased_distilled_clinc_schnatz65 +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_distilled_clinc_schnatz65` is a English model originally trained by Schnatz65. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_distilled_clinc_schnatz65_en_5.5.0_3.0_1725674551466.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_distilled_clinc_schnatz65_en_5.5.0_3.0_1725674551466.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_distilled_clinc_schnatz65","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_distilled_clinc_schnatz65", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
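+Once the pipeline has been fit and applied as above, the predicted label can be read back from the classifier's output column; a brief sketch using the column names wired in the example:
+
+```python
+# "class.result" holds the label predicted by DistilBertForSequenceClassification
+pipelineDF.select("text", "class.result").show(truncate = False)
+```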
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_distilled_clinc_schnatz65| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.9 MB| + +## References + +https://huggingface.co/Schnatz65/distilbert-base-uncased-distilled-clinc \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_cefr_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_cefr_en.md new file mode 100644 index 00000000000000..e9cb21ad7234c3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_cefr_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_cefr DistilBertForTokenClassification from DioBot2000 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_cefr +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_cefr` is a English model originally trained by DioBot2000. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_cefr_en_5.5.0_3.0_1725734236310.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_cefr_en_5.5.0_3.0_1725734236310.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_cefr","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_cefr", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
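+To see the tag assigned to each token, the `token` and `ner` output columns produced above can be paired up; a minimal driver-side sketch for a single row, for illustration only:
+
+```python
+# Each output column exposes its string results via the "result" field
+row = pipelineDF.select("token.result", "ner.result").first()
+for token, label in zip(row[0], row[1]):
+    print(token, label)
+```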
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_cefr| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/DioBot2000/distilbert-base-uncased-finetuned-CEFR \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_cefr_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_cefr_pipeline_en.md new file mode 100644 index 00000000000000..75cee02efbc75d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_cefr_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_cefr_pipeline pipeline DistilBertForTokenClassification from DioBot2000 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_cefr_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_cefr_pipeline` is a English model originally trained by DioBot2000. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_cefr_pipeline_en_5.5.0_3.0_1725734247994.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_cefr_pipeline_en_5.5.0_3.0_1725734247994.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_cefr_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_cefr_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_cefr_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/DioBot2000/distilbert-base-uncased-finetuned-CEFR + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_clinc_jeremygf_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_clinc_jeremygf_en.md new file mode 100644 index 00000000000000..93f698f1c7be96 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_clinc_jeremygf_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_clinc_jeremygf DistilBertForSequenceClassification from jeremygf +author: John Snow Labs +name: distilbert_base_uncased_finetuned_clinc_jeremygf +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_clinc_jeremygf` is a English model originally trained by jeremygf. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_clinc_jeremygf_en_5.5.0_3.0_1725674833268.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_clinc_jeremygf_en_5.5.0_3.0_1725674833268.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_clinc_jeremygf","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_clinc_jeremygf", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_clinc_jeremygf| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.9 MB| + +## References + +https://huggingface.co/jeremygf/distilbert-base-uncased-finetuned-clinc \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_con_dataset_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_con_dataset_en.md new file mode 100644 index 00000000000000..681ee88f3e9cda --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_con_dataset_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_con_dataset DistilBertForTokenClassification from Emmanuelalo52 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_con_dataset +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_con_dataset` is a English model originally trained by Emmanuelalo52. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_con_dataset_en_5.5.0_3.0_1725730146041.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_con_dataset_en_5.5.0_3.0_1725730146041.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_con_dataset","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_con_dataset", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
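+For quick single-document inference outside a batch job, the fitted model can also be wrapped in a `LightPipeline`; a minimal sketch, assuming `pipelineModel` from the Python block above:
+
+```python
+from sparknlp.base import LightPipeline
+
+# annotate() returns plain Python lists keyed by output column name ("document", "token", "ner")
+light = LightPipeline(pipelineModel)
+print(light.annotate("John lives in New York and works for Acme Corp."))
+```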
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_con_dataset| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Emmanuelalo52/distilbert-base-uncased-finetuned-con-dataset \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_dol_ecab_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_dol_ecab_en.md new file mode 100644 index 00000000000000..d75125e7261dcb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_dol_ecab_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_dol_ecab DistilBertEmbeddings from Aventicity +author: John Snow Labs +name: distilbert_base_uncased_finetuned_dol_ecab +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_dol_ecab` is a English model originally trained by Aventicity. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_dol_ecab_en_5.5.0_3.0_1725742192735.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_dol_ecab_en_5.5.0_3.0_1725742192735.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_dol_ecab","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_dol_ecab","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
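+Each token annotation produced by `DistilBertEmbeddings` carries its vector in the `embeddings` field of the output annotation; a short sketch for inspecting the first token of the first row (a vector size of 768 is assumed for a DistilBERT base model):
+
+```python
+# Spark NLP annotations expose: annotatorType, begin, end, result, metadata, embeddings
+row = pipelineDF.select("embeddings").first()
+first_token = row["embeddings"][0]
+print(first_token.result)           # token text
+print(len(first_token.embeddings))  # embedding dimensionality (expected 768)
+```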
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_dol_ecab| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Aventicity/distilbert-base-uncased-finetuned-dol_ecab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_dol_ecab_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_dol_ecab_pipeline_en.md new file mode 100644 index 00000000000000..38a61bec3f7fab --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_dol_ecab_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_dol_ecab_pipeline pipeline DistilBertEmbeddings from Aventicity +author: John Snow Labs +name: distilbert_base_uncased_finetuned_dol_ecab_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_dol_ecab_pipeline` is a English model originally trained by Aventicity. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_dol_ecab_pipeline_en_5.5.0_3.0_1725742204303.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_dol_ecab_pipeline_en_5.5.0_3.0_1725742204303.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_dol_ecab_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_dol_ecab_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
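+For ad-hoc calls on raw strings, the downloaded pipeline can also be used without building a DataFrame; a hedged sketch using `fullAnnotate`, which returns the complete annotation objects, including the DistilBERT embedding vectors:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_dol_ecab_pipeline", lang = "en")
+
+# fullAnnotate accepts plain strings; [0] picks the single input document
+result = pipeline.fullAnnotate("I love spark-nlp")[0]
+print(result.keys())                            # output columns, e.g. document, token, embeddings
+print(result["embeddings"][0].embeddings[:5])   # first few dimensions of the first token vector
+```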
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_dol_ecab_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Aventicity/distilbert-base-uncased-finetuned-dol_ecab + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_accelerate_abh1na5_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_accelerate_abh1na5_en.md new file mode 100644 index 00000000000000..b57cffd49eb021 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_accelerate_abh1na5_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_abh1na5 DistilBertEmbeddings from abh1na5 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_abh1na5 +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_accelerate_abh1na5` is a English model originally trained by abh1na5. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_abh1na5_en_5.5.0_3.0_1725742131365.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_abh1na5_en_5.5.0_3.0_1725742131365.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_abh1na5","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_abh1na5","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_abh1na5| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/abh1na5/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_accelerate_abh1na5_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_accelerate_abh1na5_pipeline_en.md new file mode 100644 index 00000000000000..6da9e6c0ccd603 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_accelerate_abh1na5_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_abh1na5_pipeline pipeline DistilBertEmbeddings from abh1na5 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_abh1na5_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_accelerate_abh1na5_pipeline` is a English model originally trained by abh1na5. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_abh1na5_pipeline_en_5.5.0_3.0_1725742143429.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_abh1na5_pipeline_en_5.5.0_3.0_1725742143429.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_accelerate_abh1na5_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_accelerate_abh1na5_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_abh1na5_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/abh1na5/distilbert-base-uncased-finetuned-imdb-accelerate + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_accelerate_zmeeks_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_accelerate_zmeeks_en.md new file mode 100644 index 00000000000000..11f2b232b4680f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_accelerate_zmeeks_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_zmeeks DistilBertEmbeddings from zmeeks +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_zmeeks +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_accelerate_zmeeks` is a English model originally trained by zmeeks. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_zmeeks_en_5.5.0_3.0_1725742653290.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_zmeeks_en_5.5.0_3.0_1725742653290.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_zmeeks","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_zmeeks","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_zmeeks| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/zmeeks/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_coign_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_coign_en.md new file mode 100644 index 00000000000000..9015cd1b5ae021 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_coign_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_coign DistilBertEmbeddings from coign +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_coign +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_coign` is a English model originally trained by coign. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_coign_en_5.5.0_3.0_1725742131195.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_coign_en_5.5.0_3.0_1725742131195.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_coign","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_coign","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
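+If downstream Spark ML stages need plain vector columns instead of annotation structs, an `EmbeddingsFinisher` can be appended to the pipeline; a sketch building on the Python stages defined above (stage and column names are assumptions for illustration):
+
+```python
+from pyspark.ml import Pipeline
+from sparknlp.base import EmbeddingsFinisher
+
+# Converts the "embeddings" annotations into one Spark ML vector per token
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["finished_embeddings"]) \
+    .setOutputAsVector(True)
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings, finisher])
+pipeline.fit(data).transform(data).select("finished_embeddings").show(truncate = False)
+```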
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_coign| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/coign/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_coign_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_coign_pipeline_en.md new file mode 100644 index 00000000000000..ab28e1ff7462ac --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_coign_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_coign_pipeline pipeline DistilBertEmbeddings from coign +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_coign_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_coign_pipeline` is a English model originally trained by coign. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_coign_pipeline_en_5.5.0_3.0_1725742142658.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_coign_pipeline_en_5.5.0_3.0_1725742142658.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_coign_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_coign_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_coign_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/coign/distilbert-base-uncased-finetuned-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_jinq047_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_jinq047_pipeline_en.md new file mode 100644 index 00000000000000..c4804a1bdb5942 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_jinq047_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_jinq047_pipeline pipeline DistilBertEmbeddings from jinq047 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_jinq047_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_jinq047_pipeline` is a English model originally trained by jinq047. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_jinq047_pipeline_en_5.5.0_3.0_1725742556528.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_jinq047_pipeline_en_5.5.0_3.0_1725742556528.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_jinq047_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_jinq047_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_jinq047_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/jinq047/distilbert-base-uncased-finetuned-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_pkyriakis_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_pkyriakis_en.md new file mode 100644 index 00000000000000..b383e166199bbb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_pkyriakis_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_pkyriakis DistilBertEmbeddings from pkyriakis +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_pkyriakis +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_pkyriakis` is a English model originally trained by pkyriakis. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_pkyriakis_en_5.5.0_3.0_1725742327278.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_pkyriakis_en_5.5.0_3.0_1725742327278.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_pkyriakis","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_pkyriakis","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_pkyriakis| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/pkyriakis/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_pkyriakis_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_pkyriakis_pipeline_en.md new file mode 100644 index 00000000000000..b8c654f7106a62 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_pkyriakis_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_pkyriakis_pipeline pipeline DistilBertEmbeddings from pkyriakis +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_pkyriakis_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_pkyriakis_pipeline` is a English model originally trained by pkyriakis. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_pkyriakis_pipeline_en_5.5.0_3.0_1725742340023.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_pkyriakis_pipeline_en_5.5.0_3.0_1725742340023.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_pkyriakis_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_pkyriakis_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_pkyriakis_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/pkyriakis/distilbert-base-uncased-finetuned-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_raincheck_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_raincheck_en.md new file mode 100644 index 00000000000000..a884eb9d6b4e75 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_raincheck_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_raincheck DistilBertEmbeddings from Raincheck +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_raincheck +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_raincheck` is a English model originally trained by Raincheck. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_raincheck_en_5.5.0_3.0_1725742231117.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_raincheck_en_5.5.0_3.0_1725742231117.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_raincheck","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_raincheck","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_raincheck| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Raincheck/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_rohbro_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_rohbro_en.md new file mode 100644 index 00000000000000..a740d7211a83f3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_rohbro_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_rohbro DistilBertEmbeddings from rohbro +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_rohbro +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_rohbro` is a English model originally trained by rohbro. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_rohbro_en_5.5.0_3.0_1725742253244.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_rohbro_en_5.5.0_3.0_1725742253244.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_rohbro","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_rohbro","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_rohbro| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/rohbro/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_rohbro_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_rohbro_pipeline_en.md new file mode 100644 index 00000000000000..19262d84edbc1e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_imdb_rohbro_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_rohbro_pipeline pipeline DistilBertEmbeddings from rohbro +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_rohbro_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_rohbro_pipeline` is a English model originally trained by rohbro. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_rohbro_pipeline_en_5.5.0_3.0_1725742265584.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_rohbro_pipeline_en_5.5.0_3.0_1725742265584.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_rohbro_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_rohbro_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_rohbro_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/rohbro/distilbert-base-uncased-finetuned-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_dev4952_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_dev4952_pipeline_en.md new file mode 100644 index 00000000000000..f5d2c17abe63ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_dev4952_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_dev4952_pipeline pipeline DistilBertForTokenClassification from dev4952 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_dev4952_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_dev4952_pipeline` is a English model originally trained by dev4952. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_dev4952_pipeline_en_5.5.0_3.0_1725739581244.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_dev4952_pipeline_en_5.5.0_3.0_1725739581244.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_ner_dev4952_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_ner_dev4952_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
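+A minimal end-to-end sketch for this NER pipeline, assuming a session started with `sparknlp.start()` and that the token-level predictions are written to a `ner` output column:
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+df = spark.createDataFrame([["John Snow Labs is based in Delaware."]]).toDF("text")
+
+pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_ner_dev4952_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+# Inspect tokens alongside their predicted tags
+annotations.select("token.result", "ner.result").show(truncate = False)
+```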
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_dev4952_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/dev4952/distilbert-base-uncased-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_harsh1304_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_harsh1304_pipeline_en.md new file mode 100644 index 00000000000000..1353602f43b024 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_harsh1304_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_harsh1304_pipeline pipeline BertForTokenClassification from harsh1304 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_harsh1304_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_harsh1304_pipeline` is a English model originally trained by harsh1304. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_harsh1304_pipeline_en_5.5.0_3.0_1725734979733.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_harsh1304_pipeline_en_5.5.0_3.0_1725734979733.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_ner_harsh1304_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_ner_harsh1304_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_harsh1304_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.8 MB| + +## References + +https://huggingface.co/harsh1304/distilbert-base-uncased-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_hemg_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_hemg_pipeline_en.md new file mode 100644 index 00000000000000..9d2f2be88bce2e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_hemg_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_hemg_pipeline pipeline DistilBertForTokenClassification from Hemg +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_hemg_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_hemg_pipeline` is a English model originally trained by Hemg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_hemg_pipeline_en_5.5.0_3.0_1725739723503.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_hemg_pipeline_en_5.5.0_3.0_1725739723503.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_ner_hemg_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_ner_hemg_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
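+For quick checks on a single sentence, the same pretrained pipeline object can also be used without building a DataFrame. This is a sketch, assuming the pipeline has been loaded as in the snippet above; `annotate` returns a plain Python dictionary keyed by the pipeline's output column names, which are easiest to discover by printing the keys.
+
+```python
+# Light-weight, single-string inference with the already loaded pipeline
+result = pipeline.annotate("My name is John and I live in Berlin.")
+
+# See which annotation keys (e.g. tokens, NER tags) the pipeline produced
+print(result.keys())
+```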
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_hemg_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Hemg/distilbert-base-uncased-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_priyabrata018_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_priyabrata018_pipeline_en.md new file mode 100644 index 00000000000000..69a87977c37e01 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_priyabrata018_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_priyabrata018_pipeline pipeline DistilBertForTokenClassification from Priyabrata018 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_priyabrata018_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_priyabrata018_pipeline` is a English model originally trained by Priyabrata018. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_priyabrata018_pipeline_en_5.5.0_3.0_1725739670686.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_priyabrata018_pipeline_en_5.5.0_3.0_1725739670686.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_ner_priyabrata018_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_ner_priyabrata018_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_priyabrata018_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Priyabrata018/distilbert-base-uncased-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_raulgdp_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_raulgdp_en.md new file mode 100644 index 00000000000000..ebeefa8a87232a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_raulgdp_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_raulgdp DistilBertForTokenClassification from raulgdp +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_raulgdp +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_raulgdp` is a English model originally trained by raulgdp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_raulgdp_en_5.5.0_3.0_1725734407628.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_raulgdp_en_5.5.0_3.0_1725734407628.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_raulgdp","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCols("text")
+    .setOutputCols("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_raulgdp", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
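+Once the pipeline above has been fit and applied, the token-level predictions can be read back from the `token` and `ner` annotation columns. The following sketch assumes the column names used in the snippet above and relies only on standard Spark SQL field access.
+
+```python
+from pyspark.sql import functions as F
+
+# "result" holds the string payload of each annotation: the token text and its predicted tag
+pipelineDF.select(
+    F.col("token.result").alias("tokens"),
+    F.col("ner.result").alias("ner_tags")
+).show(truncate=False)
+```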
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_raulgdp| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/raulgdp/distilbert-base-uncased-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_renardkorzeniowski_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_renardkorzeniowski_pipeline_en.md new file mode 100644 index 00000000000000..85e0f00b989dd0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_renardkorzeniowski_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_renardkorzeniowski_pipeline pipeline DistilBertForTokenClassification from renardkorzeniowski +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_renardkorzeniowski_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_renardkorzeniowski_pipeline` is a English model originally trained by renardkorzeniowski. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_renardkorzeniowski_pipeline_en_5.5.0_3.0_1725731238366.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_renardkorzeniowski_pipeline_en_5.5.0_3.0_1725731238366.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_ner_renardkorzeniowski_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_ner_renardkorzeniowski_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_renardkorzeniowski_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/renardkorzeniowski/distilbert-base-uncased-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_shashank612_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_shashank612_en.md new file mode 100644 index 00000000000000..afe57fa24e871f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_shashank612_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_shashank612 DistilBertForTokenClassification from shashank612 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_shashank612 +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_shashank612` is a English model originally trained by shashank612. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_shashank612_en_5.5.0_3.0_1725734026232.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_shashank612_en_5.5.0_3.0_1725734026232.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_shashank612","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCols("text")
+    .setOutputCols("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_shashank612", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_shashank612| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/shashank612/distilbert-base-uncased-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_shashank612_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_shashank612_pipeline_en.md new file mode 100644 index 00000000000000..89c18bb2bc2353 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_shashank612_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_shashank612_pipeline pipeline DistilBertForTokenClassification from shashank612 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_shashank612_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_shashank612_pipeline` is a English model originally trained by shashank612. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_shashank612_pipeline_en_5.5.0_3.0_1725734037907.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_shashank612_pipeline_en_5.5.0_3.0_1725734037907.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_ner_shashank612_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_ner_shashank612_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_shashank612_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/shashank612/distilbert-base-uncased-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_wangyue6761_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_wangyue6761_en.md new file mode 100644 index 00000000000000..7e7e772f6b187c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_ner_wangyue6761_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ner_wangyue6761 DistilBertForTokenClassification from wangyue6761 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ner_wangyue6761 +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_ner_wangyue6761` is a English model originally trained by wangyue6761. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_wangyue6761_en_5.5.0_3.0_1725733923463.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ner_wangyue6761_en_5.5.0_3.0_1725733923463.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_wangyue6761","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCols("text")
+    .setOutputCols("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_ner_wangyue6761", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ner_wangyue6761| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/wangyue6761/distilbert-base-uncased-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_nersd_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_nersd_en.md new file mode 100644 index 00000000000000..e32303307f719b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_nersd_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_nersd DistilBertForTokenClassification from soniquentin +author: John Snow Labs +name: distilbert_base_uncased_finetuned_nersd +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_nersd` is a English model originally trained by soniquentin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_nersd_en_5.5.0_3.0_1725731598405.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_nersd_en_5.5.0_3.0_1725731598405.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_nersd","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCols("text")
+    .setOutputCols("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_base_uncased_finetuned_nersd", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_nersd| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|377.4 MB| + +## References + +https://huggingface.co/soniquentin/distilbert-base-uncased-finetuned-nersd \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_nersd_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_nersd_pipeline_en.md new file mode 100644 index 00000000000000..a1d78f429a84ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_nersd_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_nersd_pipeline pipeline DistilBertForTokenClassification from soniquentin +author: John Snow Labs +name: distilbert_base_uncased_finetuned_nersd_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_nersd_pipeline` is a English model originally trained by soniquentin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_nersd_pipeline_en_5.5.0_3.0_1725731615240.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_nersd_pipeline_en_5.5.0_3.0_1725731615240.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_nersd_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_nersd_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_nersd_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|377.4 MB| + +## References + +https://huggingface.co/soniquentin/distilbert-base-uncased-finetuned-nersd + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_bat007_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_bat007_pipeline_en.md new file mode 100644 index 00000000000000..ccc45113b6db35 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_bat007_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_bat007_pipeline pipeline DistilBertForQuestionAnswering from BAT007 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_bat007_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_bat007_pipeline` is a English model originally trained by BAT007. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_bat007_pipeline_en_5.5.0_3.0_1725736176081.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_bat007_pipeline_en_5.5.0_3.0_1725736176081.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_squad_bat007_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_squad_bat007_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
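+Because this pipeline starts with a MultiDocumentAssembler (see the Included Models list below), the input DataFrame `df` needs both a question and a context column. The column names in the sketch below are an assumption based on that assembler's conventions, and the `answer` output column is likewise assumed; printing the schema first is the safest way to confirm both.
+
+```python
+# Hypothetical input; adjust the column names if the pipeline's assembler expects different ones
+df = spark.createDataFrame(
+    [["What does this pipeline do?", "It answers questions with a fine-tuned DistilBERT model."]]
+).toDF("question", "context")
+
+annotations = pipeline.transform(df)
+annotations.printSchema()
+
+# The output column name ("answer") is an assumption; check the schema above if it differs
+annotations.selectExpr("explode(answer.result) as answer").show(truncate=False)
+```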
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_bat007_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/BAT007/distilbert-base-uncased-finetuned-squad + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_begoniabcs_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_begoniabcs_pipeline_en.md new file mode 100644 index 00000000000000..480a8ce7c6dcab --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_begoniabcs_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_begoniabcs_pipeline pipeline DistilBertForQuestionAnswering from begoniabcs +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_begoniabcs_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_begoniabcs_pipeline` is a English model originally trained by begoniabcs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_begoniabcs_pipeline_en_5.5.0_3.0_1725745992049.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_begoniabcs_pipeline_en_5.5.0_3.0_1725745992049.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_squad_begoniabcs_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_squad_begoniabcs_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_begoniabcs_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/begoniabcs/distilbert-base-uncased-finetuned-squad + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_bighands23_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_bighands23_en.md new file mode 100644 index 00000000000000..18a15e6ddd2974 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_bighands23_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_bighands23 DistilBertForQuestionAnswering from bighands23 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_bighands23 +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_bighands23` is a English model originally trained by bighands23. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_bighands23_en_5.5.0_3.0_1725745711652.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_bighands23_en_5.5.0_3.0_1725745711652.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_bighands23","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_bighands23", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
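+The snippet above leaves the predictions in `pipelineDF`; the extracted answer span can be pulled out of the `answer` column that the span classifier writes, for example:
+
+```python
+# Each row carries one annotation array; explode it to get the predicted answer text per question/context pair
+pipelineDF.selectExpr("explode(answer.result) as predicted_answer").show(truncate=False)
+```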
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_bighands23| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/bighands23/distilbert-base-uncased-finetuned-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_bighands23_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_bighands23_pipeline_en.md new file mode 100644 index 00000000000000..261e26e935bf92 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_bighands23_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_bighands23_pipeline pipeline DistilBertForQuestionAnswering from bighands23 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_bighands23_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_bighands23_pipeline` is a English model originally trained by bighands23. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_bighands23_pipeline_en_5.5.0_3.0_1725745723355.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_bighands23_pipeline_en_5.5.0_3.0_1725745723355.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_squad_bighands23_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_squad_bighands23_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_bighands23_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/bighands23/distilbert-base-uncased-finetuned-squad + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_d5716d28_dchung117_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_d5716d28_dchung117_pipeline_en.md new file mode 100644 index 00000000000000..a2f1a725765d25 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_d5716d28_dchung117_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_dchung117_pipeline pipeline DistilBertForQuestionAnswering from dchung117 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_dchung117_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_d5716d28_dchung117_pipeline` is a English model originally trained by dchung117. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_dchung117_pipeline_en_5.5.0_3.0_1725695414659.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_dchung117_pipeline_en_5.5.0_3.0_1725695414659.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_squad_d5716d28_dchung117_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_squad_d5716d28_dchung117_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_dchung117_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/dchung117/distilbert-base-uncased-finetuned-squad-d5716d28 + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_d5716d28_dharma20_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_d5716d28_dharma20_en.md new file mode 100644 index 00000000000000..b1c73b3ee867e0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_d5716d28_dharma20_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_dharma20 DistilBertForQuestionAnswering from Dharma20 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_dharma20 +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_d5716d28_dharma20` is a English model originally trained by Dharma20. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_dharma20_en_5.5.0_3.0_1725727601871.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_dharma20_en_5.5.0_3.0_1725727601871.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_dharma20","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_dharma20", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_dharma20| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Dharma20/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_d5716d28_dharma20_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_d5716d28_dharma20_pipeline_en.md new file mode 100644 index 00000000000000..feade6d7bc97bd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_d5716d28_dharma20_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_dharma20_pipeline pipeline DistilBertForQuestionAnswering from Dharma20 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_dharma20_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_d5716d28_dharma20_pipeline` is a English model originally trained by Dharma20. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_dharma20_pipeline_en_5.5.0_3.0_1725727613609.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_dharma20_pipeline_en_5.5.0_3.0_1725727613609.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_squad_d5716d28_dharma20_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_squad_d5716d28_dharma20_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_dharma20_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Dharma20/distilbert-base-uncased-finetuned-squad-d5716d28 + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989_en.md new file mode 100644 index 00000000000000..01dfaa82f99833 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989 DistilBertEmbeddings from gautam1989 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989 +date: 2024-09-07 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989` is a English model originally trained by gautam1989. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989_en_5.5.0_3.0_1725727644421.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989_en_5.5.0_3.0_1725727644421.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("documents")
+    .setOutputCol("token")
+
+val embeddings = DistilBertEmbeddings
+    .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+```
+</div>
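+After the pipeline above has been applied, each token carries one embedding vector in the `embeddings` column. The sketch below uses only standard Spark SQL functions to check the number of vectors and their dimensionality (768 for a DistilBERT base model); it assumes the column names from the snippet above.
+
+```python
+from pyspark.sql import functions as F
+
+pipelineDF.select(
+    # one vector per token
+    F.size("embeddings.embeddings").alias("num_token_vectors"),
+    # length of the first token's vector, i.e. the embedding dimension
+    F.size(F.element_at("embeddings.embeddings", 1)).alias("embedding_dim")
+).show()
+```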
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +References + +https://huggingface.co/gautam1989/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989_pipeline_en.md new file mode 100644 index 00000000000000..c4b4f40c039ec6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989_pipeline pipeline DistilBertForQuestionAnswering from gautam1989 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989_pipeline` is a English model originally trained by gautam1989. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989_pipeline_en_5.5.0_3.0_1725727655976.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989_pipeline_en_5.5.0_3.0_1725727655976.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/gautam1989/distilbert-base-uncased-finetuned-squad-d5716d28 + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_d5716d28_juancopi81_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_d5716d28_juancopi81_en.md new file mode 100644 index 00000000000000..d9cf595b7f6e8c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_d5716d28_juancopi81_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_juancopi81 DistilBertEmbeddings from juancopi81 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_juancopi81 +date: 2024-09-07 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_d5716d28_juancopi81` is a English model originally trained by juancopi81. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_juancopi81_en_5.5.0_3.0_1725722509347.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_juancopi81_en_5.5.0_3.0_1725722509347.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_juancopi81","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("documents")
+    .setOutputCol("token")
+
+val embeddings = DistilBertEmbeddings
+    .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_juancopi81", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_juancopi81| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +References + +https://huggingface.co/juancopi81/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_d5716d28_jwlovetea_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_d5716d28_jwlovetea_en.md new file mode 100644 index 00000000000000..ef7ea35c1c81f9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_d5716d28_jwlovetea_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_jwlovetea DistilBertEmbeddings from jwlovetea +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_jwlovetea +date: 2024-09-07 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_d5716d28_jwlovetea` is a English model originally trained by jwlovetea. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_jwlovetea_en_5.5.0_3.0_1725695400643.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_jwlovetea_en_5.5.0_3.0_1725695400643.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertEmbeddings
+from pyspark.ml import Pipeline
+
+# assumes an active SparkSession named `spark` (e.g. from sparknlp.start())
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_jwlovetea", "en") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+# sample data for illustration; replace with your own "text" column
+data = spark.createDataFrame([["I love Spark NLP."]]).toDF("text")
+
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+import spark.implicits._
+import com.johnsnowlabs.nlp.base.DocumentAssembler
+import com.johnsnowlabs.nlp.annotators.Tokenizer
+import com.johnsnowlabs.nlp.embeddings.DistilBertEmbeddings
+import org.apache.spark.ml.Pipeline
+
+val document_assembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("documents")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("documents"))
+  .setOutputCol("token")
+
+val embeddings = DistilBertEmbeddings
+  .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_jwlovetea", "en")
+  .setInputCols(Array("documents", "token"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+// sample data for illustration; replace with your own "text" column
+val data = Seq("I love Spark NLP.").toDF("text")
+
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_jwlovetea| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +References + +https://huggingface.co/jwlovetea/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_d5716d28_osanseviero_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_d5716d28_osanseviero_en.md new file mode 100644 index 00000000000000..e0c7fda9a23bcd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_d5716d28_osanseviero_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_osanseviero DistilBertEmbeddings from osanseviero +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_osanseviero +date: 2024-09-07 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_d5716d28_osanseviero` is a English model originally trained by osanseviero. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_osanseviero_en_5.5.0_3.0_1725695198420.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_osanseviero_en_5.5.0_3.0_1725695198420.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertEmbeddings
+from pyspark.ml import Pipeline
+
+# assumes an active SparkSession named `spark` (e.g. from sparknlp.start())
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_osanseviero", "en") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+# sample data for illustration; replace with your own "text" column
+data = spark.createDataFrame([["I love Spark NLP."]]).toDF("text")
+
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+import spark.implicits._
+import com.johnsnowlabs.nlp.base.DocumentAssembler
+import com.johnsnowlabs.nlp.annotators.Tokenizer
+import com.johnsnowlabs.nlp.embeddings.DistilBertEmbeddings
+import org.apache.spark.ml.Pipeline
+
+val document_assembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("documents")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("documents"))
+  .setOutputCol("token")
+
+val embeddings = DistilBertEmbeddings
+  .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_osanseviero", "en")
+  .setInputCols(Array("documents", "token"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+// sample data for illustration; replace with your own "text" column
+val data = Seq("I love Spark NLP.").toDF("text")
+
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+</div>
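+
+For quick checks on a handful of sentences it can be more convenient to wrap the fitted model in a `LightPipeline` instead of building a DataFrame. The sketch below assumes the fitted `pipelineModel` from the Python example above is in scope; the sample sentence is made up.
+
+```python
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+
+# fullAnnotate returns one dict per input string, keyed by output column name
+annotated = light.fullAnnotate("Spark NLP ships many pretrained DistilBERT models.")
+for token_embedding in annotated[0]["embeddings"]:
+    # result holds the token text, embeddings holds the vector for that token
+    print(token_embedding.result, len(token_embedding.embeddings))
+```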
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_osanseviero| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +References + +https://huggingface.co/osanseviero/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_decre99_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_decre99_en.md new file mode 100644 index 00000000000000..c6821464f44569 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_decre99_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_decre99 DistilBertForQuestionAnswering from Decre99 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_decre99 +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_decre99` is a English model originally trained by Decre99. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_decre99_en_5.5.0_3.0_1725695421053.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_decre99_en_5.5.0_3.0_1725695421053.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCol(["question", "context"]) \ + .setOutputCol(["document_question", "document_context"]) + +spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_decre99","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCol(Array("question", "context")) + .setOutputCol(Array("document_question", "document_context")) + +val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_decre99", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq("What framework do I use?","I use spark-nlp.").toDS.toDF("document_question", "document_context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
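+
+The snippet above assumes an already-running session and names its input columns after the assembler's outputs rather than its inputs. A more self-contained variant is sketched below; the session bootstrap and the plural `setInputCols`/`setOutputCols` setters reflect the current Spark NLP Python API and are assumptions on top of this card, so adapt them to your environment.
+
+```python
+import sparknlp
+from sparknlp.base import MultiDocumentAssembler
+from sparknlp.annotator import DistilBertForQuestionAnswering
+from pyspark.ml import Pipeline
+
+spark = sparknlp.start()  # assumption: a local session started through sparknlp
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_decre99", "en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+
+# input columns match the assembler's declared inputs ("question", "context")
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+
+result = pipeline.fit(data).transform(data)
+result.selectExpr("explode(answer.result) as predicted_answer").show(truncate=False)
+```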
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_decre99| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Decre99/distilbert-base-uncased-finetuned-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_decre99_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_decre99_pipeline_en.md new file mode 100644 index 00000000000000..02820cda815f05 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_decre99_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_decre99_pipeline pipeline DistilBertForQuestionAnswering from Decre99 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_decre99_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_decre99_pipeline` is a English model originally trained by Decre99. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_decre99_pipeline_en_5.5.0_3.0_1725695433088.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_decre99_pipeline_en_5.5.0_3.0_1725695433088.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_squad_decre99_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_squad_decre99_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
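+
+As a fuller sketch of the same call, the pipeline can be downloaded once and then applied to a DataFrame. The session setup and the "question"/"context" column names are assumptions (the card only states that the pipeline starts with a MultiDocumentAssembler), so adjust them to your setup.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_squad_decre99_pipeline", lang="en")
+
+# assumption: the inner MultiDocumentAssembler reads "question" and "context" columns
+df = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+
+pipeline.transform(df).selectExpr("explode(answer.result) as predicted_answer").show(truncate=False)
+```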
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_decre99_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Decre99/distilbert-base-uncased-finetuned-squad + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_devsick_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_devsick_en.md new file mode 100644 index 00000000000000..5a920c9a61d02d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_devsick_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_devsick DistilBertForQuestionAnswering from devsick +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_devsick +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_devsick` is a English model originally trained by devsick. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_devsick_en_5.5.0_3.0_1725726989652.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_devsick_en_5.5.0_3.0_1725726989652.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCol(["question", "context"]) \ + .setOutputCol(["document_question", "document_context"]) + +spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_devsick","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCol(Array("question", "context")) + .setOutputCol(Array("document_question", "document_context")) + +val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_devsick", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq("What framework do I use?","I use spark-nlp.").toDS.toDF("document_question", "document_context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
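+
+Once `pipelineDF` has been computed as above, the predicted span sits in the `answer` annotation column. The sketch below pulls out the answer text and the score kept in the annotation metadata; the `score` key is an assumption based on Spark NLP's usual annotation schema, so inspect the metadata on your own output first.
+
+```python
+from pyspark.sql import functions as F
+
+answers = pipelineDF.select(
+    F.expr("answer[0].result").alias("predicted_answer"),
+    F.expr("answer[0].metadata['score']").alias("score")  # assumption: a "score" entry is present
+)
+answers.show(truncate=False)
+```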
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_devsick| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/devsick/distilbert-base-uncased-finetuned-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_devsick_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_devsick_pipeline_en.md new file mode 100644 index 00000000000000..ea8d4c6c948370 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_devsick_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_devsick_pipeline pipeline DistilBertForQuestionAnswering from devsick +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_devsick_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_devsick_pipeline` is a English model originally trained by devsick. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_devsick_pipeline_en_5.5.0_3.0_1725727004140.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_devsick_pipeline_en_5.5.0_3.0_1725727004140.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_squad_devsick_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_squad_devsick_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_devsick_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/devsick/distilbert-base-uncased-finetuned-squad + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_ep8_batch16_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_ep8_batch16_en.md new file mode 100644 index 00000000000000..6d141d442b4c6f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_ep8_batch16_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_ep8_batch16 DistilBertForQuestionAnswering from wieheistdu +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_ep8_batch16 +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_ep8_batch16` is a English model originally trained by wieheistdu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_ep8_batch16_en_5.5.0_3.0_1725736058023.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_ep8_batch16_en_5.5.0_3.0_1725736058023.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCol(["question", "context"]) \ + .setOutputCol(["document_question", "document_context"]) + +spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_ep8_batch16","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCol(Array("question", "context")) + .setOutputCol(Array("document_question", "document_context")) + +val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_ep8_batch16", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq("What framework do I use?","I use spark-nlp.").toDS.toDF("document_question", "document_context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
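+
+The fitted pipeline scores many question/context pairs in one pass. The rows below are made up for illustration, and the column names follow the assembler's declared inputs ("question", "context"); adapt them if your assembler is configured differently.
+
+```python
+batch = spark.createDataFrame(
+    [
+        ["Who maintains Spark NLP?", "Spark NLP is maintained by John Snow Labs."],
+        ["What does DistilBERT distil?", "DistilBERT is a distilled version of BERT."],
+    ]
+).toDF("question", "context")
+
+batch_results = pipelineModel.transform(batch)
+batch_results.selectExpr("question", "explode(answer.result) as predicted_answer").show(truncate=False)
+```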
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_ep8_batch16| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/wieheistdu/distilbert-base-uncased-finetuned-squad-ep8-batch16 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_gyeol0225_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_gyeol0225_en.md new file mode 100644 index 00000000000000..19ab768bb23976 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_gyeol0225_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_gyeol0225 DistilBertForQuestionAnswering from gyeol0225 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_gyeol0225 +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_gyeol0225` is a English model originally trained by gyeol0225. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_gyeol0225_en_5.5.0_3.0_1725736241588.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_gyeol0225_en_5.5.0_3.0_1725736241588.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCol(["question", "context"]) \ + .setOutputCol(["document_question", "document_context"]) + +spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_gyeol0225","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCol(Array("question", "context")) + .setOutputCol(Array("document_question", "document_context")) + +val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_gyeol0225", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq("What framework do I use?","I use spark-nlp.").toDS.toDF("document_question", "document_context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_gyeol0225| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/gyeol0225/distilbert-base-uncased-finetuned-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_ikeofai_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_ikeofai_pipeline_en.md new file mode 100644 index 00000000000000..85762506ea1263 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_ikeofai_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_ikeofai_pipeline pipeline DistilBertForQuestionAnswering from Ikeofai +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_ikeofai_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_ikeofai_pipeline` is a English model originally trained by Ikeofai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_ikeofai_pipeline_en_5.5.0_3.0_1725695717112.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_ikeofai_pipeline_en_5.5.0_3.0_1725695717112.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_squad_ikeofai_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_squad_ikeofai_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_ikeofai_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Ikeofai/distilbert-base-uncased-finetuned-squad + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_injustice_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_injustice_en.md new file mode 100644 index 00000000000000..375e797186f5f3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_injustice_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_injustice DistilBertForQuestionAnswering from Injustice +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_injustice +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_injustice` is a English model originally trained by Injustice. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_injustice_en_5.5.0_3.0_1725726989230.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_injustice_en_5.5.0_3.0_1725726989230.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCol(["question", "context"]) \ + .setOutputCol(["document_question", "document_context"]) + +spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_injustice","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCol(Array("question", "context")) + .setOutputCol(Array("document_question", "document_context")) + +val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_injustice", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq("What framework do I use?","I use spark-nlp.").toDS.toDF("document_question", "document_context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
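+
+For single-pair, low-latency use the fitted model can be wrapped in a `LightPipeline`, so no DataFrame is needed. The two-argument `fullAnnotate(question, context)` form is an assumption about recent Spark NLP releases; verify it against your installed version.
+
+```python
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+
+# assumption: recent releases accept a (question, context) pair for QA pipelines
+result = light.fullAnnotate("What framework do I use?", "I use spark-nlp.")
+print([annotation.result for annotation in result[0]["answer"]])
+```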
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_injustice| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Injustice/distilbert-base-uncased-finetuned-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_injustice_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_injustice_pipeline_en.md new file mode 100644 index 00000000000000..a1a687225c7cde --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_injustice_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_injustice_pipeline pipeline DistilBertForQuestionAnswering from Injustice +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_injustice_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_injustice_pipeline` is a English model originally trained by Injustice. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_injustice_pipeline_en_5.5.0_3.0_1725727004153.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_injustice_pipeline_en_5.5.0_3.0_1725727004153.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_squad_injustice_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_squad_injustice_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
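+
+If you want to confirm that the download really contains the two stages listed under "Included Models", the underlying Spark ML `PipelineModel` can be inspected. The `model` attribute used below is an assumption about the Python wrapper, not something stated in this card.
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_squad_injustice_pipeline", lang="en")
+
+# assumption: the wrapper exposes the fitted PipelineModel as `model`
+for stage in pipeline.model.stages:
+    print(type(stage).__name__)
+```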
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_injustice_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Injustice/distilbert-base-uncased-finetuned-squad + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_sasa3396_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_sasa3396_en.md new file mode 100644 index 00000000000000..5f051e39def459 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_sasa3396_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_sasa3396 DistilBertForQuestionAnswering from sasa3396 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_sasa3396 +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_sasa3396` is a English model originally trained by sasa3396. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_sasa3396_en_5.5.0_3.0_1725746314560.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_sasa3396_en_5.5.0_3.0_1725746314560.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCol(["question", "context"]) \ + .setOutputCol(["document_question", "document_context"]) + +spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_sasa3396","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCol(Array("question", "context")) + .setOutputCol(Array("document_question", "document_context")) + +val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_sasa3396", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq("What framework do I use?","I use spark-nlp.").toDS.toDF("document_question", "document_context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_sasa3396| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/sasa3396/distilbert-base-uncased-finetuned-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_soikit_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_soikit_pipeline_en.md new file mode 100644 index 00000000000000..15a96dd74782e4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_soikit_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_soikit_pipeline pipeline DistilBertForQuestionAnswering from soikit +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_soikit_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_soikit_pipeline` is a English model originally trained by soikit. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_soikit_pipeline_en_5.5.0_3.0_1725695181883.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_soikit_pipeline_en_5.5.0_3.0_1725695181883.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_squad_soikit_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_squad_soikit_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_soikit_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/soikit/distilbert-base-uncased-finetuned-squad + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_vijaym_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_vijaym_en.md new file mode 100644 index 00000000000000..68e816e4439fde --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_vijaym_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_vijaym DistilBertForQuestionAnswering from vijaym +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_vijaym +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_vijaym` is a English model originally trained by vijaym. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_vijaym_en_5.5.0_3.0_1725727102331.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_vijaym_en_5.5.0_3.0_1725727102331.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCol(["question", "context"]) \ + .setOutputCol(["document_question", "document_context"]) + +spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_vijaym","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCol(Array("question", "context")) + .setOutputCol(Array("document_question", "document_context")) + +val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_vijaym", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq("What framework do I use?","I use spark-nlp.").toDS.toDF("document_question", "document_context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_vijaym| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/vijaym/distilbert-base-uncased-finetuned-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_yweslakarep_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_yweslakarep_en.md new file mode 100644 index 00000000000000..25c3ab931b6bb5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_squad_yweslakarep_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_yweslakarep DistilBertForQuestionAnswering from yweslakarep +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_yweslakarep +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_squad_yweslakarep` is a English model originally trained by yweslakarep. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_yweslakarep_en_5.5.0_3.0_1725727224477.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_yweslakarep_en_5.5.0_3.0_1725727224477.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCol(["question", "context"]) \ + .setOutputCol(["document_question", "document_context"]) + +spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_yweslakarep","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCol(Array("question", "context")) + .setOutputCol(Array("document_question", "document_context")) + +val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_finetuned_squad_yweslakarep", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq("What framework do I use?","I use spark-nlp.").toDS.toDF("document_question", "document_context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_yweslakarep| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/yweslakarep/distilbert-base-uncased-finetuned-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_wikiann_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_wikiann_pipeline_en.md new file mode 100644 index 00000000000000..dfabf08778795f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_finetuned_wikiann_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_wikiann_pipeline pipeline DistilBertForTokenClassification from hannahisrael03 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_wikiann_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_wikiann_pipeline` is a English model originally trained by hannahisrael03. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_wikiann_pipeline_en_5.5.0_3.0_1725729784678.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_wikiann_pipeline_en_5.5.0_3.0_1725729784678.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_wikiann_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_wikiann_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
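+
+Because this pipeline starts from a plain DocumentAssembler, it can also be applied to raw strings with `annotate`. The sample sentence and the `"ner"` key are illustrative assumptions; print the returned keys to see the pipeline's actual output columns.
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+ner_pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_wikiann_pipeline", lang="en")
+
+# annotate() takes a single string and returns a dict keyed by output column name
+annotations = ner_pipeline.annotate("John Snow Labs is based in Delaware.")
+print(list(annotations.keys()))
+print(annotations.get("ner", []))  # assumption: the token classifier writes an "ner"-style column
+```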
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_wikiann_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/hannahisrael03/distilbert-base-uncased-finetuned-wikiann + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_pii_200_burkelive_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_pii_200_burkelive_pipeline_en.md new file mode 100644 index 00000000000000..3eb0e128407845 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_pii_200_burkelive_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_pii_200_burkelive_pipeline pipeline DistilBertForTokenClassification from burkelive +author: John Snow Labs +name: distilbert_base_uncased_pii_200_burkelive_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_pii_200_burkelive_pipeline` is a English model originally trained by burkelive. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_pii_200_burkelive_pipeline_en_5.5.0_3.0_1725731112852.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_pii_200_burkelive_pipeline_en_5.5.0_3.0_1725731112852.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_pii_200_burkelive_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_pii_200_burkelive_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_pii_200_burkelive_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/burkelive/distilbert-base-uncased-pii-200 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_squad2_lora_merged_jeukhwang_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_squad2_lora_merged_jeukhwang_en.md new file mode 100644 index 00000000000000..fd359d0c5fe999 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_squad2_lora_merged_jeukhwang_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_base_uncased_squad2_lora_merged_jeukhwang DistilBertForQuestionAnswering from JeukHwang +author: John Snow Labs +name: distilbert_base_uncased_squad2_lora_merged_jeukhwang +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_squad2_lora_merged_jeukhwang` is a English model originally trained by JeukHwang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_squad2_lora_merged_jeukhwang_en_5.5.0_3.0_1725745954708.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_squad2_lora_merged_jeukhwang_en_5.5.0_3.0_1725745954708.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCol(["question", "context"]) \ + .setOutputCol(["document_question", "document_context"]) + +spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_squad2_lora_merged_jeukhwang","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCol(Array("question", "context")) + .setOutputCol(Array("document_question", "document_context")) + +val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_squad2_lora_merged_jeukhwang", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq("What framework do I use?","I use spark-nlp.").toDS.toDF("document_question", "document_context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
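+
+After fitting, the pipeline can be persisted with standard Spark ML mechanics so the pretrained weights are not re-downloaded on every run. The local path is a placeholder.
+
+```python
+from pyspark.ml import PipelineModel
+
+# persist the fitted pipeline (placeholder path)
+pipelineModel.write().overwrite().save("/tmp/distilbert_squad2_lora_merged_qa_pipeline")
+
+# reload later and reuse the same stages on new data
+restored = PipelineModel.load("/tmp/distilbert_squad2_lora_merged_qa_pipeline")
+restored.transform(data).selectExpr("explode(answer.result) as predicted_answer").show(truncate=False)
+```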
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_squad2_lora_merged_jeukhwang| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/JeukHwang/distilbert-base-uncased-squad2-lora-merged \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_squad2_p10_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_squad2_p10_en.md new file mode 100644 index 00000000000000..1038e5df79ec90 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_squad2_p10_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_base_uncased_squad2_p10 DistilBertForQuestionAnswering from pminha +author: John Snow Labs +name: distilbert_base_uncased_squad2_p10 +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_squad2_p10` is a English model originally trained by pminha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_squad2_p10_en_5.5.0_3.0_1725745993304.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_squad2_p10_en_5.5.0_3.0_1725745993304.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = MultiDocumentAssembler() \ + .setInputCol(["question", "context"]) \ + .setOutputCol(["document_question", "document_context"]) + +spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_squad2_p10","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([documentAssembler, spanClassifier]) +data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("document_question", "document_context") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new MultiDocumentAssembler() + .setInputCol(Array("question", "context")) + .setOutputCol(Array("document_question", "document_context")) + +val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_base_uncased_squad2_p10", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) +val data = Seq("What framework do I use?","I use spark-nlp.").toDS.toDF("document_question", "document_context") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_squad2_p10| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|237.6 MB| + +## References + +https://huggingface.co/pminha/distilbert-base-uncased-squad2-p10 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_squad2_p15_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_squad2_p15_pipeline_en.md new file mode 100644 index 00000000000000..a599c167654b8f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_squad2_p15_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_uncased_squad2_p15_pipeline pipeline DistilBertForQuestionAnswering from pminha +author: John Snow Labs +name: distilbert_base_uncased_squad2_p15_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_squad2_p15_pipeline` is a English model originally trained by pminha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_squad2_p15_pipeline_en_5.5.0_3.0_1725727455557.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_squad2_p15_pipeline_en_5.5.0_3.0_1725727455557.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_squad2_p15_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_squad2_p15_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
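+
+The `df` referenced above is not defined in this card. Below is a minimal sketch of how it could be built; the `question`/`context` input columns and the `answer` output column are assumptions based on the question-answering task, not something this card states.
+
+```python
+# Hypothetical input DataFrame; adjust the column names to whatever the
+# downloaded pipeline's first stage actually expects.
+df = spark.createDataFrame(
+    [["What framework do I use?", "I use spark-nlp."]]
+).toDF("question", "context")
+
+annotations = pipeline.transform(df)
+annotations.select("answer.result").show(truncate=False)  # "answer" column name is an assumption
+```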
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_squad2_p15_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|231.5 MB| + +## References + +https://huggingface.co/pminha/distilbert-base-uncased-squad2-p15 + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_squad2_pruned_p30_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_squad2_pruned_p30_pipeline_en.md new file mode 100644 index 00000000000000..c003639234797e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_base_uncased_squad2_pruned_p30_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_base_uncased_squad2_pruned_p30_pipeline pipeline DistilBertForQuestionAnswering from pminha +author: John Snow Labs +name: distilbert_base_uncased_squad2_pruned_p30_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_squad2_pruned_p30_pipeline` is a English model originally trained by pminha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_squad2_pruned_p30_pipeline_en_5.5.0_3.0_1725746172026.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_squad2_pruned_p30_pipeline_en_5.5.0_3.0_1725746172026.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_squad2_pruned_p30_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_squad2_pruned_p30_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_squad2_pruned_p30_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|213.1 MB| + +## References + +https://huggingface.co/pminha/distilbert-base-uncased-squad2-pruned-p30 + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_finetuned_ner_veronica1608_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_finetuned_ner_veronica1608_pipeline_en.md new file mode 100644 index 00000000000000..1581b0d997409b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_finetuned_ner_veronica1608_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_finetuned_ner_veronica1608_pipeline pipeline DistilBertForTokenClassification from veronica1608 +author: John Snow Labs +name: distilbert_finetuned_ner_veronica1608_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_ner_veronica1608_pipeline` is a English model originally trained by veronica1608. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_ner_veronica1608_pipeline_en_5.5.0_3.0_1725734342747.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_ner_veronica1608_pipeline_en_5.5.0_3.0_1725734342747.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_finetuned_ner_veronica1608_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_finetuned_ner_veronica1608_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
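+
+As with the other pretrained pipelines, `df` has to be supplied by the caller. A minimal sketch, assuming the pipeline reads the conventional `text` column used by `DocumentAssembler` and writes its entities to an `ner` column:
+
+```python
+# Hypothetical example sentence; the "ner" output column name is an assumption.
+df = spark.createDataFrame([["John works at John Snow Labs in London."]]).toDF("text")
+
+annotations = pipeline.transform(df)
+annotations.select("ner.result").show(truncate=False)
+```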
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_ner_veronica1608_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/veronica1608/distilbert-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_finetuned_squad_hhjingbo_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_finetuned_squad_hhjingbo_en.md new file mode 100644 index 00000000000000..ddaca45fd3085f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_finetuned_squad_hhjingbo_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_finetuned_squad_hhjingbo DistilBertForQuestionAnswering from HHJingbo +author: John Snow Labs +name: distilbert_finetuned_squad_hhjingbo +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_squad_hhjingbo` is a English model originally trained by HHJingbo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_squad_hhjingbo_en_5.5.0_3.0_1725736245731.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_squad_hhjingbo_en_5.5.0_3.0_1725736245731.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_finetuned_squad_hhjingbo","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_finetuned_squad_hhjingbo", "en")
+  .setInputCols(Array("document_question","document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_squad_hhjingbo| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/HHJingbo/distilbert-finetuned-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_finetuned_squad_hhjingbo_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_finetuned_squad_hhjingbo_pipeline_en.md new file mode 100644 index 00000000000000..4533771957e4d6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_finetuned_squad_hhjingbo_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_finetuned_squad_hhjingbo_pipeline pipeline DistilBertForQuestionAnswering from HHJingbo +author: John Snow Labs +name: distilbert_finetuned_squad_hhjingbo_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_squad_hhjingbo_pipeline` is a English model originally trained by HHJingbo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_squad_hhjingbo_pipeline_en_5.5.0_3.0_1725736257754.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_squad_hhjingbo_pipeline_en_5.5.0_3.0_1725736257754.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_finetuned_squad_hhjingbo_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_finetuned_squad_hhjingbo_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_squad_hhjingbo_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/HHJingbo/distilbert-finetuned-squad + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_finetuned_squadv2_eenda_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_finetuned_squadv2_eenda_en.md new file mode 100644 index 00000000000000..d63574ee845f4b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_finetuned_squadv2_eenda_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_finetuned_squadv2_eenda DistilBertForQuestionAnswering from EENDA +author: John Snow Labs +name: distilbert_finetuned_squadv2_eenda +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_squadv2_eenda` is a English model originally trained by EENDA. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_squadv2_eenda_en_5.5.0_3.0_1725727455198.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_squadv2_eenda_en_5.5.0_3.0_1725727455198.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_finetuned_squadv2_eenda","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_finetuned_squadv2_eenda", "en")
+  .setInputCols(Array("document_question","document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
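+
+Once fitted, `pipelineModel` can be reused on new question/context pairs without refitting. A short sketch (the example rows are illustrative only):
+
+```python
+new_data = spark.createDataFrame(
+    [["Where does Clara live?", "My name is Clara and I live in Berkeley."]]
+).toDF("question", "context")
+
+pipelineModel.transform(new_data).select("answer.result").show(truncate=False)
+```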
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_squadv2_eenda| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/EENDA/distilbert-finetuned-squadv2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_finetuned_squadv2_lusic_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_finetuned_squadv2_lusic_en.md new file mode 100644 index 00000000000000..ec04e7b9ebec58 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_finetuned_squadv2_lusic_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_finetuned_squadv2_lusic DistilBertForQuestionAnswering from Lusic +author: John Snow Labs +name: distilbert_finetuned_squadv2_lusic +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_squadv2_lusic` is a English model originally trained by Lusic. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_squadv2_lusic_en_5.5.0_3.0_1725695318669.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_squadv2_lusic_en_5.5.0_3.0_1725695318669.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_finetuned_squadv2_lusic","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_finetuned_squadv2_lusic", "en")
+  .setInputCols(Array("document_question","document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_squadv2_lusic| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Lusic/distilbert-finetuned-squadv2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_finetuned_squadv2_thangduong0509_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_finetuned_squadv2_thangduong0509_en.md new file mode 100644 index 00000000000000..479b624f30687d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_finetuned_squadv2_thangduong0509_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_finetuned_squadv2_thangduong0509 DistilBertForQuestionAnswering from thangduong0509 +author: John Snow Labs +name: distilbert_finetuned_squadv2_thangduong0509 +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_squadv2_thangduong0509` is a English model originally trained by thangduong0509. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_squadv2_thangduong0509_en_5.5.0_3.0_1725745862467.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_squadv2_thangduong0509_en_5.5.0_3.0_1725745862467.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_finetuned_squadv2_thangduong0509","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_finetuned_squadv2_thangduong0509", "en")
+  .setInputCols(Array("document_question","document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
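+
+To avoid re-downloading and refitting on every run, the fitted `pipelineModel` can be persisted with the standard Spark ML API. A sketch; the path below is only a placeholder:
+
+```python
+from pyspark.ml import PipelineModel
+
+# Placeholder path; use any local, HDFS, or S3 location your cluster can reach.
+pipelineModel.write().overwrite().save("/tmp/distilbert_finetuned_squadv2_thangduong0509_model")
+restored = PipelineModel.load("/tmp/distilbert_finetuned_squadv2_thangduong0509_model")
+restored.transform(data).select("answer.result").show(truncate=False)
+```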
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_squadv2_thangduong0509| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/thangduong0509/distilbert-finetuned-squadv2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_finetuned_squadv2_thangduong0509_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_finetuned_squadv2_thangduong0509_pipeline_en.md new file mode 100644 index 00000000000000..f4caacea33dfe0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_finetuned_squadv2_thangduong0509_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English distilbert_finetuned_squadv2_thangduong0509_pipeline pipeline DistilBertForQuestionAnswering from thangduong0509 +author: John Snow Labs +name: distilbert_finetuned_squadv2_thangduong0509_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_squadv2_thangduong0509_pipeline` is a English model originally trained by thangduong0509. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_squadv2_thangduong0509_pipeline_en_5.5.0_3.0_1725745874788.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_squadv2_thangduong0509_pipeline_en_5.5.0_3.0_1725745874788.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_finetuned_squadv2_thangduong0509_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_finetuned_squadv2_thangduong0509_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_squadv2_thangduong0509_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/thangduong0509/distilbert-finetuned-squadv2 + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_imdb_deborahm_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_imdb_deborahm_pipeline_en.md new file mode 100644 index 00000000000000..a86b36719c54bc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_imdb_deborahm_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_imdb_deborahm_pipeline pipeline DistilBertForSequenceClassification from deborahm +author: John Snow Labs +name: distilbert_imdb_deborahm_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_imdb_deborahm_pipeline` is a English model originally trained by deborahm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_imdb_deborahm_pipeline_en_5.5.0_3.0_1725674752542.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_imdb_deborahm_pipeline_en_5.5.0_3.0_1725674752542.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_imdb_deborahm_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_imdb_deborahm_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
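+
+A minimal sketch of feeding the pipeline a review: the `text` input column follows the usual `DocumentAssembler` convention, and the `class` output column name is an assumption about how this sequence classifier was exported.
+
+```python
+df = spark.createDataFrame([["This movie was a complete waste of time."]]).toDF("text")
+
+annotations = pipeline.transform(df)
+annotations.select("class.result").show(truncate=False)  # predicted label; column name assumed
+```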
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_imdb_deborahm_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/deborahm/distilbert-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_kazakh_ner_2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_kazakh_ner_2_pipeline_en.md new file mode 100644 index 00000000000000..5fa4863e4c4c90 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_kazakh_ner_2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_kazakh_ner_2_pipeline pipeline DistilBertForTokenClassification from yasminsur +author: John Snow Labs +name: distilbert_kazakh_ner_2_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_kazakh_ner_2_pipeline` is a English model originally trained by yasminsur. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_kazakh_ner_2_pipeline_en_5.5.0_3.0_1725730800550.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_kazakh_ner_2_pipeline_en_5.5.0_3.0_1725730800550.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_kazakh_ner_2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_kazakh_ner_2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_kazakh_ner_2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|505.6 MB| + +## References + +https://huggingface.co/yasminsur/distilbert-kazakh-ner-2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_squad_dofla_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_squad_dofla_en.md new file mode 100644 index 00000000000000..639265245103fd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_squad_dofla_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distilbert_squad_dofla DistilBertForQuestionAnswering from Dofla +author: John Snow Labs +name: distilbert_squad_dofla +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_squad_dofla` is a English model originally trained by Dofla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_squad_dofla_en_5.5.0_3.0_1725735871913.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_squad_dofla_en_5.5.0_3.0_1725735871913.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_squad_dofla","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("distilbert_squad_dofla", "en")
+  .setInputCols(Array("document_question","document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_squad_dofla| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Dofla/distilbert-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_trained_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_trained_en.md new file mode 100644 index 00000000000000..243b27a59c898d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_trained_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_trained DistilBertForTokenClassification from AlfredBink +author: John Snow Labs +name: distilbert_trained +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_trained` is a English model originally trained by AlfredBink. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_trained_en_5.5.0_3.0_1725739141080.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_trained_en_5.5.0_3.0_1725739141080.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_trained","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("distilbert_trained", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
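+
+To read the predictions as token/label pairs rather than raw annotation arrays, the usual Spark SQL idiom below can be applied to `pipelineDF` (a sketch; the column names match the stages defined above):
+
+```python
+from pyspark.sql import functions as F
+
+pipelineDF.select(F.explode(F.arrays_zip(pipelineDF.token.result, pipelineDF.ner.result)).alias("cols")) \
+    .select(F.expr("cols['0']").alias("token"),
+            F.expr("cols['1']").alias("ner_label")).show(truncate=False)
+```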
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_trained| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/AlfredBink/distilbert-trained \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_trained_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_trained_pipeline_en.md new file mode 100644 index 00000000000000..bb23e9353df633 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_trained_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_trained_pipeline pipeline DistilBertForTokenClassification from AlfredBink +author: John Snow Labs +name: distilbert_trained_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_trained_pipeline` is a English model originally trained by AlfredBink. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_trained_pipeline_en_5.5.0_3.0_1725739152917.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_trained_pipeline_en_5.5.0_3.0_1725739152917.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_trained_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_trained_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_trained_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/AlfredBink/distilbert-trained + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilbert_twitter_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilbert_twitter_pipeline_en.md new file mode 100644 index 00000000000000..b5558101d2f86f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilbert_twitter_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_twitter_pipeline pipeline DistilBertForSequenceClassification from minhcrafters +author: John Snow Labs +name: distilbert_twitter_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_twitter_pipeline` is a English model originally trained by minhcrafters. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_twitter_pipeline_en_5.5.0_3.0_1725674568135.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_twitter_pipeline_en_5.5.0_3.0_1725674568135.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_twitter_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_twitter_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_twitter_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/minhcrafters/distilbert-twitter + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distillber_squadv2_en.md b/docs/_posts/ahmedlone127/2024-09-07-distillber_squadv2_en.md new file mode 100644 index 00000000000000..376c7481f3af9a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distillber_squadv2_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English distillber_squadv2 DistilBertForQuestionAnswering from hskfd +author: John Snow Labs +name: distillber_squadv2 +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distillber_squadv2` is a English model originally trained by hskfd. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distillber_squadv2_en_5.5.0_3.0_1725745953725.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distillber_squadv2_en_5.5.0_3.0_1725745953725.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("distillber_squadv2","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("distillber_squadv2", "en")
+  .setInputCols(Array("document_question","document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distillber_squadv2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/hskfd/distillber-squadv2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilroberta_base_climate_d_s_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilroberta_base_climate_d_s_pipeline_en.md new file mode 100644 index 00000000000000..0c075a9b767439 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilroberta_base_climate_d_s_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilroberta_base_climate_d_s_pipeline pipeline RoBertaEmbeddings from climatebert +author: John Snow Labs +name: distilroberta_base_climate_d_s_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilroberta_base_climate_d_s_pipeline` is a English model originally trained by climatebert. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilroberta_base_climate_d_s_pipeline_en_5.5.0_3.0_1725697896889.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilroberta_base_climate_d_s_pipeline_en_5.5.0_3.0_1725697896889.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilroberta_base_climate_d_s_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilroberta_base_climate_d_s_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
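+
+A sketch of running the pipeline on raw text: the `text` input column is the `DocumentAssembler` default, and `embeddings` as the output column name is an assumption about how this pipeline was exported.
+
+```python
+df = spark.createDataFrame([["Climate change mitigation requires rapid decarbonization."]]).toDF("text")
+
+annotations = pipeline.transform(df)
+# Each annotation carries its vector in the `embeddings` field.
+annotations.selectExpr("explode(embeddings.embeddings) as token_vector").show(5)
+```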
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilroberta_base_climate_d_s_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|307.4 MB| + +## References + +https://huggingface.co/climatebert/distilroberta-base-climate-d-s + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilroberta_base_colombian_sign_language_python_bimodal_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilroberta_base_colombian_sign_language_python_bimodal_en.md new file mode 100644 index 00000000000000..eb84cd622f3a71 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilroberta_base_colombian_sign_language_python_bimodal_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilroberta_base_colombian_sign_language_python_bimodal RoBertaEmbeddings from antolin +author: John Snow Labs +name: distilroberta_base_colombian_sign_language_python_bimodal +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilroberta_base_colombian_sign_language_python_bimodal` is a English model originally trained by antolin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilroberta_base_colombian_sign_language_python_bimodal_en_5.5.0_3.0_1725678406942.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilroberta_base_colombian_sign_language_python_bimodal_en_5.5.0_3.0_1725678406942.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("distilroberta_base_colombian_sign_language_python_bimodal","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("distilroberta_base_colombian_sign_language_python_bimodal","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
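+
+A quick sanity check on the output of the example above (a sketch using plain Spark SQL functions): it reports how many tokens were embedded per row and the dimensionality of the first token vector.
+
+```python
+from pyspark.sql import functions as F
+
+pipelineDF.select(
+    F.size("embeddings.result").alias("n_tokens"),
+    F.size(F.element_at("embeddings.embeddings", 1)).alias("dim")
+).show()
+```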
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilroberta_base_colombian_sign_language_python_bimodal| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|306.6 MB| + +## References + +https://huggingface.co/antolin/distilroberta-base-csn-python-bimodal \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilroberta_base_colombian_sign_language_python_bimodal_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilroberta_base_colombian_sign_language_python_bimodal_pipeline_en.md new file mode 100644 index 00000000000000..38bbe1651e5ce6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilroberta_base_colombian_sign_language_python_bimodal_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilroberta_base_colombian_sign_language_python_bimodal_pipeline pipeline RoBertaEmbeddings from antolin +author: John Snow Labs +name: distilroberta_base_colombian_sign_language_python_bimodal_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilroberta_base_colombian_sign_language_python_bimodal_pipeline` is a English model originally trained by antolin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilroberta_base_colombian_sign_language_python_bimodal_pipeline_en_5.5.0_3.0_1725678421378.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilroberta_base_colombian_sign_language_python_bimodal_pipeline_en_5.5.0_3.0_1725678421378.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilroberta_base_colombian_sign_language_python_bimodal_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilroberta_base_colombian_sign_language_python_bimodal_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilroberta_base_colombian_sign_language_python_bimodal_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|306.6 MB| + +## References + +https://huggingface.co/antolin/distilroberta-base-csn-python-bimodal + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilroberta_base_ft_mensrights_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilroberta_base_ft_mensrights_en.md new file mode 100644 index 00000000000000..e6b1ebdad19917 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilroberta_base_ft_mensrights_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilroberta_base_ft_mensrights RoBertaEmbeddings from jkruk +author: John Snow Labs +name: distilroberta_base_ft_mensrights +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilroberta_base_ft_mensrights` is a English model originally trained by jkruk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilroberta_base_ft_mensrights_en_5.5.0_3.0_1725678032686.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilroberta_base_ft_mensrights_en_5.5.0_3.0_1725678032686.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("distilroberta_base_ft_mensrights","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("distilroberta_base_ft_mensrights","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
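+
+If downstream Spark ML stages need plain vectors rather than Spark NLP annotations, an `EmbeddingsFinisher` can be appended to the same pipeline. A sketch, assuming the stages stay exactly as defined above:
+
+```python
+from sparknlp.base import EmbeddingsFinisher
+
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["finished_embeddings"]) \
+    .setOutputAsVector(True)
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings, finisher])
+result = pipeline.fit(data).transform(data)
+result.selectExpr("explode(finished_embeddings) as vector").show(5)
+```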
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilroberta_base_ft_mensrights| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|306.5 MB| + +## References + +https://huggingface.co/jkruk/distilroberta-base-ft-mensrights \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilroberta_base_ft_mensrights_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilroberta_base_ft_mensrights_pipeline_en.md new file mode 100644 index 00000000000000..d9003fca3a7342 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilroberta_base_ft_mensrights_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilroberta_base_ft_mensrights_pipeline pipeline RoBertaEmbeddings from jkruk +author: John Snow Labs +name: distilroberta_base_ft_mensrights_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilroberta_base_ft_mensrights_pipeline` is a English model originally trained by jkruk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilroberta_base_ft_mensrights_pipeline_en_5.5.0_3.0_1725678047456.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilroberta_base_ft_mensrights_pipeline_en_5.5.0_3.0_1725678047456.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilroberta_base_ft_mensrights_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilroberta_base_ft_mensrights_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
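+
+The `df` referenced above can be any DataFrame with a `text` column. A short end-to-end sketch is given below; the import path, session start, and sample row are assumptions based on common Spark NLP usage and are not part of the original example.
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+
+# Any DataFrame with a "text" column will do; a single sample row is used here.
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("distilroberta_base_ft_mensrights_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+annotations.printSchema()  # inspect the annotation columns produced by the pipeline
+```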
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilroberta_base_ft_mensrights_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|306.5 MB| + +## References + +https://huggingface.co/jkruk/distilroberta-base-ft-mensrights + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilroberta_base_ft_trueunpopularopinion_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilroberta_base_ft_trueunpopularopinion_en.md new file mode 100644 index 00000000000000..a8693d16fa6f7b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilroberta_base_ft_trueunpopularopinion_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilroberta_base_ft_trueunpopularopinion RoBertaEmbeddings from jkruk +author: John Snow Labs +name: distilroberta_base_ft_trueunpopularopinion +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilroberta_base_ft_trueunpopularopinion` is a English model originally trained by jkruk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilroberta_base_ft_trueunpopularopinion_en_5.5.0_3.0_1725678002119.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilroberta_base_ft_trueunpopularopinion_en_5.5.0_3.0_1725678002119.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("distilroberta_base_ft_trueunpopularopinion","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("distilroberta_base_ft_trueunpopularopinion","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
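+
+Once the example above has run, the token vectors live in the `embeddings` column as Spark NLP annotations. The sketch below shows one way to unpack them; the field names follow the standard Spark NLP annotation schema, so treat the exact layout as an assumption rather than part of this card.
+
+```python
+from pyspark.sql.functions import explode, col
+
+# Each annotation carries the token text in `result` and its vector in `embeddings`.
+(pipelineDF
+    .select(explode(col("embeddings")).alias("ann"))
+    .select(col("ann.result").alias("token"), col("ann.embeddings").alias("vector"))
+    .show(truncate=80))
+```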
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilroberta_base_ft_trueunpopularopinion| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|306.5 MB| + +## References + +https://huggingface.co/jkruk/distilroberta-base-ft-trueunpopularopinion \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilroberta_eli5_mlm_model_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilroberta_eli5_mlm_model_en.md new file mode 100644 index 00000000000000..57fe3808fbc62c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilroberta_eli5_mlm_model_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilroberta_eli5_mlm_model RoBertaEmbeddings from geshijoker +author: John Snow Labs +name: distilroberta_eli5_mlm_model +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilroberta_eli5_mlm_model` is a English model originally trained by geshijoker. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilroberta_eli5_mlm_model_en_5.5.0_3.0_1725716445655.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilroberta_eli5_mlm_model_en_5.5.0_3.0_1725716445655.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("distilroberta_eli5_mlm_model","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("distilroberta_eli5_mlm_model","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilroberta_eli5_mlm_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|306.5 MB| + +## References + +https://huggingface.co/geshijoker/distilroberta_eli5_mlm_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-distilroberta_eli5_mlm_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-distilroberta_eli5_mlm_model_pipeline_en.md new file mode 100644 index 00000000000000..b76059c529fcad --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-distilroberta_eli5_mlm_model_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilroberta_eli5_mlm_model_pipeline pipeline RoBertaEmbeddings from geshijoker +author: John Snow Labs +name: distilroberta_eli5_mlm_model_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilroberta_eli5_mlm_model_pipeline` is a English model originally trained by geshijoker. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilroberta_eli5_mlm_model_pipeline_en_5.5.0_3.0_1725716460131.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilroberta_eli5_mlm_model_pipeline_en_5.5.0_3.0_1725716460131.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilroberta_eli5_mlm_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilroberta_eli5_mlm_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilroberta_eli5_mlm_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|306.5 MB| + +## References + +https://huggingface.co/geshijoker/distilroberta_eli5_mlm_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-dummy_model_alejoa_en.md b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_alejoa_en.md new file mode 100644 index 00000000000000..528e72d87a23ca --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_alejoa_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_alejoa CamemBertEmbeddings from alejoa +author: John Snow Labs +name: dummy_model_alejoa +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_alejoa` is a English model originally trained by alejoa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_alejoa_en_5.5.0_3.0_1725691596201.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_alejoa_en_5.5.0_3.0_1725691596201.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_alejoa","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_alejoa","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
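+
+For quick, single-document inference outside of a DataFrame job, the fitted model from the example above can be wrapped in a `LightPipeline`. The call below is a sketch under that assumption; `LightPipeline` is part of Spark NLP, and the variable names come from the snippet above.
+
+```python
+from sparknlp.base import LightPipeline
+
+# Wraps the fitted PipelineModel for low-latency, in-memory annotation of raw strings.
+light = LightPipeline(pipelineModel)
+result = light.fullAnnotate("I love spark-nlp")[0]
+print(result.keys())  # output columns, e.g. document, token, embeddings
+```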
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_alejoa| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/alejoa/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-dummy_model_anrilombard_en.md b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_anrilombard_en.md new file mode 100644 index 00000000000000..37e98460cc567d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_anrilombard_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_anrilombard CamemBertEmbeddings from anrilombard +author: John Snow Labs +name: dummy_model_anrilombard +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_anrilombard` is a English model originally trained by anrilombard. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_anrilombard_en_5.5.0_3.0_1725692214978.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_anrilombard_en_5.5.0_3.0_1725692214978.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_anrilombard","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_anrilombard","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_anrilombard| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/anrilombard/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-dummy_model_appletreeleaf_en.md b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_appletreeleaf_en.md new file mode 100644 index 00000000000000..b5d07a39e3036d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_appletreeleaf_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_appletreeleaf CamemBertEmbeddings from appletreeleaf +author: John Snow Labs +name: dummy_model_appletreeleaf +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_appletreeleaf` is a English model originally trained by appletreeleaf. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_appletreeleaf_en_5.5.0_3.0_1725728359965.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_appletreeleaf_en_5.5.0_3.0_1725728359965.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_appletreeleaf","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_appletreeleaf","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_appletreeleaf| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/appletreeleaf/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-dummy_model_appletreeleaf_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_appletreeleaf_pipeline_en.md new file mode 100644 index 00000000000000..d1c38d37fb50dd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_appletreeleaf_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_appletreeleaf_pipeline pipeline CamemBertEmbeddings from appletreeleaf +author: John Snow Labs +name: dummy_model_appletreeleaf_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_appletreeleaf_pipeline` is a English model originally trained by appletreeleaf. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_appletreeleaf_pipeline_en_5.5.0_3.0_1725728434972.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_appletreeleaf_pipeline_en_5.5.0_3.0_1725728434972.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_appletreeleaf_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_appletreeleaf_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
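+
+Besides calling `transform` on a DataFrame, a `PretrainedPipeline` can annotate a plain string directly. The sketch below illustrates this; the return shape follows standard Spark NLP behaviour and the sample sentence is only illustrative.
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("dummy_model_appletreeleaf_pipeline", lang = "en")
+
+# annotate() runs the whole pipeline on one string and returns a dict
+# keyed by output column (document, token, embeddings, ...).
+result = pipeline.annotate("I love spark-nlp")
+print(result["token"])
+```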
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_appletreeleaf_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/appletreeleaf/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-dummy_model_bhaskar_gautam_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_bhaskar_gautam_pipeline_en.md new file mode 100644 index 00000000000000..4119946b0dcd5f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_bhaskar_gautam_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_bhaskar_gautam_pipeline pipeline CamemBertEmbeddings from bhaskar-gautam +author: John Snow Labs +name: dummy_model_bhaskar_gautam_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_bhaskar_gautam_pipeline` is a English model originally trained by bhaskar-gautam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_bhaskar_gautam_pipeline_en_5.5.0_3.0_1725728557554.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_bhaskar_gautam_pipeline_en_5.5.0_3.0_1725728557554.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_bhaskar_gautam_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_bhaskar_gautam_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_bhaskar_gautam_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/bhaskar-gautam/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-dummy_model_elliotsmith_en.md b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_elliotsmith_en.md new file mode 100644 index 00000000000000..1d8142d47fc74f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_elliotsmith_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_elliotsmith CamemBertEmbeddings from elliotsmith +author: John Snow Labs +name: dummy_model_elliotsmith +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_elliotsmith` is a English model originally trained by elliotsmith. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_elliotsmith_en_5.5.0_3.0_1725691285508.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_elliotsmith_en_5.5.0_3.0_1725691285508.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_elliotsmith","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_elliotsmith","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_elliotsmith| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/elliotsmith/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-dummy_model_elliotsmith_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_elliotsmith_pipeline_en.md new file mode 100644 index 00000000000000..130e2c4b5b29ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_elliotsmith_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_elliotsmith_pipeline pipeline CamemBertEmbeddings from elliotsmith +author: John Snow Labs +name: dummy_model_elliotsmith_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_elliotsmith_pipeline` is a English model originally trained by elliotsmith. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_elliotsmith_pipeline_en_5.5.0_3.0_1725691359788.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_elliotsmith_pipeline_en_5.5.0_3.0_1725691359788.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_elliotsmith_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_elliotsmith_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_elliotsmith_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/elliotsmith/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-dummy_model_hanzhuo_en.md b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_hanzhuo_en.md new file mode 100644 index 00000000000000..58ea4d9240fc82 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_hanzhuo_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_hanzhuo CamemBertEmbeddings from hanzhuo +author: John Snow Labs +name: dummy_model_hanzhuo +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_hanzhuo` is a English model originally trained by hanzhuo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_hanzhuo_en_5.5.0_3.0_1725728966811.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_hanzhuo_en_5.5.0_3.0_1725728966811.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_hanzhuo","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_hanzhuo","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_hanzhuo| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/hanzhuo/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-dummy_model_jfforero_en.md b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_jfforero_en.md new file mode 100644 index 00000000000000..8a8ec14d772370 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_jfforero_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_jfforero CamemBertEmbeddings from jfforero +author: John Snow Labs +name: dummy_model_jfforero +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_jfforero` is a English model originally trained by jfforero. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_jfforero_en_5.5.0_3.0_1725691360265.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_jfforero_en_5.5.0_3.0_1725691360265.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_jfforero","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_jfforero","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
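+
+The fitted pipeline from the example above is a regular Spark ML `PipelineModel`, so it can be persisted and reloaded with the standard MLlib API. The path below is only a placeholder.
+
+```python
+from pyspark.ml import PipelineModel
+
+# Persist the fitted pipeline (including the downloaded embeddings) to disk ...
+pipelineModel.write().overwrite().save("/tmp/dummy_model_jfforero_pipeline")
+
+# ... and load it back later without refitting.
+restored = PipelineModel.load("/tmp/dummy_model_jfforero_pipeline")
+restored.transform(data).show()
+```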
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_jfforero| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/jfforero/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-dummy_model_linyi_en.md b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_linyi_en.md new file mode 100644 index 00000000000000..4fc50219881fb8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_linyi_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_linyi CamemBertEmbeddings from linyi +author: John Snow Labs +name: dummy_model_linyi +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_linyi` is a English model originally trained by linyi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_linyi_en_5.5.0_3.0_1725728205159.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_linyi_en_5.5.0_3.0_1725728205159.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_linyi","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_linyi","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_linyi| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/linyi/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-dummy_model_marasaki_en.md b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_marasaki_en.md new file mode 100644 index 00000000000000..e1f69d896d938c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_marasaki_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_marasaki CamemBertEmbeddings from marasaki +author: John Snow Labs +name: dummy_model_marasaki +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_marasaki` is a English model originally trained by marasaki. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_marasaki_en_5.5.0_3.0_1725728136541.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_marasaki_en_5.5.0_3.0_1725728136541.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_marasaki","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_marasaki","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_marasaki| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/marasaki/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-dummy_model_marasaki_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_marasaki_pipeline_en.md new file mode 100644 index 00000000000000..f8910307a0ef6b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_marasaki_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_marasaki_pipeline pipeline CamemBertEmbeddings from marasaki +author: John Snow Labs +name: dummy_model_marasaki_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_marasaki_pipeline` is a English model originally trained by marasaki. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_marasaki_pipeline_en_5.5.0_3.0_1725728210309.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_marasaki_pipeline_en_5.5.0_3.0_1725728210309.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_marasaki_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_marasaki_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_marasaki_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/marasaki/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-dummy_model_melody20_en.md b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_melody20_en.md new file mode 100644 index 00000000000000..f2ed3c43f3a47b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_melody20_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_melody20 CamemBertEmbeddings from melody20 +author: John Snow Labs +name: dummy_model_melody20 +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_melody20` is a English model originally trained by melody20. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_melody20_en_5.5.0_3.0_1725692288450.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_melody20_en_5.5.0_3.0_1725692288450.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_melody20","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_melody20","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_melody20| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/melody20/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-dummy_model_melody20_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_melody20_pipeline_en.md new file mode 100644 index 00000000000000..a4f0a9aec23e9b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_melody20_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_melody20_pipeline pipeline CamemBertEmbeddings from melody20 +author: John Snow Labs +name: dummy_model_melody20_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_melody20_pipeline` is a English model originally trained by melody20. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_melody20_pipeline_en_5.5.0_3.0_1725692361715.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_melody20_pipeline_en_5.5.0_3.0_1725692361715.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_melody20_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_melody20_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_melody20_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/melody20/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-dummy_model_pipeline_zh.md b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_pipeline_zh.md new file mode 100644 index 00000000000000..c62771a8c532aa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_pipeline_zh.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Chinese dummy_model_pipeline pipeline CamemBertEmbeddings from gtxygyzb +author: John Snow Labs +name: dummy_model_pipeline +date: 2024-09-07 +tags: [zh, open_source, pipeline, onnx] +task: Embeddings +language: zh +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_pipeline` is a Chinese model originally trained by gtxygyzb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_pipeline_zh_5.5.0_3.0_1725728210906.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_pipeline_zh_5.5.0_3.0_1725728210906.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_pipeline", lang = "zh") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_pipeline", lang = "zh") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|zh| +|Size:|264.0 MB| + +## References + +https://huggingface.co/gtxygyzb/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-dummy_model_rkn222_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_rkn222_pipeline_en.md new file mode 100644 index 00000000000000..4ff2f682dc6469 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_rkn222_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_rkn222_pipeline pipeline CamemBertEmbeddings from RKN222 +author: John Snow Labs +name: dummy_model_rkn222_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_rkn222_pipeline` is a English model originally trained by RKN222. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_rkn222_pipeline_en_5.5.0_3.0_1725728325987.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_rkn222_pipeline_en_5.5.0_3.0_1725728325987.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_rkn222_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_rkn222_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_rkn222_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/RKN222/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-dummy_model_sajid73_en.md b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_sajid73_en.md new file mode 100644 index 00000000000000..894c0ff5ddf8c1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_sajid73_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_sajid73 CamemBertEmbeddings from sajid73 +author: John Snow Labs +name: dummy_model_sajid73 +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_sajid73` is a English model originally trained by sajid73. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_sajid73_en_5.5.0_3.0_1725691645271.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_sajid73_en_5.5.0_3.0_1725691645271.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_sajid73","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_sajid73","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_sajid73| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/sajid73/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-dummy_model_sajid73_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_sajid73_pipeline_en.md new file mode 100644 index 00000000000000..e35d407f1b2cde --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_sajid73_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_sajid73_pipeline pipeline CamemBertEmbeddings from sajid73 +author: John Snow Labs +name: dummy_model_sajid73_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_sajid73_pipeline` is a English model originally trained by sajid73. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_sajid73_pipeline_en_5.5.0_3.0_1725691720008.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_sajid73_pipeline_en_5.5.0_3.0_1725691720008.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_sajid73_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_sajid73_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_sajid73_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/sajid73/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-dummy_model_shadowtwin41_fr.md b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_shadowtwin41_fr.md new file mode 100644 index 00000000000000..5a076d97a4ca6d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_shadowtwin41_fr.md @@ -0,0 +1,94 @@ +--- +layout: model +title: French dummy_model_shadowtwin41 CamemBertEmbeddings from ShadowTwin41 +author: John Snow Labs +name: dummy_model_shadowtwin41 +date: 2024-09-07 +tags: [fr, open_source, onnx, embeddings, camembert] +task: Embeddings +language: fr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_shadowtwin41` is a French model originally trained by ShadowTwin41. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_shadowtwin41_fr_5.5.0_3.0_1725691849181.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_shadowtwin41_fr_5.5.0_3.0_1725691849181.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_shadowtwin41","fr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_shadowtwin41","fr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
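+As a follow-up to the Python example above, the token vectors can be inspected from `pipelineDF`. The sketch below is illustrative and assumes the example was run as written; each row of the `embeddings` column is one token annotation whose `result` field is the token text and whose `embeddings` field is its float vector.
+
+```python
+# Sketch: flatten the token embedding annotations produced by the pipeline above.
+from pyspark.sql import functions as F
+
+pipelineDF.select(F.explode("embeddings").alias("emb")) \
+    .select(F.col("emb.result").alias("token"), F.col("emb.embeddings").alias("vector")) \
+    .show(truncate=80)
+```
+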
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_shadowtwin41| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|fr| +|Size:|264.0 MB| + +## References + +https://huggingface.co/ShadowTwin41/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-dummy_model_shadowtwin41_pipeline_fr.md b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_shadowtwin41_pipeline_fr.md new file mode 100644 index 00000000000000..2b72f00fd49a03 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_shadowtwin41_pipeline_fr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: French dummy_model_shadowtwin41_pipeline pipeline CamemBertEmbeddings from ShadowTwin41 +author: John Snow Labs +name: dummy_model_shadowtwin41_pipeline +date: 2024-09-07 +tags: [fr, open_source, pipeline, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_shadowtwin41_pipeline` is a French model originally trained by ShadowTwin41. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_shadowtwin41_pipeline_fr_5.5.0_3.0_1725691924504.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_shadowtwin41_pipeline_fr_5.5.0_3.0_1725691924504.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_shadowtwin41_pipeline", lang = "fr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_shadowtwin41_pipeline", lang = "fr") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_shadowtwin41_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fr| +|Size:|264.0 MB| + +## References + +https://huggingface.co/ShadowTwin41/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-dummy_model_test_osanseviero_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_test_osanseviero_pipeline_en.md new file mode 100644 index 00000000000000..8c544f630c0291 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_test_osanseviero_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_test_osanseviero_pipeline pipeline CamemBertEmbeddings from osanseviero +author: John Snow Labs +name: dummy_model_test_osanseviero_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_test_osanseviero_pipeline` is a English model originally trained by osanseviero. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_test_osanseviero_pipeline_en_5.5.0_3.0_1725729218053.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_test_osanseviero_pipeline_en_5.5.0_3.0_1725729218053.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_test_osanseviero_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_test_osanseviero_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_test_osanseviero_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/osanseviero/dummy-model-test + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-dummy_model_varunpatrikar_en.md b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_varunpatrikar_en.md new file mode 100644 index 00000000000000..610f4b40c652d3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_varunpatrikar_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_varunpatrikar CamemBertEmbeddings from varunpatrikar +author: John Snow Labs +name: dummy_model_varunpatrikar +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_varunpatrikar` is a English model originally trained by varunpatrikar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_varunpatrikar_en_5.5.0_3.0_1725691513134.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_varunpatrikar_en_5.5.0_3.0_1725691513134.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_varunpatrikar","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_varunpatrikar","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_varunpatrikar| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/varunpatrikar/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-dummy_model_varunpatrikar_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_varunpatrikar_pipeline_en.md new file mode 100644 index 00000000000000..946911e353e773 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_varunpatrikar_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_varunpatrikar_pipeline pipeline CamemBertEmbeddings from varunpatrikar +author: John Snow Labs +name: dummy_model_varunpatrikar_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_varunpatrikar_pipeline` is a English model originally trained by varunpatrikar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_varunpatrikar_pipeline_en_5.5.0_3.0_1725691588682.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_varunpatrikar_pipeline_en_5.5.0_3.0_1725691588682.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_varunpatrikar_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_varunpatrikar_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_varunpatrikar_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/varunpatrikar/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-dummy_model_vonewman_en.md b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_vonewman_en.md new file mode 100644 index 00000000000000..9eed253263f114 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_vonewman_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_vonewman CamemBertEmbeddings from vonewman +author: John Snow Labs +name: dummy_model_vonewman +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_vonewman` is a English model originally trained by vonewman. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_vonewman_en_5.5.0_3.0_1725728476125.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_vonewman_en_5.5.0_3.0_1725728476125.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_vonewman","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_vonewman","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_vonewman| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/vonewman/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-dummy_model_vonewman_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_vonewman_pipeline_en.md new file mode 100644 index 00000000000000..74e69439c06a72 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_vonewman_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_vonewman_pipeline pipeline CamemBertEmbeddings from vonewman +author: John Snow Labs +name: dummy_model_vonewman_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_vonewman_pipeline` is a English model originally trained by vonewman. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_vonewman_pipeline_en_5.5.0_3.0_1725728550421.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_vonewman_pipeline_en_5.5.0_3.0_1725728550421.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_vonewman_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_vonewman_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_vonewman_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/vonewman/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-dummy_model_yuuhanishigata_en.md b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_yuuhanishigata_en.md new file mode 100644 index 00000000000000..bf6f3285c700ec --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_yuuhanishigata_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English dummy_model_yuuhanishigata CamemBertEmbeddings from YuuhaNishigata +author: John Snow Labs +name: dummy_model_yuuhanishigata +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_yuuhanishigata` is a English model originally trained by YuuhaNishigata. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_yuuhanishigata_en_5.5.0_3.0_1725691357983.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_yuuhanishigata_en_5.5.0_3.0_1725691357983.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model_yuuhanishigata","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model_yuuhanishigata","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_yuuhanishigata| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/YuuhaNishigata/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-dummy_model_yuuhanishigata_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_yuuhanishigata_pipeline_en.md new file mode 100644 index 00000000000000..572307b17caa1b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_yuuhanishigata_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English dummy_model_yuuhanishigata_pipeline pipeline CamemBertEmbeddings from YuuhaNishigata +author: John Snow Labs +name: dummy_model_yuuhanishigata_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model_yuuhanishigata_pipeline` is a English model originally trained by YuuhaNishigata. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_yuuhanishigata_pipeline_en_5.5.0_3.0_1725691432194.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_yuuhanishigata_pipeline_en_5.5.0_3.0_1725691432194.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("dummy_model_yuuhanishigata_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("dummy_model_yuuhanishigata_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_yuuhanishigata_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|264.0 MB| + +## References + +https://huggingface.co/YuuhaNishigata/dummy-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-dummy_model_zh.md b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_zh.md new file mode 100644 index 00000000000000..93efac9606e5ed --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-dummy_model_zh.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Chinese dummy_model CamemBertEmbeddings from gtxygyzb +author: John Snow Labs +name: dummy_model +date: 2024-09-07 +tags: [zh, open_source, onnx, embeddings, camembert] +task: Embeddings +language: zh +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dummy_model` is a Chinese model originally trained by gtxygyzb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_zh_5.5.0_3.0_1725728137549.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_zh_5.5.0_3.0_1725728137549.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("dummy_model","zh") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("dummy_model","zh") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|zh| +|Size:|264.0 MB| + +## References + +https://huggingface.co/gtxygyzb/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-dzoqa_malayalam_en.md b/docs/_posts/ahmedlone127/2024-09-07-dzoqa_malayalam_en.md new file mode 100644 index 00000000000000..be76d6db7f28d6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-dzoqa_malayalam_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English dzoqa_malayalam DistilBertForQuestionAnswering from Norphel +author: John Snow Labs +name: dzoqa_malayalam +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dzoqa_malayalam` is a English model originally trained by Norphel. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dzoqa_malayalam_en_5.5.0_3.0_1725695690531.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dzoqa_malayalam_en_5.5.0_3.0_1725695690531.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+     .setInputCols(["question", "context"]) \
+     .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("dzoqa_malayalam","en") \
+     .setInputCols(["document_question","document_context"]) \
+     .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+     .setInputCols(Array("question", "context"))
+     .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("dzoqa_malayalam", "en")
+     .setInputCols(Array("document_question","document_context"))
+     .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
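+To read the predicted span from the example above, the `answer` column can be selected from `pipelineDF`. A small sketch, assuming the Python snippet was run as shown:
+
+```python
+# Sketch: the answer annotation's "result" field holds the extracted text span.
+pipelineDF.selectExpr("document_question.result as question", "answer.result as answer").show(truncate=False)
+```
+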
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dzoqa_malayalam| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Norphel/dzoQA_ml \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-electra_qa_base_best_squad2_en.md b/docs/_posts/ahmedlone127/2024-09-07-electra_qa_base_best_squad2_en.md new file mode 100644 index 00000000000000..05206c613a986b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-electra_qa_base_best_squad2_en.md @@ -0,0 +1,98 @@ +--- +layout: model +title: English ElectraForQuestionAnswering model (from PremalMatalia) +author: John Snow Labs +name: electra_qa_base_best_squad2 +date: 2024-09-07 +tags: [en, open_source, electra, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `electra-base-best-squad2` is a English model originally trained by `PremalMatalia`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/electra_qa_base_best_squad2_en_5.5.0_3.0_1725709292496.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/electra_qa_base_best_squad2_en_5.5.0_3.0_1725709292496.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = MultiDocumentAssembler() \
+.setInputCols(["question", "context"]) \
+.setOutputCols(["document_question", "document_context"])
+
+spanClassifier = BertForQuestionAnswering.pretrained("electra_qa_base_best_squad2","en") \
+.setInputCols(["document_question", "document_context"]) \
+.setOutputCol("answer")\
+.setCaseSensitive(True)
+
+pipeline = Pipeline(stages=[documentAssembler, spanClassifier])
+
+data = spark.createDataFrame([["What is my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+val documentAssembler = new MultiDocumentAssembler()
+.setInputCols(Array("question", "context"))
+.setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = BertForQuestionAnswering.pretrained("electra_qa_base_best_squad2","en")
+.setInputCols(Array("document_question", "document_context"))
+.setOutputCol("answer")
+.setCaseSensitive(true)
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+
+val data = Seq(("What is my name?", "My name is Clara and I live in Berkeley.")).toDF("question", "context")
+
+val result = pipeline.fit(data).transform(data)
+```
+
+{:.nlu-block}
+```python
+import nlu
+nlu.load("en.answer_question.squadv2.electra.base").predict("""What is my name?|||My name is Clara and I live in Berkeley.""")
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|electra_qa_base_best_squad2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|408.0 MB| + +## References + +References + +- https://huggingface.co/PremalMatalia/electra-base-best-squad2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-electra_qa_base_best_squad2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-electra_qa_base_best_squad2_pipeline_en.md new file mode 100644 index 00000000000000..3f7da2725b59fd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-electra_qa_base_best_squad2_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English electra_qa_base_best_squad2_pipeline pipeline BertForQuestionAnswering from PremalMatalia +author: John Snow Labs +name: electra_qa_base_best_squad2_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`electra_qa_base_best_squad2_pipeline` is a English model originally trained by PremalMatalia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/electra_qa_base_best_squad2_pipeline_en_5.5.0_3.0_1725709310468.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/electra_qa_base_best_squad2_pipeline_en_5.5.0_3.0_1725709310468.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("electra_qa_base_best_squad2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("electra_qa_base_best_squad2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|electra_qa_base_best_squad2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|408.0 MB| + +## References + +https://huggingface.co/PremalMatalia/electra-base-best-squad2 + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-elvis_roberta_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-elvis_roberta_pipeline_en.md new file mode 100644 index 00000000000000..c3f3f80cb4ecb1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-elvis_roberta_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English elvis_roberta_pipeline pipeline RoBertaForSequenceClassification from elvis-d +author: John Snow Labs +name: elvis_roberta_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`elvis_roberta_pipeline` is a English model originally trained by elvis-d. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/elvis_roberta_pipeline_en_5.5.0_3.0_1725717594217.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/elvis_roberta_pipeline_en_5.5.0_3.0_1725717594217.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("elvis_roberta_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("elvis_roberta_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|elvis_roberta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|444.0 MB| + +## References + +https://huggingface.co/elvis-d/elvis_roberta + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-email_question_extraction_en.md b/docs/_posts/ahmedlone127/2024-09-07-email_question_extraction_en.md new file mode 100644 index 00000000000000..e2b40fe02d322f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-email_question_extraction_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English email_question_extraction RoBertaForTokenClassification from arya555 +author: John Snow Labs +name: email_question_extraction +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`email_question_extraction` is a English model originally trained by arya555. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/email_question_extraction_en_5.5.0_3.0_1725708527456.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/email_question_extraction_en_5.5.0_3.0_1725708527456.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("email_question_extraction","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("email_question_extraction", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
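+The predicted tags from the example above sit in the `ner` column, aligned one-to-one with the `token` column. A sketch of viewing them side by side, assuming the Python snippet was run as written; if grouped entity chunks are preferred over raw tags, Spark NLP's NerConverter annotator can typically be appended after the classifier, though that step is an assumption and not shown in the original card.
+
+```python
+# Sketch: tokens and their predicted entity tags, as parallel arrays.
+pipelineDF.selectExpr("token.result as tokens", "ner.result as ner_tags").show(truncate=False)
+```
+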
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|email_question_extraction| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|422.7 MB| + +## References + +https://huggingface.co/arya555/email_question_extraction \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-email_question_extraction_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-email_question_extraction_pipeline_en.md new file mode 100644 index 00000000000000..e849672d488317 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-email_question_extraction_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English email_question_extraction_pipeline pipeline RoBertaForTokenClassification from arya555 +author: John Snow Labs +name: email_question_extraction_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`email_question_extraction_pipeline` is a English model originally trained by arya555. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/email_question_extraction_pipeline_en_5.5.0_3.0_1725708565641.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/email_question_extraction_pipeline_en_5.5.0_3.0_1725708565641.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("email_question_extraction_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("email_question_extraction_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|email_question_extraction_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|422.7 MB| + +## References + +https://huggingface.co/arya555/email_question_extraction + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-emotion_test_1000_en.md b/docs/_posts/ahmedlone127/2024-09-07-emotion_test_1000_en.md new file mode 100644 index 00000000000000..ac9b32d9aa4d4f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-emotion_test_1000_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English emotion_test_1000 DistilBertForSequenceClassification from espirado1 +author: John Snow Labs +name: emotion_test_1000 +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`emotion_test_1000` is a English model originally trained by espirado1. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/emotion_test_1000_en_5.5.0_3.0_1725675028395.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/emotion_test_1000_en_5.5.0_3.0_1725675028395.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("emotion_test_1000","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("emotion_test_1000", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
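+For the classifier above, the predicted label for each input row lands in the `class` column. A brief sketch, assuming the Python example was run as shown:
+
+```python
+# Sketch: one predicted emotion label per input document.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```
+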
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|emotion_test_1000| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/espirado1/emotion-test-1000 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-english_multinerd_ner_roberta_en.md b/docs/_posts/ahmedlone127/2024-09-07-english_multinerd_ner_roberta_en.md new file mode 100644 index 00000000000000..89a5b04b1ea9e5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-english_multinerd_ner_roberta_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English english_multinerd_ner_roberta RoBertaForTokenClassification from pariakashani +author: John Snow Labs +name: english_multinerd_ner_roberta +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`english_multinerd_ner_roberta` is a English model originally trained by pariakashani. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/english_multinerd_ner_roberta_en_5.5.0_3.0_1725708143245.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/english_multinerd_ner_roberta_en_5.5.0_3.0_1725708143245.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("english_multinerd_ner_roberta","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("english_multinerd_ner_roberta", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|english_multinerd_ner_roberta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|459.8 MB| + +## References + +https://huggingface.co/pariakashani/en-multinerd-ner-roberta \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-english_multinerd_ner_roberta_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-english_multinerd_ner_roberta_pipeline_en.md new file mode 100644 index 00000000000000..8b211a34b6e68a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-english_multinerd_ner_roberta_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English english_multinerd_ner_roberta_pipeline pipeline RoBertaForTokenClassification from pariakashani +author: John Snow Labs +name: english_multinerd_ner_roberta_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`english_multinerd_ner_roberta_pipeline` is a English model originally trained by pariakashani. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/english_multinerd_ner_roberta_pipeline_en_5.5.0_3.0_1725708165379.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/english_multinerd_ner_roberta_pipeline_en_5.5.0_3.0_1725708165379.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("english_multinerd_ner_roberta_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("english_multinerd_ner_roberta_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|english_multinerd_ner_roberta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|459.8 MB| + +## References + +https://huggingface.co/pariakashani/en-multinerd-ner-roberta + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-envroberta_base_en.md b/docs/_posts/ahmedlone127/2024-09-07-envroberta_base_en.md new file mode 100644 index 00000000000000..4ab553239bc805 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-envroberta_base_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English envroberta_base RoBertaEmbeddings from ESGBERT +author: John Snow Labs +name: envroberta_base +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`envroberta_base` is a English model originally trained by ESGBERT. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/envroberta_base_en_5.5.0_3.0_1725673433717.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/envroberta_base_en_5.5.0_3.0_1725673433717.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("envroberta_base","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("envroberta_base","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
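+If the token vectors produced above need to feed a downstream Spark ML stage, they can be converted out of annotation format. The sketch below uses Spark NLP's EmbeddingsFinisher and is illustrative rather than part of the original card; it assumes the Python example above was run and `pipelineDF` is available.
+
+```python
+# Sketch: convert token embedding annotations into plain Spark ML vectors.
+from sparknlp.base import EmbeddingsFinisher
+
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["finished_embeddings"]) \
+    .setOutputAsVector(True)
+
+finishedDF = finisher.transform(pipelineDF)
+finishedDF.select("finished_embeddings").show(truncate=80)
+```
+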
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|envroberta_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|466.1 MB| + +## References + +https://huggingface.co/ESGBERT/EnvRoBERTa-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-erikrepo_en.md b/docs/_posts/ahmedlone127/2024-09-07-erikrepo_en.md new file mode 100644 index 00000000000000..0bf701b2ff1ebd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-erikrepo_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English erikrepo DistilBertEmbeddings from erikmsz +author: John Snow Labs +name: erikrepo +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`erikrepo` is a English model originally trained by erikmsz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/erikrepo_en_5.5.0_3.0_1725742593977.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/erikrepo_en_5.5.0_3.0_1725742593977.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("erikrepo","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("erikrepo","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|erikrepo| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/erikmsz/erikrepo \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-esmlmt62_2500_en.md b/docs/_posts/ahmedlone127/2024-09-07-esmlmt62_2500_en.md new file mode 100644 index 00000000000000..5828c1f9d6cd7a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-esmlmt62_2500_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English esmlmt62_2500 BertEmbeddings from hjkim811 +author: John Snow Labs +name: esmlmt62_2500 +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`esmlmt62_2500` is a English model originally trained by hjkim811. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/esmlmt62_2500_en_5.5.0_3.0_1725675888389.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/esmlmt62_2500_en_5.5.0_3.0_1725675888389.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("esmlmt62_2500","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("esmlmt62_2500","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|esmlmt62_2500| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/hjkim811/esmlmt62-2500 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-esperberto_small_sayula_popoluca_eo.md b/docs/_posts/ahmedlone127/2024-09-07-esperberto_small_sayula_popoluca_eo.md new file mode 100644 index 00000000000000..a85154b709d997 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-esperberto_small_sayula_popoluca_eo.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Esperanto esperberto_small_sayula_popoluca RoBertaForTokenClassification from Xenova +author: John Snow Labs +name: esperberto_small_sayula_popoluca +date: 2024-09-07 +tags: [eo, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: eo +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`esperberto_small_sayula_popoluca` is a Esperanto model originally trained by Xenova. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/esperberto_small_sayula_popoluca_eo_5.5.0_3.0_1725668119115.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/esperberto_small_sayula_popoluca_eo_5.5.0_3.0_1725668119115.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("esperberto_small_sayula_popoluca","eo") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("esperberto_small_sayula_popoluca", "eo")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|esperberto_small_sayula_popoluca| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|eo| +|Size:|311.4 MB| + +## References + +https://huggingface.co/Xenova/EsperBERTo-small-pos \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-eth_setfit_model_en.md b/docs/_posts/ahmedlone127/2024-09-07-eth_setfit_model_en.md new file mode 100644 index 00000000000000..521a2fe4d48f2c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-eth_setfit_model_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English eth_setfit_model MPNetEmbeddings from kanixwang +author: John Snow Labs +name: eth_setfit_model +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`eth_setfit_model` is a English model originally trained by kanixwang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/eth_setfit_model_en_5.5.0_3.0_1725703310208.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/eth_setfit_model_en_5.5.0_3.0_1725703310208.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("eth_setfit_model","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("eth_setfit_model","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|eth_setfit_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/kanixwang/eth-setfit-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-eth_setfit_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-eth_setfit_model_pipeline_en.md new file mode 100644 index 00000000000000..837aea0e96f76a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-eth_setfit_model_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English eth_setfit_model_pipeline pipeline MPNetEmbeddings from kanixwang +author: John Snow Labs +name: eth_setfit_model_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`eth_setfit_model_pipeline` is a English model originally trained by kanixwang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/eth_setfit_model_pipeline_en_5.5.0_3.0_1725703329519.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/eth_setfit_model_pipeline_en_5.5.0_3.0_1725703329519.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("eth_setfit_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("eth_setfit_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
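+
+The snippet above assumes an existing DataFrame `df`; a minimal sketch of preparing one (the `text` column name is an assumption matching the other examples in these docs) could be:
+
+```python
+# Build a toy input DataFrame and run the pretrained pipeline on it.
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+result = pipeline.transform(df)
+result.printSchema()  # inspect which annotation columns the pipeline added
+```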
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|eth_setfit_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/kanixwang/eth-setfit-model + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-ewondo_xlm_roberta_base_pipeline_nan.md b/docs/_posts/ahmedlone127/2024-09-07-ewondo_xlm_roberta_base_pipeline_nan.md new file mode 100644 index 00000000000000..52967bde78f208 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-ewondo_xlm_roberta_base_pipeline_nan.md @@ -0,0 +1,70 @@ +--- +layout: model +title: None ewondo_xlm_roberta_base_pipeline pipeline XlmRoBertaEmbeddings from ELRs +author: John Snow Labs +name: ewondo_xlm_roberta_base_pipeline +date: 2024-09-07 +tags: [nan, open_source, pipeline, onnx] +task: Embeddings +language: nan +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ewondo_xlm_roberta_base_pipeline` is a None model originally trained by ELRs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ewondo_xlm_roberta_base_pipeline_nan_5.5.0_3.0_1725677515205.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ewondo_xlm_roberta_base_pipeline_nan_5.5.0_3.0_1725677515205.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ewondo_xlm_roberta_base_pipeline", lang = "nan") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ewondo_xlm_roberta_base_pipeline", lang = "nan") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ewondo_xlm_roberta_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|nan| +|Size:|1.0 GB| + +## References + +https://huggingface.co/ELRs/Ewondo_xlm-roberta-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-expe_0_en.md b/docs/_posts/ahmedlone127/2024-09-07-expe_0_en.md new file mode 100644 index 00000000000000..6b79f331ee8aeb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-expe_0_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English expe_0 RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: expe_0 +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`expe_0` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/expe_0_en_5.5.0_3.0_1725679585006.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/expe_0_en_5.5.0_3.0_1725679585006.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("expe_0","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("expe_0", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
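+
+A sketch of reading the predicted label from the `class` column produced above (annotation schema assumed, not part of the original card):
+
+```python
+from pyspark.sql import functions as F
+
+# The predicted label is stored in the `result` field of each annotation.
+pipelineDF.select(F.explode("class").alias("c")) \
+    .select(F.col("c.result").alias("predicted_label")) \
+    .show(truncate=False)
+```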
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|expe_0| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/Expe_0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-expe_0_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-expe_0_pipeline_en.md new file mode 100644 index 00000000000000..61ab9b73449dab --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-expe_0_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English expe_0_pipeline pipeline RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: expe_0_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`expe_0_pipeline` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/expe_0_pipeline_en_5.5.0_3.0_1725679607153.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/expe_0_pipeline_en_5.5.0_3.0_1725679607153.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("expe_0_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("expe_0_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|expe_0_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/Expe_0 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-extractive_qa_squad_en.md b/docs/_posts/ahmedlone127/2024-09-07-extractive_qa_squad_en.md new file mode 100644 index 00000000000000..6092478ef5c184 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-extractive_qa_squad_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English extractive_qa_squad DistilBertForQuestionAnswering from Palistha +author: John Snow Labs +name: extractive_qa_squad +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`extractive_qa_squad` is a English model originally trained by Palistha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/extractive_qa_squad_en_5.5.0_3.0_1725722626336.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/extractive_qa_squad_en_5.5.0_3.0_1725722626336.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("extractive_qa_squad","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("extractive_qa_squad", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
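+
+To read the predicted span out of `pipelineDF`, the `answer` annotations can be exploded; this is a sketch based on the standard Spark NLP annotation schema, not part of the original card:
+
+```python
+from pyspark.sql import functions as F
+
+# The answer text lives in the `result` field of each annotation.
+pipelineDF.select(F.explode("answer").alias("a")) \
+    .select(F.col("a.result").alias("predicted_answer")) \
+    .show(truncate=False)
+```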
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|extractive_qa_squad| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Palistha/extractive_qa_squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-fairlex_ecthr_minilm_en.md b/docs/_posts/ahmedlone127/2024-09-07-fairlex_ecthr_minilm_en.md new file mode 100644 index 00000000000000..b58b5cfeb3f685 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-fairlex_ecthr_minilm_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English fairlex_ecthr_minilm RoBertaEmbeddings from coastalcph +author: John Snow Labs +name: fairlex_ecthr_minilm +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fairlex_ecthr_minilm` is a English model originally trained by coastalcph. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fairlex_ecthr_minilm_en_5.5.0_3.0_1725677723647.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fairlex_ecthr_minilm_en_5.5.0_3.0_1725677723647.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("fairlex_ecthr_minilm","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("fairlex_ecthr_minilm","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fairlex_ecthr_minilm| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|114.0 MB| + +## References + +https://huggingface.co/coastalcph/fairlex-ecthr-minilm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-fairlex_ecthr_minilm_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-fairlex_ecthr_minilm_pipeline_en.md new file mode 100644 index 00000000000000..fd12a00b4ec299 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-fairlex_ecthr_minilm_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English fairlex_ecthr_minilm_pipeline pipeline RoBertaEmbeddings from coastalcph +author: John Snow Labs +name: fairlex_ecthr_minilm_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fairlex_ecthr_minilm_pipeline` is a English model originally trained by coastalcph. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fairlex_ecthr_minilm_pipeline_en_5.5.0_3.0_1725677729800.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fairlex_ecthr_minilm_pipeline_en_5.5.0_3.0_1725677729800.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("fairlex_ecthr_minilm_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("fairlex_ecthr_minilm_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fairlex_ecthr_minilm_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|114.0 MB| + +## References + +https://huggingface.co/coastalcph/fairlex-ecthr-minilm + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-fewshot_qa_002_20230622_001_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-fewshot_qa_002_20230622_001_pipeline_en.md new file mode 100644 index 00000000000000..b3665b2353c00f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-fewshot_qa_002_20230622_001_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English fewshot_qa_002_20230622_001_pipeline pipeline XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: fewshot_qa_002_20230622_001_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fewshot_qa_002_20230622_001_pipeline` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fewshot_qa_002_20230622_001_pipeline_en_5.5.0_3.0_1725686154408.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fewshot_qa_002_20230622_001_pipeline_en_5.5.0_3.0_1725686154408.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("fewshot_qa_002_20230622_001_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("fewshot_qa_002_20230622_001_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fewshot_qa_002_20230622_001_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|880.1 MB| + +## References + +https://huggingface.co/intanm/fewshot-qa-002-20230622-001 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-film95960roberta_base_en.md b/docs/_posts/ahmedlone127/2024-09-07-film95960roberta_base_en.md new file mode 100644 index 00000000000000..69098e71b18eb5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-film95960roberta_base_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English film95960roberta_base RoBertaEmbeddings from AmaiaSolaun +author: John Snow Labs +name: film95960roberta_base +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`film95960roberta_base` is a English model originally trained by AmaiaSolaun. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/film95960roberta_base_en_5.5.0_3.0_1725677793723.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/film95960roberta_base_en_5.5.0_3.0_1725677793723.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("film95960roberta_base","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("film95960roberta_base","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|film95960roberta_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|465.9 MB| + +## References + +https://huggingface.co/AmaiaSolaun/film95960roberta-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-film95960roberta_base_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-film95960roberta_base_pipeline_en.md new file mode 100644 index 00000000000000..c2f5f19ee49873 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-film95960roberta_base_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English film95960roberta_base_pipeline pipeline RoBertaEmbeddings from AmaiaSolaun +author: John Snow Labs +name: film95960roberta_base_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`film95960roberta_base_pipeline` is a English model originally trained by AmaiaSolaun. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/film95960roberta_base_pipeline_en_5.5.0_3.0_1725677815840.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/film95960roberta_base_pipeline_en_5.5.0_3.0_1725677815840.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("film95960roberta_base_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("film95960roberta_base_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|film95960roberta_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|465.9 MB| + +## References + +https://huggingface.co/AmaiaSolaun/film95960roberta-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-fine_tune_en.md b/docs/_posts/ahmedlone127/2024-09-07-fine_tune_en.md new file mode 100644 index 00000000000000..e5d5371703cd2f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-fine_tune_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English fine_tune MPNetEmbeddings from diegoicomp +author: John Snow Labs +name: fine_tune +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fine_tune` is a English model originally trained by diegoicomp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tune_en_5.5.0_3.0_1725703441563.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tune_en_5.5.0_3.0_1725703441563.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("fine_tune","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("fine_tune","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tune| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/diegoicomp/fine-tune \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-fine_tune_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-fine_tune_pipeline_en.md new file mode 100644 index 00000000000000..b7e688ae9dbb48 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-fine_tune_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English fine_tune_pipeline pipeline MPNetEmbeddings from diegoicomp +author: John Snow Labs +name: fine_tune_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fine_tune_pipeline` is a English model originally trained by diegoicomp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tune_pipeline_en_5.5.0_3.0_1725703460646.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tune_pipeline_en_5.5.0_3.0_1725703460646.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("fine_tune_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("fine_tune_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tune_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/diegoicomp/fine-tune + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-fine_tuned_distilbert_en.md b/docs/_posts/ahmedlone127/2024-09-07-fine_tuned_distilbert_en.md new file mode 100644 index 00000000000000..b0e4a79edd04ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-fine_tuned_distilbert_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English fine_tuned_distilbert DistilBertForQuestionAnswering from Roamify +author: John Snow Labs +name: fine_tuned_distilbert +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fine_tuned_distilbert` is a English model originally trained by Roamify. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tuned_distilbert_en_5.5.0_3.0_1725746193409.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tuned_distilbert_en_5.5.0_3.0_1725746193409.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("fine_tuned_distilbert","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("fine_tuned_distilbert", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tuned_distilbert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Roamify/fine-tuned-distilbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-fine_tuned_tradisi_bali_en.md b/docs/_posts/ahmedlone127/2024-09-07-fine_tuned_tradisi_bali_en.md new file mode 100644 index 00000000000000..2104b96e1020e0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-fine_tuned_tradisi_bali_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English fine_tuned_tradisi_bali DistilBertForQuestionAnswering from SwastyMaharani +author: John Snow Labs +name: fine_tuned_tradisi_bali +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fine_tuned_tradisi_bali` is a English model originally trained by SwastyMaharani. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tuned_tradisi_bali_en_5.5.0_3.0_1725727151969.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tuned_tradisi_bali_en_5.5.0_3.0_1725727151969.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("fine_tuned_tradisi_bali","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("fine_tuned_tradisi_bali", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tuned_tradisi_bali| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/SwastyMaharani/fine-tuned-tradisi-bali \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-finetuned_aihub_english_tonga_tonga_islands_korean_en.md b/docs/_posts/ahmedlone127/2024-09-07-finetuned_aihub_english_tonga_tonga_islands_korean_en.md new file mode 100644 index 00000000000000..eef6d0f3c882da --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-finetuned_aihub_english_tonga_tonga_islands_korean_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English finetuned_aihub_english_tonga_tonga_islands_korean MarianTransformer from YoungBinLee +author: John Snow Labs +name: finetuned_aihub_english_tonga_tonga_islands_korean +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_aihub_english_tonga_tonga_islands_korean` is a English model originally trained by YoungBinLee. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_aihub_english_tonga_tonga_islands_korean_en_5.5.0_3.0_1725746599826.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_aihub_english_tonga_tonga_islands_korean_en_5.5.0_3.0_1725746599826.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("finetuned_aihub_english_tonga_tonga_islands_korean","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("finetuned_aihub_english_tonga_tonga_islands_korean","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
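+
+A sketch of pulling the translated text out of the `translation` column produced above (field names assume the standard Spark NLP annotation schema, not part of the original card):
+
+```python
+from pyspark.sql import functions as F
+
+# Each translated sentence is returned in the `result` field of the annotation.
+pipelineDF.select(F.explode("translation").alias("t")) \
+    .select(F.col("t.result").alias("translated_text")) \
+    .show(truncate=False)
+```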
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_aihub_english_tonga_tonga_islands_korean| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/YoungBinLee/finetuned-aihub-en-to-ko \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-finetuned_aihub_english_tonga_tonga_islands_korean_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-finetuned_aihub_english_tonga_tonga_islands_korean_pipeline_en.md new file mode 100644 index 00000000000000..cb3d0d3cb471d0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-finetuned_aihub_english_tonga_tonga_islands_korean_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English finetuned_aihub_english_tonga_tonga_islands_korean_pipeline pipeline MarianTransformer from YoungBinLee +author: John Snow Labs +name: finetuned_aihub_english_tonga_tonga_islands_korean_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_aihub_english_tonga_tonga_islands_korean_pipeline` is a English model originally trained by YoungBinLee. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_aihub_english_tonga_tonga_islands_korean_pipeline_en_5.5.0_3.0_1725746647875.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_aihub_english_tonga_tonga_islands_korean_pipeline_en_5.5.0_3.0_1725746647875.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("finetuned_aihub_english_tonga_tonga_islands_korean_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("finetuned_aihub_english_tonga_tonga_islands_korean_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_aihub_english_tonga_tonga_islands_korean_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/YoungBinLee/finetuned-aihub-en-to-ko + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-finetuned_one_epoch_multi_qa_mpnet_base_dot_v1_en.md b/docs/_posts/ahmedlone127/2024-09-07-finetuned_one_epoch_multi_qa_mpnet_base_dot_v1_en.md new file mode 100644 index 00000000000000..6eae2e2e6ce9f5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-finetuned_one_epoch_multi_qa_mpnet_base_dot_v1_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English finetuned_one_epoch_multi_qa_mpnet_base_dot_v1 MPNetEmbeddings from mustozsarac +author: John Snow Labs +name: finetuned_one_epoch_multi_qa_mpnet_base_dot_v1 +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_one_epoch_multi_qa_mpnet_base_dot_v1` is a English model originally trained by mustozsarac. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_one_epoch_multi_qa_mpnet_base_dot_v1_en_5.5.0_3.0_1725703696556.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_one_epoch_multi_qa_mpnet_base_dot_v1_en_5.5.0_3.0_1725703696556.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("finetuned_one_epoch_multi_qa_mpnet_base_dot_v1","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("finetuned_one_epoch_multi_qa_mpnet_base_dot_v1","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_one_epoch_multi_qa_mpnet_base_dot_v1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/mustozsarac/finetuned-one-epoch-multi-qa-mpnet-base-dot-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-finetuned_one_epoch_multi_qa_mpnet_base_dot_v1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-finetuned_one_epoch_multi_qa_mpnet_base_dot_v1_pipeline_en.md new file mode 100644 index 00000000000000..3ee7d305e605ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-finetuned_one_epoch_multi_qa_mpnet_base_dot_v1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English finetuned_one_epoch_multi_qa_mpnet_base_dot_v1_pipeline pipeline MPNetEmbeddings from mustozsarac +author: John Snow Labs +name: finetuned_one_epoch_multi_qa_mpnet_base_dot_v1_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_one_epoch_multi_qa_mpnet_base_dot_v1_pipeline` is a English model originally trained by mustozsarac. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_one_epoch_multi_qa_mpnet_base_dot_v1_pipeline_en_5.5.0_3.0_1725703720319.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_one_epoch_multi_qa_mpnet_base_dot_v1_pipeline_en_5.5.0_3.0_1725703720319.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("finetuned_one_epoch_multi_qa_mpnet_base_dot_v1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("finetuned_one_epoch_multi_qa_mpnet_base_dot_v1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_one_epoch_multi_qa_mpnet_base_dot_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/mustozsarac/finetuned-one-epoch-multi-qa-mpnet-base-dot-v1 + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-finetuned_sentiment_classfication_roberta_model_slickdata_en.md b/docs/_posts/ahmedlone127/2024-09-07-finetuned_sentiment_classfication_roberta_model_slickdata_en.md new file mode 100644 index 00000000000000..b19f3462aa2ec7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-finetuned_sentiment_classfication_roberta_model_slickdata_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English finetuned_sentiment_classfication_roberta_model_slickdata RoBertaForSequenceClassification from slickdata +author: John Snow Labs +name: finetuned_sentiment_classfication_roberta_model_slickdata +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_sentiment_classfication_roberta_model_slickdata` is a English model originally trained by slickdata. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_sentiment_classfication_roberta_model_slickdata_en_5.5.0_3.0_1725680323419.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_sentiment_classfication_roberta_model_slickdata_en_5.5.0_3.0_1725680323419.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("finetuned_sentiment_classfication_roberta_model_slickdata","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("finetuned_sentiment_classfication_roberta_model_slickdata", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
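+
+Once `pipelineDF` from the Python example above is available, the predicted sentiment label for each row sits in the `class` column's `result` field (standard Spark NLP annotation schema); a short sketch:
+
+```python
+# Show each input text next to its predicted label.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```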
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_sentiment_classfication_roberta_model_slickdata| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|443.7 MB| + +## References + +https://huggingface.co/slickdata/finetuned-Sentiment-classfication-ROBERTA-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-finetuned_sentiment_classfication_roberta_model_slickdata_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-finetuned_sentiment_classfication_roberta_model_slickdata_pipeline_en.md new file mode 100644 index 00000000000000..7a0d118373e842 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-finetuned_sentiment_classfication_roberta_model_slickdata_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English finetuned_sentiment_classfication_roberta_model_slickdata_pipeline pipeline RoBertaForSequenceClassification from slickdata +author: John Snow Labs +name: finetuned_sentiment_classfication_roberta_model_slickdata_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_sentiment_classfication_roberta_model_slickdata_pipeline` is a English model originally trained by slickdata. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_sentiment_classfication_roberta_model_slickdata_pipeline_en_5.5.0_3.0_1725680346107.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_sentiment_classfication_roberta_model_slickdata_pipeline_en_5.5.0_3.0_1725680346107.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("finetuned_sentiment_classfication_roberta_model_slickdata_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("finetuned_sentiment_classfication_roberta_model_slickdata_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_sentiment_classfication_roberta_model_slickdata_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|443.7 MB| + +## References + +https://huggingface.co/slickdata/finetuned-Sentiment-classfication-ROBERTA-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-finetuning_mixed_en.md b/docs/_posts/ahmedlone127/2024-09-07-finetuning_mixed_en.md new file mode 100644 index 00000000000000..348428b381a198 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-finetuning_mixed_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English finetuning_mixed MPNetEmbeddings from jhsmith +author: John Snow Labs +name: finetuning_mixed +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuning_mixed` is a English model originally trained by jhsmith. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuning_mixed_en_5.5.0_3.0_1725703816678.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuning_mixed_en_5.5.0_3.0_1725703816678.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("finetuning_mixed","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("finetuning_mixed","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
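+
+Continuing from the Python example above, the `embeddings` column holds one MPNet annotation per input document, with the dense vector stored in its `embeddings` field (generic Spark NLP annotation schema); a brief sketch:
+
+```python
+# One sentence-level vector per row.
+pipelineDF.select("embeddings.embeddings").show(truncate=80)
+```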
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuning_mixed| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.4 MB| + +## References + +https://huggingface.co/jhsmith/finetuning_mixed \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-finsentencebert_en.md b/docs/_posts/ahmedlone127/2024-09-07-finsentencebert_en.md new file mode 100644 index 00000000000000..cfa0bed7cdc882 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-finsentencebert_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English finsentencebert MPNetEmbeddings from syang687 +author: John Snow Labs +name: finsentencebert +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finsentencebert` is a English model originally trained by syang687. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finsentencebert_en_5.5.0_3.0_1725703010006.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finsentencebert_en_5.5.0_3.0_1725703010006.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("finsentencebert","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("finsentencebert","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finsentencebert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|405.6 MB| + +## References + +https://huggingface.co/syang687/FinSentenceBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-fresh_model_uncased_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-fresh_model_uncased_pipeline_en.md new file mode 100644 index 00000000000000..435f35d812521c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-fresh_model_uncased_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English fresh_model_uncased_pipeline pipeline DistilBertForTokenClassification from Gkumi +author: John Snow Labs +name: fresh_model_uncased_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`fresh_model_uncased_pipeline` is a English model originally trained by Gkumi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fresh_model_uncased_pipeline_en_5.5.0_3.0_1725739235022.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fresh_model_uncased_pipeline_en_5.5.0_3.0_1725739235022.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("fresh_model_uncased_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("fresh_model_uncased_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
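+
+For a quick check on a single string, `PretrainedPipeline` also offers `annotate`, which returns a plain dictionary keyed by the pipeline's output columns; the sample text and the exact keys shown below are illustrative:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("fresh_model_uncased_pipeline", lang = "en")
+result = pipeline.annotate("John works at John Snow Labs in London.")
+
+# A token-classification pipeline typically exposes keys such as 'token' and 'ner'.
+print(result.keys())
+```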
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fresh_model_uncased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.4 MB| + +## References + +https://huggingface.co/Gkumi/fresh-model-uncased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-from_classifier_v1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-from_classifier_v1_pipeline_en.md new file mode 100644 index 00000000000000..9f177a5e139ea7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-from_classifier_v1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English from_classifier_v1_pipeline pipeline MPNetEmbeddings from futuredatascience +author: John Snow Labs +name: from_classifier_v1_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`from_classifier_v1_pipeline` is a English model originally trained by futuredatascience. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/from_classifier_v1_pipeline_en_5.5.0_3.0_1725703020620.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/from_classifier_v1_pipeline_en_5.5.0_3.0_1725703020620.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("from_classifier_v1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("from_classifier_v1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|from_classifier_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/futuredatascience/from-classifier-v1 + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-gal_sayula_popoluca_iw_3_en.md b/docs/_posts/ahmedlone127/2024-09-07-gal_sayula_popoluca_iw_3_en.md new file mode 100644 index 00000000000000..bc4709371ed338 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-gal_sayula_popoluca_iw_3_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English gal_sayula_popoluca_iw_3 XlmRoBertaForTokenClassification from homersimpson +author: John Snow Labs +name: gal_sayula_popoluca_iw_3 +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`gal_sayula_popoluca_iw_3` is a English model originally trained by homersimpson. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gal_sayula_popoluca_iw_3_en_5.5.0_3.0_1725704126144.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gal_sayula_popoluca_iw_3_en_5.5.0_3.0_1725704126144.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("gal_sayula_popoluca_iw_3","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("gal_sayula_popoluca_iw_3", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
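+
+After running the Python example above, tokens and their predicted tags come back as parallel arrays, so they can be inspected side by side; a small sketch using the standard annotation fields:
+
+```python
+# `token.result` holds the token texts and `ner.result` the predicted labels, in the same order.
+pipelineDF.select("token.result", "ner.result").show(truncate=False)
+```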
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gal_sayula_popoluca_iw_3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|417.1 MB| + +## References + +https://huggingface.co/homersimpson/gal-pos-iw-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-gal_sayula_popoluca_iw_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-gal_sayula_popoluca_iw_3_pipeline_en.md new file mode 100644 index 00000000000000..8d3f590d3b52df --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-gal_sayula_popoluca_iw_3_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English gal_sayula_popoluca_iw_3_pipeline pipeline XlmRoBertaForTokenClassification from homersimpson +author: John Snow Labs +name: gal_sayula_popoluca_iw_3_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`gal_sayula_popoluca_iw_3_pipeline` is a English model originally trained by homersimpson. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gal_sayula_popoluca_iw_3_pipeline_en_5.5.0_3.0_1725704156556.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gal_sayula_popoluca_iw_3_pipeline_en_5.5.0_3.0_1725704156556.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("gal_sayula_popoluca_iw_3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("gal_sayula_popoluca_iw_3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gal_sayula_popoluca_iw_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|417.1 MB| + +## References + +https://huggingface.co/homersimpson/gal-pos-iw-3 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-generative_qas_pariwisata_bali_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-generative_qas_pariwisata_bali_pipeline_en.md new file mode 100644 index 00000000000000..576d7f52d03c3a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-generative_qas_pariwisata_bali_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English generative_qas_pariwisata_bali_pipeline pipeline MPNetEmbeddings from SwastyMaharani +author: John Snow Labs +name: generative_qas_pariwisata_bali_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`generative_qas_pariwisata_bali_pipeline` is a English model originally trained by SwastyMaharani. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/generative_qas_pariwisata_bali_pipeline_en_5.5.0_3.0_1725703184581.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/generative_qas_pariwisata_bali_pipeline_en_5.5.0_3.0_1725703184581.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("generative_qas_pariwisata_bali_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("generative_qas_pariwisata_bali_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|generative_qas_pariwisata_bali_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.6 MB| + +## References + +https://huggingface.co/SwastyMaharani/generative-qas-pariwisata-bali + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-gr_roberta_base_en.md b/docs/_posts/ahmedlone127/2024-09-07-gr_roberta_base_en.md new file mode 100644 index 00000000000000..f7ca88b2e7b942 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-gr_roberta_base_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English gr_roberta_base RoBertaEmbeddings from macedonizer +author: John Snow Labs +name: gr_roberta_base +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`gr_roberta_base` is a English model originally trained by macedonizer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gr_roberta_base_en_5.5.0_3.0_1725716598105.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gr_roberta_base_en_5.5.0_3.0_1725716598105.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("gr_roberta_base","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("gr_roberta_base","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
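+
+Building on the Python example above, the `embeddings` column contains one annotation per token; a hedged sketch for pulling out token/vector pairs (field names follow the generic Spark NLP annotation schema):
+
+```python
+# `result` is the token text, `embeddings` its contextual vector.
+pipelineDF.selectExpr("explode(embeddings) as emb") \
+    .selectExpr("emb.result as token", "emb.embeddings as vector") \
+    .show(truncate=60)
+```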
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gr_roberta_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|311.6 MB| + +## References + +https://huggingface.co/macedonizer/gr-roberta-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-gr_roberta_base_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-gr_roberta_base_pipeline_en.md new file mode 100644 index 00000000000000..0e0ae605a2bafa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-gr_roberta_base_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English gr_roberta_base_pipeline pipeline RoBertaEmbeddings from macedonizer +author: John Snow Labs +name: gr_roberta_base_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`gr_roberta_base_pipeline` is a English model originally trained by macedonizer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gr_roberta_base_pipeline_en_5.5.0_3.0_1725716613812.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gr_roberta_base_pipeline_en_5.5.0_3.0_1725716613812.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("gr_roberta_base_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("gr_roberta_base_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gr_roberta_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|311.6 MB| + +## References + +https://huggingface.co/macedonizer/gr-roberta-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-greeklegalroberta_v2_en.md b/docs/_posts/ahmedlone127/2024-09-07-greeklegalroberta_v2_en.md new file mode 100644 index 00000000000000..56776a3d3b1fcf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-greeklegalroberta_v2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English greeklegalroberta_v2 RoBertaEmbeddings from AI-team-UoA +author: John Snow Labs +name: greeklegalroberta_v2 +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`greeklegalroberta_v2` is a English model originally trained by AI-team-UoA. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/greeklegalroberta_v2_en_5.5.0_3.0_1725716394167.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/greeklegalroberta_v2_en_5.5.0_3.0_1725716394167.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("greeklegalroberta_v2","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("greeklegalroberta_v2","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|greeklegalroberta_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|465.5 MB| + +## References + +https://huggingface.co/AI-team-UoA/GreekLegalRoBERTa_v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-greeklegalroberta_v2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-greeklegalroberta_v2_pipeline_en.md new file mode 100644 index 00000000000000..98264b3a741a63 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-greeklegalroberta_v2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English greeklegalroberta_v2_pipeline pipeline RoBertaEmbeddings from AI-team-UoA +author: John Snow Labs +name: greeklegalroberta_v2_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`greeklegalroberta_v2_pipeline` is a English model originally trained by AI-team-UoA. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/greeklegalroberta_v2_pipeline_en_5.5.0_3.0_1725716415048.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/greeklegalroberta_v2_pipeline_en_5.5.0_3.0_1725716415048.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("greeklegalroberta_v2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("greeklegalroberta_v2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|greeklegalroberta_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|465.5 MB| + +## References + +https://huggingface.co/AI-team-UoA/GreekLegalRoBERTa_v2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-hafez_bert_fa.md b/docs/_posts/ahmedlone127/2024-09-07-hafez_bert_fa.md new file mode 100644 index 00000000000000..8dc112e24b07be --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-hafez_bert_fa.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Persian hafez_bert BertEmbeddings from ViravirastSHZ +author: John Snow Labs +name: hafez_bert +date: 2024-09-07 +tags: [fa, open_source, onnx, embeddings, bert] +task: Embeddings +language: fa +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hafez_bert` is a Persian model originally trained by ViravirastSHZ. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hafez_bert_fa_5.5.0_3.0_1725696832695.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hafez_bert_fa_5.5.0_3.0_1725696832695.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("hafez_bert","fa") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("hafez_bert","fa") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
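+
+Because this is a Persian model, a Persian input gives more representative vectors than the English placeholder sentence used in the example; the sample sentence below is only an illustration:
+
+```python
+# A Persian sample sentence ("Hafez is a great Iranian poet"); any Persian text works here.
+data = spark.createDataFrame([["حافظ شاعر بزرگ ایرانی است"]]).toDF("text")
+```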
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hafez_bert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|fa| +|Size:|408.2 MB| + +## References + +https://huggingface.co/ViravirastSHZ/Hafez_Bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-hasoc19_xlm_roberta_base_profane_en.md b/docs/_posts/ahmedlone127/2024-09-07-hasoc19_xlm_roberta_base_profane_en.md new file mode 100644 index 00000000000000..3638721a890774 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-hasoc19_xlm_roberta_base_profane_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English hasoc19_xlm_roberta_base_profane XlmRoBertaForSequenceClassification from SiddharthaM +author: John Snow Labs +name: hasoc19_xlm_roberta_base_profane +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hasoc19_xlm_roberta_base_profane` is a English model originally trained by SiddharthaM. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hasoc19_xlm_roberta_base_profane_en_5.5.0_3.0_1725671007875.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hasoc19_xlm_roberta_base_profane_en_5.5.0_3.0_1725671007875.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("hasoc19_xlm_roberta_base_profane","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("hasoc19_xlm_roberta_base_profane", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
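+
+With `pipelineDF` from the Python example above, the predicted label is in `class.result`, and the per-class scores are usually kept in the annotation metadata; a brief sketch:
+
+```python
+# Predicted label plus the raw class scores stored in metadata.
+pipelineDF.select("class.result", "class.metadata").show(truncate=False)
+```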
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hasoc19_xlm_roberta_base_profane| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|802.8 MB| + +## References + +https://huggingface.co/SiddharthaM/hasoc19-xlm-roberta-base-profane \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-hasoc19_xlm_roberta_base_profane_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-hasoc19_xlm_roberta_base_profane_pipeline_en.md new file mode 100644 index 00000000000000..225045b2a7cd73 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-hasoc19_xlm_roberta_base_profane_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English hasoc19_xlm_roberta_base_profane_pipeline pipeline XlmRoBertaForSequenceClassification from SiddharthaM +author: John Snow Labs +name: hasoc19_xlm_roberta_base_profane_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hasoc19_xlm_roberta_base_profane_pipeline` is a English model originally trained by SiddharthaM. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hasoc19_xlm_roberta_base_profane_pipeline_en_5.5.0_3.0_1725671136338.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hasoc19_xlm_roberta_base_profane_pipeline_en_5.5.0_3.0_1725671136338.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("hasoc19_xlm_roberta_base_profane_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("hasoc19_xlm_roberta_base_profane_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hasoc19_xlm_roberta_base_profane_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|802.8 MB| + +## References + +https://huggingface.co/SiddharthaM/hasoc19-xlm-roberta-base-profane + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-hate_hate_random2_seed2_bernice_en.md b/docs/_posts/ahmedlone127/2024-09-07-hate_hate_random2_seed2_bernice_en.md new file mode 100644 index 00000000000000..cc4c4c001ad791 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-hate_hate_random2_seed2_bernice_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English hate_hate_random2_seed2_bernice XlmRoBertaForSequenceClassification from tweettemposhift +author: John Snow Labs +name: hate_hate_random2_seed2_bernice +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hate_hate_random2_seed2_bernice` is a English model originally trained by tweettemposhift. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hate_hate_random2_seed2_bernice_en_5.5.0_3.0_1725670210830.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hate_hate_random2_seed2_bernice_en_5.5.0_3.0_1725670210830.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("hate_hate_random2_seed2_bernice","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("hate_hate_random2_seed2_bernice", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hate_hate_random2_seed2_bernice| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|783.5 MB| + +## References + +https://huggingface.co/tweettemposhift/hate-hate_random2_seed2-bernice \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-hate_hate_random2_seed2_bernice_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-hate_hate_random2_seed2_bernice_pipeline_en.md new file mode 100644 index 00000000000000..43ef14e4867a54 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-hate_hate_random2_seed2_bernice_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English hate_hate_random2_seed2_bernice_pipeline pipeline XlmRoBertaForSequenceClassification from tweettemposhift +author: John Snow Labs +name: hate_hate_random2_seed2_bernice_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hate_hate_random2_seed2_bernice_pipeline` is a English model originally trained by tweettemposhift. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hate_hate_random2_seed2_bernice_pipeline_en_5.5.0_3.0_1725670353012.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hate_hate_random2_seed2_bernice_pipeline_en_5.5.0_3.0_1725670353012.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("hate_hate_random2_seed2_bernice_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("hate_hate_random2_seed2_bernice_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hate_hate_random2_seed2_bernice_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|783.5 MB| + +## References + +https://huggingface.co/tweettemposhift/hate-hate_random2_seed2-bernice + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-hate_speech_detection_mpnet_basev2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-hate_speech_detection_mpnet_basev2_pipeline_en.md new file mode 100644 index 00000000000000..f51a050eacd2d1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-hate_speech_detection_mpnet_basev2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English hate_speech_detection_mpnet_basev2_pipeline pipeline MPNetForSequenceClassification from Arvnd03 +author: John Snow Labs +name: hate_speech_detection_mpnet_basev2_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hate_speech_detection_mpnet_basev2_pipeline` is a English model originally trained by Arvnd03. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hate_speech_detection_mpnet_basev2_pipeline_en_5.5.0_3.0_1725733402271.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hate_speech_detection_mpnet_basev2_pipeline_en_5.5.0_3.0_1725733402271.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("hate_speech_detection_mpnet_basev2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("hate_speech_detection_mpnet_basev2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hate_speech_detection_mpnet_basev2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.2 MB| + +## References + +https://huggingface.co/Arvnd03/Hate-Speech-Detection-mpnet-basev2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-helsinki_danish_swedish_v12_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-helsinki_danish_swedish_v12_pipeline_en.md new file mode 100644 index 00000000000000..e9361252f337e7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-helsinki_danish_swedish_v12_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English helsinki_danish_swedish_v12_pipeline pipeline MarianTransformer from Danieljacobsen +author: John Snow Labs +name: helsinki_danish_swedish_v12_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`helsinki_danish_swedish_v12_pipeline` is a English model originally trained by Danieljacobsen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/helsinki_danish_swedish_v12_pipeline_en_5.5.0_3.0_1725741362947.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/helsinki_danish_swedish_v12_pipeline_en_5.5.0_3.0_1725741362947.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("helsinki_danish_swedish_v12_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("helsinki_danish_swedish_v12_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|helsinki_danish_swedish_v12_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|497.3 MB| + +## References + +https://huggingface.co/Danieljacobsen/Helsinki-DA-SV-v12 + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-helsinki_danish_swedish_v2_en.md b/docs/_posts/ahmedlone127/2024-09-07-helsinki_danish_swedish_v2_en.md new file mode 100644 index 00000000000000..e97d6bebdca831 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-helsinki_danish_swedish_v2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English helsinki_danish_swedish_v2 MarianTransformer from Danieljacobsen +author: John Snow Labs +name: helsinki_danish_swedish_v2 +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`helsinki_danish_swedish_v2` is a English model originally trained by Danieljacobsen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/helsinki_danish_swedish_v2_en_5.5.0_3.0_1725747576489.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/helsinki_danish_swedish_v2_en_5.5.0_3.0_1725747576489.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("helsinki_danish_swedish_v2","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("helsinki_danish_swedish_v2","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
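+
+Since the model translates Danish to Swedish, a Danish input sentence is more representative than the English placeholder; the sample below is illustrative, and the translated sentences end up in the `translation` column:
+
+```python
+# A Danish sample sentence ("I love Spark NLP").
+data = spark.createDataFrame([["Jeg elsker Spark NLP"]]).toDF("text")
+pipelineDF = pipeline.fit(data).transform(data)
+pipelineDF.select("translation.result").show(truncate=False)
+```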
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|helsinki_danish_swedish_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|497.6 MB| + +## References + +https://huggingface.co/Danieljacobsen/Helsinki-DA-SV-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-hiner_romanian_en.md b/docs/_posts/ahmedlone127/2024-09-07-hiner_romanian_en.md new file mode 100644 index 00000000000000..dc46f62ac2652d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-hiner_romanian_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English hiner_romanian RoBertaForTokenClassification from TathagatAgrawal +author: John Snow Labs +name: hiner_romanian +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hiner_romanian` is a English model originally trained by TathagatAgrawal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hiner_romanian_en_5.5.0_3.0_1725707551024.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hiner_romanian_en_5.5.0_3.0_1725707551024.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("hiner_romanian","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("hiner_romanian", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hiner_romanian| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|424.6 MB| + +## References + +https://huggingface.co/TathagatAgrawal/HiNER_RO \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-hiner_romanian_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-hiner_romanian_pipeline_en.md new file mode 100644 index 00000000000000..1162cf0a186c56 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-hiner_romanian_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English hiner_romanian_pipeline pipeline RoBertaForTokenClassification from TathagatAgrawal +author: John Snow Labs +name: hiner_romanian_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hiner_romanian_pipeline` is a English model originally trained by TathagatAgrawal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hiner_romanian_pipeline_en_5.5.0_3.0_1725707584028.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hiner_romanian_pipeline_en_5.5.0_3.0_1725707584028.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("hiner_romanian_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("hiner_romanian_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
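+ +The `df` passed to the pipeline is any Spark DataFrame with a `text` column; a minimal sketch of building one, mirroring the sample input used in the other cards: + +```python + +# Single-row DataFrame with the expected "text" column +df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +annotations = pipeline.transform(df) + +``` +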
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hiner_romanian_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|424.6 MB| + +## References + +https://huggingface.co/TathagatAgrawal/HiNER_RO + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-humour_detection_xlmr_en.md b/docs/_posts/ahmedlone127/2024-09-07-humour_detection_xlmr_en.md new file mode 100644 index 00000000000000..36b088a7c92f3d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-humour_detection_xlmr_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English humour_detection_xlmr XlmRoBertaForSequenceClassification from likhithasapu +author: John Snow Labs +name: humour_detection_xlmr +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`humour_detection_xlmr` is a English model originally trained by likhithasapu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/humour_detection_xlmr_en_5.5.0_3.0_1725670483382.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/humour_detection_xlmr_en_5.5.0_3.0_1725670483382.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("humour_detection_xlmr","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("humour_detection_xlmr", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
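+ +The predicted label for each row is stored in the `class` column; a minimal sketch, assuming the `pipelineDF` from the example above: + +```python + +# class.result is an array holding the predicted label for the document +pipelineDF.select("text", "class.result").show(truncate=False) + +``` +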
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|humour_detection_xlmr| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|801.1 MB| + +## References + +https://huggingface.co/likhithasapu/humour-detection-xlmr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-humour_detection_xlmr_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-humour_detection_xlmr_pipeline_en.md new file mode 100644 index 00000000000000..d65794608afbc5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-humour_detection_xlmr_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English humour_detection_xlmr_pipeline pipeline XlmRoBertaForSequenceClassification from likhithasapu +author: John Snow Labs +name: humour_detection_xlmr_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`humour_detection_xlmr_pipeline` is a English model originally trained by likhithasapu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/humour_detection_xlmr_pipeline_en_5.5.0_3.0_1725670604800.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/humour_detection_xlmr_pipeline_en_5.5.0_3.0_1725670604800.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("humour_detection_xlmr_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("humour_detection_xlmr_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|humour_detection_xlmr_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|801.1 MB| + +## References + +https://huggingface.co/likhithasapu/humour-detection-xlmr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-hupd_distilroberta_base_en.md b/docs/_posts/ahmedlone127/2024-09-07-hupd_distilroberta_base_en.md new file mode 100644 index 00000000000000..6a9551d46753b5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-hupd_distilroberta_base_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English hupd_distilroberta_base RoBertaEmbeddings from HUPD +author: John Snow Labs +name: hupd_distilroberta_base +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hupd_distilroberta_base` is a English model originally trained by HUPD. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hupd_distilroberta_base_en_5.5.0_3.0_1725716515667.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hupd_distilroberta_base_en_5.5.0_3.0_1725716515667.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("hupd_distilroberta_base","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("hupd_distilroberta_base","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
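+ +Each token receives one vector in the `embeddings` column; a minimal sketch for checking the tokens and vector size, assuming the `pipelineDF` from the example above: + +```python + +# result is the token text, embeddings is the float vector for that token +pipelineDF.selectExpr("explode(embeddings) as emb").selectExpr("emb.result as token", "size(emb.embeddings) as dimensions").show(truncate=False) + +``` +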
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hupd_distilroberta_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|308.6 MB| + +## References + +https://huggingface.co/HUPD/hupd-distilroberta-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-idiom_xlm_roberta_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-idiom_xlm_roberta_pipeline_en.md new file mode 100644 index 00000000000000..bb0049cab8cb06 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-idiom_xlm_roberta_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English idiom_xlm_roberta_pipeline pipeline XlmRoBertaForTokenClassification from imranraad +author: John Snow Labs +name: idiom_xlm_roberta_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`idiom_xlm_roberta_pipeline` is a English model originally trained by imranraad. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/idiom_xlm_roberta_pipeline_en_5.5.0_3.0_1725688632302.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/idiom_xlm_roberta_pipeline_en_5.5.0_3.0_1725688632302.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("idiom_xlm_roberta_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("idiom_xlm_roberta_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|idiom_xlm_roberta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|803.8 MB| + +## References + +https://huggingface.co/imranraad/idiom-xlm-roberta + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-ife_sentence_model2_en.md b/docs/_posts/ahmedlone127/2024-09-07-ife_sentence_model2_en.md new file mode 100644 index 00000000000000..dfeaba682fb734 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-ife_sentence_model2_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English ife_sentence_model2 MPNetEmbeddings from jesspi +author: John Snow Labs +name: ife_sentence_model2 +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ife_sentence_model2` is a English model originally trained by jesspi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ife_sentence_model2_en_5.5.0_3.0_1725703320430.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ife_sentence_model2_en_5.5.0_3.0_1725703320430.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("ife_sentence_model2","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("ife_sentence_model2","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
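+ +MPNet yields one sentence-level vector per document; a minimal sketch for checking its dimensionality, assuming the `pipelineDF` from the example above: + +```python + +# embeddings[0] is the single document-level annotation produced by MPNetEmbeddings +pipelineDF.selectExpr("size(embeddings[0].embeddings) as dimensions").show() + +``` +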
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ife_sentence_model2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/jesspi/IFE-sentence-model2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-ife_sentence_model2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-ife_sentence_model2_pipeline_en.md new file mode 100644 index 00000000000000..9080767b0a03dc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-ife_sentence_model2_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English ife_sentence_model2_pipeline pipeline MPNetEmbeddings from jesspi +author: John Snow Labs +name: ife_sentence_model2_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ife_sentence_model2_pipeline` is a English model originally trained by jesspi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ife_sentence_model2_pipeline_en_5.5.0_3.0_1725703340024.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ife_sentence_model2_pipeline_en_5.5.0_3.0_1725703340024.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ife_sentence_model2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ife_sentence_model2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ife_sentence_model2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/jesspi/IFE-sentence-model2 + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-improved_xlm_attempt2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-improved_xlm_attempt2_pipeline_en.md new file mode 100644 index 00000000000000..aeba3a0da4fbef --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-improved_xlm_attempt2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English improved_xlm_attempt2_pipeline pipeline XlmRoBertaForSequenceClassification from Anwaarma +author: John Snow Labs +name: improved_xlm_attempt2_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`improved_xlm_attempt2_pipeline` is a English model originally trained by Anwaarma. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/improved_xlm_attempt2_pipeline_en_5.5.0_3.0_1725670841378.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/improved_xlm_attempt2_pipeline_en_5.5.0_3.0_1725670841378.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("improved_xlm_attempt2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("improved_xlm_attempt2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|improved_xlm_attempt2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Anwaarma/Improved-xlm-attempt2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-indicbertv2_mlm_sam_tlm_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-07-indicbertv2_mlm_sam_tlm_pipeline_xx.md new file mode 100644 index 00000000000000..7f2a32752f7ca1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-indicbertv2_mlm_sam_tlm_pipeline_xx.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Multilingual indicbertv2_mlm_sam_tlm_pipeline pipeline BertEmbeddings from ai4bharat +author: John Snow Labs +name: indicbertv2_mlm_sam_tlm_pipeline +date: 2024-09-07 +tags: [xx, open_source, pipeline, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`indicbertv2_mlm_sam_tlm_pipeline` is a Multilingual model originally trained by ai4bharat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/indicbertv2_mlm_sam_tlm_pipeline_xx_5.5.0_3.0_1725697106782.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/indicbertv2_mlm_sam_tlm_pipeline_xx_5.5.0_3.0_1725697106782.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("indicbertv2_mlm_sam_tlm_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("indicbertv2_mlm_sam_tlm_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|indicbertv2_mlm_sam_tlm_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|1.0 GB| + +## References + +https://huggingface.co/ai4bharat/IndicBERTv2-MLM-Sam-TLM + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-indicbertv2_mlm_sam_tlm_xx.md b/docs/_posts/ahmedlone127/2024-09-07-indicbertv2_mlm_sam_tlm_xx.md new file mode 100644 index 00000000000000..17878bd0606753 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-indicbertv2_mlm_sam_tlm_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual indicbertv2_mlm_sam_tlm BertEmbeddings from ai4bharat +author: John Snow Labs +name: indicbertv2_mlm_sam_tlm +date: 2024-09-07 +tags: [xx, open_source, onnx, embeddings, bert] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`indicbertv2_mlm_sam_tlm` is a Multilingual model originally trained by ai4bharat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/indicbertv2_mlm_sam_tlm_xx_5.5.0_3.0_1725697059824.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/indicbertv2_mlm_sam_tlm_xx_5.5.0_3.0_1725697059824.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("indicbertv2_mlm_sam_tlm","xx") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("indicbertv2_mlm_sam_tlm","xx") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
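+ +To feed the vectors into downstream Spark ML stages, Spark NLP's `EmbeddingsFinisher` can be appended to the pipeline; a minimal sketch reusing the Python stages defined above: + +```python + +from sparknlp.base import EmbeddingsFinisher + +# Converts annotation embeddings into plain Spark ML vectors +finisher = EmbeddingsFinisher() \ + .setInputCols(["embeddings"]) \ + .setOutputCols(["finished_embeddings"]) \ + .setOutputAsVector(True) + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings, finisher]) + +``` +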
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|indicbertv2_mlm_sam_tlm| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|xx| +|Size:|1.0 GB| + +## References + +https://huggingface.co/ai4bharat/IndicBERTv2-MLM-Sam-TLM \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-intent_global_en.md b/docs/_posts/ahmedlone127/2024-09-07-intent_global_en.md new file mode 100644 index 00000000000000..30f0f72b983a09 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-intent_global_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English intent_global RoBertaForSequenceClassification from Onebu +author: John Snow Labs +name: intent_global +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`intent_global` is a English model originally trained by Onebu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/intent_global_en_5.5.0_3.0_1725718404280.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/intent_global_en_5.5.0_3.0_1725718404280.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +tokenizer = Tokenizer() \ + .setInputCols(['document']) \ + .setOutputCol('token') + +sequenceClassifier = RoBertaForSequenceClassification.pretrained("intent_global","en") \ + .setInputCols(["document","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val sequenceClassifier = RoBertaForSequenceClassification.pretrained("intent_global", "en") + .setInputCols(Array("document","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
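+ +For quick ad hoc predictions on raw strings, the fitted model can be wrapped in a `LightPipeline`; a minimal sketch, assuming the `pipelineModel` from the example above: + +```python + +from sparknlp.base import LightPipeline + +light = LightPipeline(pipelineModel) +# Returns a dict keyed by output column, with the predicted intent under "class" +print(light.annotate("I love spark-nlp")["class"]) + +``` +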
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|intent_global| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|420.7 MB| + +## References + +https://huggingface.co/Onebu/intent-global \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-isom5240_whisper_small_zhhk_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-isom5240_whisper_small_zhhk_1_pipeline_en.md new file mode 100644 index 00000000000000..389424f1ef0ab8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-isom5240_whisper_small_zhhk_1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English isom5240_whisper_small_zhhk_1_pipeline pipeline WhisperForCTC from RexChan +author: John Snow Labs +name: isom5240_whisper_small_zhhk_1_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`isom5240_whisper_small_zhhk_1_pipeline` is a English model originally trained by RexChan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/isom5240_whisper_small_zhhk_1_pipeline_en_5.5.0_3.0_1725750329739.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/isom5240_whisper_small_zhhk_1_pipeline_en_5.5.0_3.0_1725750329739.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("isom5240_whisper_small_zhhk_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("isom5240_whisper_small_zhhk_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|isom5240_whisper_small_zhhk_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/RexChan/ISOM5240-whisper-small-zhhk_1 + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-iwslt17_marian_big_ctx4_cwd3_english_french_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-iwslt17_marian_big_ctx4_cwd3_english_french_pipeline_en.md new file mode 100644 index 00000000000000..f02dd3c3103e2b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-iwslt17_marian_big_ctx4_cwd3_english_french_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English iwslt17_marian_big_ctx4_cwd3_english_french_pipeline pipeline MarianTransformer from context-mt +author: John Snow Labs +name: iwslt17_marian_big_ctx4_cwd3_english_french_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`iwslt17_marian_big_ctx4_cwd3_english_french_pipeline` is a English model originally trained by context-mt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/iwslt17_marian_big_ctx4_cwd3_english_french_pipeline_en_5.5.0_3.0_1725741589889.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/iwslt17_marian_big_ctx4_cwd3_english_french_pipeline_en_5.5.0_3.0_1725741589889.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("iwslt17_marian_big_ctx4_cwd3_english_french_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("iwslt17_marian_big_ctx4_cwd3_english_french_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|iwslt17_marian_big_ctx4_cwd3_english_french_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/context-mt/iwslt17-marian-big-ctx4-cwd3-en-fr + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-iwslt17_marian_small_ctx4_cwd1_english_french_en.md b/docs/_posts/ahmedlone127/2024-09-07-iwslt17_marian_small_ctx4_cwd1_english_french_en.md new file mode 100644 index 00000000000000..4c3bf1eda3085a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-iwslt17_marian_small_ctx4_cwd1_english_french_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English iwslt17_marian_small_ctx4_cwd1_english_french MarianTransformer from context-mt +author: John Snow Labs +name: iwslt17_marian_small_ctx4_cwd1_english_french +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`iwslt17_marian_small_ctx4_cwd1_english_french` is a English model originally trained by context-mt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/iwslt17_marian_small_ctx4_cwd1_english_french_en_5.5.0_3.0_1725748064331.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/iwslt17_marian_small_ctx4_cwd1_english_french_en_5.5.0_3.0_1725748064331.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +marian = MarianTransformer.pretrained("iwslt17_marian_small_ctx4_cwd1_english_french","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("translation") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val marian = MarianTransformer.pretrained("iwslt17_marian_small_ctx4_cwd1_english_french","en") + .setInputCols(Array("sentence")) + .setOutputCol("translation") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
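+ +The sample sentence in the card is only a placeholder; judging by the model name and source repository, the direction is English to French, so any English text can be pushed through the fitted model. A minimal sketch under that assumption: + +```python + +english = spark.createDataFrame([["The talk was recorded in 2017."]]).toDF("text") +pipelineModel.transform(english).selectExpr("explode(translation.result) as french").show(truncate=False) + +``` +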
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|iwslt17_marian_small_ctx4_cwd1_english_french| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.3 MB| + +## References + +https://huggingface.co/context-mt/iwslt17-marian-small-ctx4-cwd1-en-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-jcblaise_roberta_tagalog_base_ft_udpos213_top2lang_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-jcblaise_roberta_tagalog_base_ft_udpos213_top2lang_pipeline_en.md new file mode 100644 index 00000000000000..383332188a717c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-jcblaise_roberta_tagalog_base_ft_udpos213_top2lang_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English jcblaise_roberta_tagalog_base_ft_udpos213_top2lang_pipeline pipeline RoBertaForTokenClassification from katrinatan +author: John Snow Labs +name: jcblaise_roberta_tagalog_base_ft_udpos213_top2lang_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`jcblaise_roberta_tagalog_base_ft_udpos213_top2lang_pipeline` is a English model originally trained by katrinatan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/jcblaise_roberta_tagalog_base_ft_udpos213_top2lang_pipeline_en_5.5.0_3.0_1725721382321.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/jcblaise_roberta_tagalog_base_ft_udpos213_top2lang_pipeline_en_5.5.0_3.0_1725721382321.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("jcblaise_roberta_tagalog_base_ft_udpos213_top2lang_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("jcblaise_roberta_tagalog_base_ft_udpos213_top2lang_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|jcblaise_roberta_tagalog_base_ft_udpos213_top2lang_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/katrinatan/jcblaise-roberta-tagalog-base_ft_udpos213-top2lang + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-lab1_finetuning_cheyannelam_en.md b/docs/_posts/ahmedlone127/2024-09-07-lab1_finetuning_cheyannelam_en.md new file mode 100644 index 00000000000000..8458fb9af8cfd6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-lab1_finetuning_cheyannelam_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English lab1_finetuning_cheyannelam MarianTransformer from cheyannelam +author: John Snow Labs +name: lab1_finetuning_cheyannelam +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lab1_finetuning_cheyannelam` is a English model originally trained by cheyannelam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lab1_finetuning_cheyannelam_en_5.5.0_3.0_1725741427197.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lab1_finetuning_cheyannelam_en_5.5.0_3.0_1725741427197.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +marian = MarianTransformer.pretrained("lab1_finetuning_cheyannelam","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("translation") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val marian = MarianTransformer.pretrained("lab1_finetuning_cheyannelam","en") + .setInputCols(Array("sentence")) + .setOutputCol("translation") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lab1_finetuning_cheyannelam| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.2 MB| + +## References + +https://huggingface.co/cheyannelam/lab1_finetuning \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-lab1_finetuning_daanjiri_en.md b/docs/_posts/ahmedlone127/2024-09-07-lab1_finetuning_daanjiri_en.md new file mode 100644 index 00000000000000..2e78ff9aa1f5d7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-lab1_finetuning_daanjiri_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English lab1_finetuning_daanjiri MarianTransformer from daanjiri +author: John Snow Labs +name: lab1_finetuning_daanjiri +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lab1_finetuning_daanjiri` is a English model originally trained by daanjiri. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lab1_finetuning_daanjiri_en_5.5.0_3.0_1725747611077.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lab1_finetuning_daanjiri_en_5.5.0_3.0_1725747611077.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +marian = MarianTransformer.pretrained("lab1_finetuning_daanjiri","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("translation") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val marian = MarianTransformer.pretrained("lab1_finetuning_daanjiri","en") + .setInputCols(Array("sentence")) + .setOutputCol("translation") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lab1_finetuning_daanjiri| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.1 MB| + +## References + +https://huggingface.co/daanjiri/lab1_finetuning \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-lab1_finetuning_daanjiri_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-lab1_finetuning_daanjiri_pipeline_en.md new file mode 100644 index 00000000000000..c559c15e44a919 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-lab1_finetuning_daanjiri_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English lab1_finetuning_daanjiri_pipeline pipeline MarianTransformer from daanjiri +author: John Snow Labs +name: lab1_finetuning_daanjiri_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lab1_finetuning_daanjiri_pipeline` is a English model originally trained by daanjiri. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lab1_finetuning_daanjiri_pipeline_en_5.5.0_3.0_1725747639480.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lab1_finetuning_daanjiri_pipeline_en_5.5.0_3.0_1725747639480.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("lab1_finetuning_daanjiri_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("lab1_finetuning_daanjiri_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lab1_finetuning_daanjiri_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|508.6 MB| + +## References + +https://huggingface.co/daanjiri/lab1_finetuning + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-lab1_random_daanjiri_en.md b/docs/_posts/ahmedlone127/2024-09-07-lab1_random_daanjiri_en.md new file mode 100644 index 00000000000000..085ce69f1be5cd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-lab1_random_daanjiri_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English lab1_random_daanjiri MarianTransformer from daanjiri +author: John Snow Labs +name: lab1_random_daanjiri +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lab1_random_daanjiri` is a English model originally trained by daanjiri. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lab1_random_daanjiri_en_5.5.0_3.0_1725747035133.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lab1_random_daanjiri_en_5.5.0_3.0_1725747035133.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +marian = MarianTransformer.pretrained("lab1_random_daanjiri","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("translation") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val marian = MarianTransformer.pretrained("lab1_random_daanjiri","en") + .setInputCols(Array("sentence")) + .setOutputCol("translation") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lab1_random_daanjiri| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.2 MB| + +## References + +https://huggingface.co/daanjiri/lab1_random \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-lab1_random_sfliao_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-lab1_random_sfliao_pipeline_en.md new file mode 100644 index 00000000000000..4386bf731ad16e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-lab1_random_sfliao_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English lab1_random_sfliao_pipeline pipeline MarianTransformer from sfliao +author: John Snow Labs +name: lab1_random_sfliao_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lab1_random_sfliao_pipeline` is a English model originally trained by sfliao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lab1_random_sfliao_pipeline_en_5.5.0_3.0_1725746882512.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lab1_random_sfliao_pipeline_en_5.5.0_3.0_1725746882512.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("lab1_random_sfliao_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("lab1_random_sfliao_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lab1_random_sfliao_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|508.8 MB| + +## References + +https://huggingface.co/sfliao/lab1_random + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-lbt5_large_en.md b/docs/_posts/ahmedlone127/2024-09-07-lbt5_large_en.md new file mode 100644 index 00000000000000..9a91b81367e14c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-lbt5_large_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English lbt5_large T5Transformer from timkolber +author: John Snow Labs +name: lbt5_large +date: 2024-09-07 +tags: [en, open_source, onnx, t5, question_answering, summarization, translation, text_generation] +task: [Question Answering, Summarization, Translation, Text Generation] +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: T5Transformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained T5Transformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lbt5_large` is a English model originally trained by timkolber. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lbt5_large_en_5.5.0_3.0_1725719638034.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lbt5_large_en_5.5.0_3.0_1725719638034.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol('text') \ + .setOutputCol('document') + +t5 = T5Transformer.pretrained("lbt5_large","en") \ + .setInputCols(["document"]) \ + .setOutputCol("output") + +pipeline = Pipeline().setStages([documentAssembler, t5]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val t5 = T5Transformer.pretrained("lbt5_large", "en") + .setInputCols(Array("document")) + .setOutputCol("output") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, t5)) +val data = Seq("I love spark-nlp").toDS.toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
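+ +The generated text is written to the `output` column; a minimal sketch for reading it back, assuming the `pipelineDF` from the example above: + +```python + +# output.result holds the text produced by the T5 model for each document +pipelineDF.selectExpr("explode(output.result) as generated").show(truncate=False) + +``` +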
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lbt5_large| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[output]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/timkolber/lbt5-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-lbt5_large_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-lbt5_large_pipeline_en.md new file mode 100644 index 00000000000000..9ca5ba8bb123ed --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-lbt5_large_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English lbt5_large_pipeline pipeline T5Transformer from timkolber +author: John Snow Labs +name: lbt5_large_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: [Question Answering, Summarization, Translation, Text Generation] +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained T5Transformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lbt5_large_pipeline` is a English model originally trained by timkolber. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lbt5_large_pipeline_en_5.5.0_3.0_1725720118530.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lbt5_large_pipeline_en_5.5.0_3.0_1725720118530.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("lbt5_large_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("lbt5_large_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
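+
+In the snippet above, `df` is assumed to be a Spark DataFrame with a `text` column. A minimal, self-contained sketch (the input sentence is illustrative only):
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+pipeline = PretrainedPipeline("lbt5_large_pipeline", lang="en")
+
+# DataFrame-based usage
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+# Convenience method for single strings
+print(pipeline.annotate("I love spark-nlp"))
+```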
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lbt5_large_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/timkolber/lbt5-large + +## Included Models + +- DocumentAssembler +- T5Transformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-legal_longformer_base_4096_spanish_es.md b/docs/_posts/ahmedlone127/2024-09-07-legal_longformer_base_4096_spanish_es.md new file mode 100644 index 00000000000000..960eba0aada8f7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-legal_longformer_base_4096_spanish_es.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Castilian, Spanish legal_longformer_base_4096_spanish RoBertaEmbeddings from Narrativa +author: John Snow Labs +name: legal_longformer_base_4096_spanish +date: 2024-09-07 +tags: [es, open_source, onnx, embeddings, roberta] +task: Embeddings +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`legal_longformer_base_4096_spanish` is a Castilian, Spanish model originally trained by Narrativa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/legal_longformer_base_4096_spanish_es_5.5.0_3.0_1725698702089.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/legal_longformer_base_4096_spanish_es_5.5.0_3.0_1725698702089.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("legal_longformer_base_4096_spanish","es") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("legal_longformer_base_4096_spanish","es") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|legal_longformer_base_4096_spanish| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|es| +|Size:|478.9 MB| + +## References + +https://huggingface.co/Narrativa/legal-longformer-base-4096-spanish \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-leia_lm_large_en.md b/docs/_posts/ahmedlone127/2024-09-07-leia_lm_large_en.md new file mode 100644 index 00000000000000..75d5c7c765d72b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-leia_lm_large_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English leia_lm_large RoBertaEmbeddings from LEIA +author: John Snow Labs +name: leia_lm_large +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`leia_lm_large` is a English model originally trained by LEIA. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/leia_lm_large_en_5.5.0_3.0_1725716747076.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/leia_lm_large_en_5.5.0_3.0_1725716747076.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("leia_lm_large","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("leia_lm_large","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|leia_lm_large| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/LEIA/LEIA-LM-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-leia_lm_large_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-leia_lm_large_pipeline_en.md new file mode 100644 index 00000000000000..56d6b0b69fdb46 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-leia_lm_large_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English leia_lm_large_pipeline pipeline RoBertaEmbeddings from LEIA +author: John Snow Labs +name: leia_lm_large_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`leia_lm_large_pipeline` is a English model originally trained by LEIA. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/leia_lm_large_pipeline_en_5.5.0_3.0_1725716804649.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/leia_lm_large_pipeline_en_5.5.0_3.0_1725716804649.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("leia_lm_large_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("leia_lm_large_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|leia_lm_large_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/LEIA/LEIA-LM-large + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-llama_model_en.md b/docs/_posts/ahmedlone127/2024-09-07-llama_model_en.md new file mode 100644 index 00000000000000..14ea57b0a88894 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-llama_model_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English llama_model DistilBertForSequenceClassification from sidd272 +author: John Snow Labs +name: llama_model +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`llama_model` is a English model originally trained by sidd272. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/llama_model_en_5.5.0_3.0_1725674978610.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/llama_model_en_5.5.0_3.0_1725674978610.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("llama_model","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("llama_model", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
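+
+The predicted label for each row is stored in the `class` annotation column. Reusing `pipelineDF` from the Python example above:
+
+```python
+# Inspect the predicted label next to the input text
+pipelineDF.select("text", "class.result").show(truncate=False)
+```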
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|llama_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/sidd272/Llama_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-lld_valbadia_ita_loresmt_l4_pipeline_it.md b/docs/_posts/ahmedlone127/2024-09-07-lld_valbadia_ita_loresmt_l4_pipeline_it.md new file mode 100644 index 00000000000000..305c98a18943f9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-lld_valbadia_ita_loresmt_l4_pipeline_it.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Italian lld_valbadia_ita_loresmt_l4_pipeline pipeline MarianTransformer from sfrontull +author: John Snow Labs +name: lld_valbadia_ita_loresmt_l4_pipeline +date: 2024-09-07 +tags: [it, open_source, pipeline, onnx] +task: Translation +language: it +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lld_valbadia_ita_loresmt_l4_pipeline` is a Italian model originally trained by sfrontull. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lld_valbadia_ita_loresmt_l4_pipeline_it_5.5.0_3.0_1725740935736.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lld_valbadia_ita_loresmt_l4_pipeline_it_5.5.0_3.0_1725740935736.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("lld_valbadia_ita_loresmt_l4_pipeline", lang = "it") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("lld_valbadia_ita_loresmt_l4_pipeline", lang = "it") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lld_valbadia_ita_loresmt_l4_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|it| +|Size:|410.9 MB| + +## References + +https://huggingface.co/sfrontull/lld_valbadia-ita-loresmt-L4 + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-lm_ner_skills_extractor_bert_en.md b/docs/_posts/ahmedlone127/2024-09-07-lm_ner_skills_extractor_bert_en.md new file mode 100644 index 00000000000000..dde88f0cfa5c32 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-lm_ner_skills_extractor_bert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English lm_ner_skills_extractor_bert BertForTokenClassification from GalalEwida +author: John Snow Labs +name: lm_ner_skills_extractor_bert +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lm_ner_skills_extractor_bert` is a English model originally trained by GalalEwida. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lm_ner_skills_extractor_bert_en_5.5.0_3.0_1725701634524.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lm_ner_skills_extractor_bert_en_5.5.0_3.0_1725701634524.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("lm_ner_skills_extractor_bert","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("lm_ner_skills_extractor_bert", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
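+
+The `ner` column holds token-level tags. If grouped entity chunks are preferred, a `NerConverter` stage can be appended to the Python pipeline above (a sketch that reuses the variable names from the example and assumes the model emits IOB-style tags):
+
+```python
+from sparknlp.annotator import NerConverter
+
+# Groups token-level B-/I- tags into entity chunks
+converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier, converter])
+pipeline.fit(data).transform(data).select("ner_chunk.result").show(truncate=False)
+```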
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lm_ner_skills_extractor_bert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/GalalEwida/lm-ner-skills-extractor_BERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-lm_ner_skills_extractor_bert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-lm_ner_skills_extractor_bert_pipeline_en.md new file mode 100644 index 00000000000000..cba4f565328f79 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-lm_ner_skills_extractor_bert_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English lm_ner_skills_extractor_bert_pipeline pipeline BertForTokenClassification from GalalEwida +author: John Snow Labs +name: lm_ner_skills_extractor_bert_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lm_ner_skills_extractor_bert_pipeline` is a English model originally trained by GalalEwida. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lm_ner_skills_extractor_bert_pipeline_en_5.5.0_3.0_1725701652411.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lm_ner_skills_extractor_bert_pipeline_en_5.5.0_3.0_1725701652411.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("lm_ner_skills_extractor_bert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("lm_ner_skills_extractor_bert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lm_ner_skills_extractor_bert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/GalalEwida/lm-ner-skills-extractor_BERT + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-logprecis_en.md b/docs/_posts/ahmedlone127/2024-09-07-logprecis_en.md new file mode 100644 index 00000000000000..8db576b003ce07 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-logprecis_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English logprecis RoBertaForTokenClassification from SmartDataPolito +author: John Snow Labs +name: logprecis +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`logprecis` is a English model originally trained by SmartDataPolito. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/logprecis_en_5.5.0_3.0_1725708325034.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/logprecis_en_5.5.0_3.0_1725708325034.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("logprecis","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("logprecis", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|logprecis| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|466.1 MB| + +## References + +https://huggingface.co/SmartDataPolito/logprecis \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-lr1e5_bs8_distilbert_qa_pytorch_full_en.md b/docs/_posts/ahmedlone127/2024-09-07-lr1e5_bs8_distilbert_qa_pytorch_full_en.md new file mode 100644 index 00000000000000..fde40bcca6e4fd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-lr1e5_bs8_distilbert_qa_pytorch_full_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English lr1e5_bs8_distilbert_qa_pytorch_full DistilBertForQuestionAnswering from tyavika +author: John Snow Labs +name: lr1e5_bs8_distilbert_qa_pytorch_full +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lr1e5_bs8_distilbert_qa_pytorch_full` is a English model originally trained by tyavika. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lr1e5_bs8_distilbert_qa_pytorch_full_en_5.5.0_3.0_1725745711307.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lr1e5_bs8_distilbert_qa_pytorch_full_en_5.5.0_3.0_1725745711307.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("lr1e5_bs8_distilbert_qa_pytorch_full","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("lr1e5_bs8_distilbert_qa_pytorch_full", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
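+
+The predicted span is returned in the `answer` annotation column. A short way to inspect it, reusing `pipelineDF` from the Python example above:
+
+```python
+# Show each question next to its predicted answer span
+pipelineDF.select("document_question.result", "answer.result").show(truncate=False)
+```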
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lr1e5_bs8_distilbert_qa_pytorch_full| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/tyavika/LR1E5-BS8-Distilbert-QA-Pytorch-FULL \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-lr1e5_bs8_distilbert_qa_pytorch_full_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-lr1e5_bs8_distilbert_qa_pytorch_full_pipeline_en.md new file mode 100644 index 00000000000000..e8ef6ccec5a7cf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-lr1e5_bs8_distilbert_qa_pytorch_full_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English lr1e5_bs8_distilbert_qa_pytorch_full_pipeline pipeline DistilBertForQuestionAnswering from tyavika +author: John Snow Labs +name: lr1e5_bs8_distilbert_qa_pytorch_full_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lr1e5_bs8_distilbert_qa_pytorch_full_pipeline` is a English model originally trained by tyavika. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lr1e5_bs8_distilbert_qa_pytorch_full_pipeline_en_5.5.0_3.0_1725745724927.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lr1e5_bs8_distilbert_qa_pytorch_full_pipeline_en_5.5.0_3.0_1725745724927.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("lr1e5_bs8_distilbert_qa_pytorch_full_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("lr1e5_bs8_distilbert_qa_pytorch_full_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lr1e5_bs8_distilbert_qa_pytorch_full_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/tyavika/LR1E5-BS8-Distilbert-QA-Pytorch-FULL + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-lthien_tra_bai_tieng_anh_chuyen_nganh_en.md b/docs/_posts/ahmedlone127/2024-09-07-lthien_tra_bai_tieng_anh_chuyen_nganh_en.md new file mode 100644 index 00000000000000..4d124bd79aa750 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-lthien_tra_bai_tieng_anh_chuyen_nganh_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English lthien_tra_bai_tieng_anh_chuyen_nganh DistilBertForQuestionAnswering from hi113 +author: John Snow Labs +name: lthien_tra_bai_tieng_anh_chuyen_nganh +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lthien_tra_bai_tieng_anh_chuyen_nganh` is a English model originally trained by hi113. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lthien_tra_bai_tieng_anh_chuyen_nganh_en_5.5.0_3.0_1725736185460.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lthien_tra_bai_tieng_anh_chuyen_nganh_en_5.5.0_3.0_1725736185460.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("lthien_tra_bai_tieng_anh_chuyen_nganh","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("lthien_tra_bai_tieng_anh_chuyen_nganh", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lthien_tra_bai_tieng_anh_chuyen_nganh| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/hi113/ltHien_Tra_Bai_Tieng_Anh_Chuyen_Nganh \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-lthien_tra_bai_tieng_anh_chuyen_nganh_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-lthien_tra_bai_tieng_anh_chuyen_nganh_pipeline_en.md new file mode 100644 index 00000000000000..2ee10b4ba2842a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-lthien_tra_bai_tieng_anh_chuyen_nganh_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English lthien_tra_bai_tieng_anh_chuyen_nganh_pipeline pipeline DistilBertForQuestionAnswering from hi113 +author: John Snow Labs +name: lthien_tra_bai_tieng_anh_chuyen_nganh_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lthien_tra_bai_tieng_anh_chuyen_nganh_pipeline` is a English model originally trained by hi113. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lthien_tra_bai_tieng_anh_chuyen_nganh_pipeline_en_5.5.0_3.0_1725736196635.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lthien_tra_bai_tieng_anh_chuyen_nganh_pipeline_en_5.5.0_3.0_1725736196635.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("lthien_tra_bai_tieng_anh_chuyen_nganh_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("lthien_tra_bai_tieng_anh_chuyen_nganh_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lthien_tra_bai_tieng_anh_chuyen_nganh_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/hi113/ltHien_Tra_Bai_Tieng_Anh_Chuyen_Nganh + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-macedonian_roberta_base_mk.md b/docs/_posts/ahmedlone127/2024-09-07-macedonian_roberta_base_mk.md new file mode 100644 index 00000000000000..3eb8a4eb886696 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-macedonian_roberta_base_mk.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Macedonian macedonian_roberta_base RoBertaEmbeddings from macedonizer +author: John Snow Labs +name: macedonian_roberta_base +date: 2024-09-07 +tags: [mk, open_source, onnx, embeddings, roberta] +task: Embeddings +language: mk +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`macedonian_roberta_base` is a Macedonian model originally trained by macedonizer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/macedonian_roberta_base_mk_5.5.0_3.0_1725678754697.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/macedonian_roberta_base_mk_5.5.0_3.0_1725678754697.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("macedonian_roberta_base","mk") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("macedonian_roberta_base","mk") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
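+
+For quick experiments on single strings, a `LightPipeline` can wrap the fitted model from the Python example above (a sketch; the Macedonian sentence is illustrative only):
+
+```python
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+annotations = light.fullAnnotate("Скопје е главниот град на Северна Македонија.")[0]
+
+# One embedding vector per token
+for emb in annotations["embeddings"]:
+    print(emb.result, len(emb.embeddings))
+```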
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|macedonian_roberta_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|mk| +|Size:|311.8 MB| + +## References + +https://huggingface.co/macedonizer/mk-roberta-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-macedonian_roberta_base_pipeline_mk.md b/docs/_posts/ahmedlone127/2024-09-07-macedonian_roberta_base_pipeline_mk.md new file mode 100644 index 00000000000000..d3a7c199ff474a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-macedonian_roberta_base_pipeline_mk.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Macedonian macedonian_roberta_base_pipeline pipeline RoBertaEmbeddings from macedonizer +author: John Snow Labs +name: macedonian_roberta_base_pipeline +date: 2024-09-07 +tags: [mk, open_source, pipeline, onnx] +task: Embeddings +language: mk +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`macedonian_roberta_base_pipeline` is a Macedonian model originally trained by macedonizer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/macedonian_roberta_base_pipeline_mk_5.5.0_3.0_1725678768825.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/macedonian_roberta_base_pipeline_mk_5.5.0_3.0_1725678768825.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("macedonian_roberta_base_pipeline", lang = "mk") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("macedonian_roberta_base_pipeline", lang = "mk") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|macedonian_roberta_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|mk| +|Size:|311.9 MB| + +## References + +https://huggingface.co/macedonizer/mk-roberta-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-maltese_norwegian_swedish_finetuned_pipeline_sv.md b/docs/_posts/ahmedlone127/2024-09-07-maltese_norwegian_swedish_finetuned_pipeline_sv.md new file mode 100644 index 00000000000000..971078549ef3f1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-maltese_norwegian_swedish_finetuned_pipeline_sv.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Swedish maltese_norwegian_swedish_finetuned_pipeline pipeline MarianTransformer from oskarandrsson +author: John Snow Labs +name: maltese_norwegian_swedish_finetuned_pipeline +date: 2024-09-07 +tags: [sv, open_source, pipeline, onnx] +task: Translation +language: sv +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`maltese_norwegian_swedish_finetuned_pipeline` is a Swedish model originally trained by oskarandrsson. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/maltese_norwegian_swedish_finetuned_pipeline_sv_5.5.0_3.0_1725747442822.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/maltese_norwegian_swedish_finetuned_pipeline_sv_5.5.0_3.0_1725747442822.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("maltese_norwegian_swedish_finetuned_pipeline", lang = "sv") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("maltese_norwegian_swedish_finetuned_pipeline", lang = "sv") +val annotations = pipeline.transform(df) + +``` +
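+
+`df` in the snippet above is assumed to be a Spark DataFrame with a `text` column; the pipeline's sentence detector splits the input and the MarianTransformer translates each sentence. A minimal sketch (the sample sentences are illustrative, and the `translation` output column name is assumed from the included MarianTransformer):
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+pipeline = PretrainedPipeline("maltese_norwegian_swedish_finetuned_pipeline", lang="sv")
+
+df = spark.createDataFrame([["Jeg liker Spark NLP. Det er raskt."]]).toDF("text")
+pipeline.transform(df).select("translation.result").show(truncate=False)
+```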
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|maltese_norwegian_swedish_finetuned_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|sv| +|Size:|204.8 MB| + +## References + +https://huggingface.co/oskarandrsson/mt-no-sv-finetuned + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-malurl_roberta_10e_en.md b/docs/_posts/ahmedlone127/2024-09-07-malurl_roberta_10e_en.md new file mode 100644 index 00000000000000..dfe3280fb72b90 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-malurl_roberta_10e_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English malurl_roberta_10e RoBertaForSequenceClassification from bgspaditya +author: John Snow Labs +name: malurl_roberta_10e +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`malurl_roberta_10e` is a English model originally trained by bgspaditya. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/malurl_roberta_10e_en_5.5.0_3.0_1725679489551.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/malurl_roberta_10e_en_5.5.0_3.0_1725679489551.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("malurl_roberta_10e","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("malurl_roberta_10e", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
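+
+Since this model targets URL classification, the input would typically be a raw URL rather than a sentence. A small addition to the Python example above (the URL is made up for illustration):
+
+```python
+# Classify a URL string with the fitted pipeline from above
+url_df = spark.createDataFrame([["http://example.com/account/verify?id=123"]]).toDF("text")
+pipelineModel.transform(url_df).select("text", "class.result").show(truncate=False)
+```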
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|malurl_roberta_10e| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|438.4 MB| + +## References + +https://huggingface.co/bgspaditya/malurl-roberta-10e \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-marefa_maltese_english_arabic_parallel_10k_splitted_cosine_en.md b/docs/_posts/ahmedlone127/2024-09-07-marefa_maltese_english_arabic_parallel_10k_splitted_cosine_en.md new file mode 100644 index 00000000000000..6427e3dc4aab30 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-marefa_maltese_english_arabic_parallel_10k_splitted_cosine_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English marefa_maltese_english_arabic_parallel_10k_splitted_cosine MarianTransformer from HamdanXI +author: John Snow Labs +name: marefa_maltese_english_arabic_parallel_10k_splitted_cosine +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marefa_maltese_english_arabic_parallel_10k_splitted_cosine` is a English model originally trained by HamdanXI. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marefa_maltese_english_arabic_parallel_10k_splitted_cosine_en_5.5.0_3.0_1725740707148.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marefa_maltese_english_arabic_parallel_10k_splitted_cosine_en_5.5.0_3.0_1725740707148.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("marefa_maltese_english_arabic_parallel_10k_splitted_cosine","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("marefa_maltese_english_arabic_parallel_10k_splitted_cosine","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
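+
+The translated text lands in the `translation` annotation column. Reusing `pipelineModel` from the Python example above, with an illustrative English input:
+
+```python
+example = spark.createDataFrame([["Machine translation makes documentation easier to share."]]).toDF("text")
+pipelineModel.transform(example).select("translation.result").show(truncate=False)
+```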
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marefa_maltese_english_arabic_parallel_10k_splitted_cosine| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|527.7 MB| + +## References + +https://huggingface.co/HamdanXI/marefa-mt-en-ar-parallel-10k-splitted-cosine \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-marefa_maltese_english_arabic_parallel_10k_splitted_cosine_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-marefa_maltese_english_arabic_parallel_10k_splitted_cosine_pipeline_en.md new file mode 100644 index 00000000000000..a7a888a15b7152 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-marefa_maltese_english_arabic_parallel_10k_splitted_cosine_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English marefa_maltese_english_arabic_parallel_10k_splitted_cosine_pipeline pipeline MarianTransformer from HamdanXI +author: John Snow Labs +name: marefa_maltese_english_arabic_parallel_10k_splitted_cosine_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marefa_maltese_english_arabic_parallel_10k_splitted_cosine_pipeline` is a English model originally trained by HamdanXI. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marefa_maltese_english_arabic_parallel_10k_splitted_cosine_pipeline_en_5.5.0_3.0_1725740732814.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marefa_maltese_english_arabic_parallel_10k_splitted_cosine_pipeline_en_5.5.0_3.0_1725740732814.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("marefa_maltese_english_arabic_parallel_10k_splitted_cosine_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("marefa_maltese_english_arabic_parallel_10k_splitted_cosine_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marefa_maltese_english_arabic_parallel_10k_splitted_cosine_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|528.2 MB| + +## References + +https://huggingface.co/HamdanXI/marefa-mt-en-ar-parallel-10k-splitted-cosine + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_combined_dataset_1_1_en.md b/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_combined_dataset_1_1_en.md new file mode 100644 index 00000000000000..161d4df08964e7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_combined_dataset_1_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English marian_finetuned_combined_dataset_1_1 MarianTransformer from kalcho100 +author: John Snow Labs +name: marian_finetuned_combined_dataset_1_1 +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_combined_dataset_1_1` is a English model originally trained by kalcho100. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_combined_dataset_1_1_en_5.5.0_3.0_1725741111319.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_combined_dataset_1_1_en_5.5.0_3.0_1725741111319.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("marian_finetuned_combined_dataset_1_1","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("marian_finetuned_combined_dataset_1_1","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_combined_dataset_1_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|547.5 MB| + +## References + +https://huggingface.co/kalcho100/Marian-finetuned_combined_dataset_1_1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_combined_dataset_1_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_combined_dataset_1_1_pipeline_en.md new file mode 100644 index 00000000000000..9446b2d60ead38 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_combined_dataset_1_1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English marian_finetuned_combined_dataset_1_1_pipeline pipeline MarianTransformer from kalcho100 +author: John Snow Labs +name: marian_finetuned_combined_dataset_1_1_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_combined_dataset_1_1_pipeline` is a English model originally trained by kalcho100. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_combined_dataset_1_1_pipeline_en_5.5.0_3.0_1725741137314.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_combined_dataset_1_1_pipeline_en_5.5.0_3.0_1725741137314.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("marian_finetuned_combined_dataset_1_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("marian_finetuned_combined_dataset_1_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_combined_dataset_1_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|548.0 MB| + +## References + +https://huggingface.co/kalcho100/Marian-finetuned_combined_dataset_1_1 + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_chandrasutrisnotjhong_en.md b/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_chandrasutrisnotjhong_en.md new file mode 100644 index 00000000000000..9c4b42b622a073 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_chandrasutrisnotjhong_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_chandrasutrisnotjhong MarianTransformer from chandrasutrisnotjhong +author: John Snow Labs +name: marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_chandrasutrisnotjhong +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_chandrasutrisnotjhong` is a English model originally trained by chandrasutrisnotjhong. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_chandrasutrisnotjhong_en_5.5.0_3.0_1725747745532.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_chandrasutrisnotjhong_en_5.5.0_3.0_1725747745532.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from pyspark.ml import Pipeline
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, MarianTransformer
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+# split each document into sentences before translation
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+# translate the detected sentences
+marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_chandrasutrisnotjhong","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+// split each document into sentences before translation
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+// translate the detected sentences
+val marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_chandrasutrisnotjhong","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
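+
+As a quick sanity check of the fitted pipeline above, the translated text can be pulled out of the `translation` annotation column. This is a minimal sketch that reuses the `pipelineDF` produced in the Python example:
+
+```python
+# each Spark NLP annotation stores its text in the nested `result` field
+pipelineDF.selectExpr("text", "explode(translation.result) as translated_text") \
+    .show(truncate=False)
+```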
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_chandrasutrisnotjhong| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.2 MB| + +## References + +https://huggingface.co/chandrasutrisnotjhong/marian-finetuned-kde4-en-to-fr-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_chandrasutrisnotjhong_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_chandrasutrisnotjhong_pipeline_en.md new file mode 100644 index 00000000000000..7b62b371a7a844 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_chandrasutrisnotjhong_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_chandrasutrisnotjhong_pipeline pipeline MarianTransformer from chandrasutrisnotjhong +author: John Snow Labs +name: marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_chandrasutrisnotjhong_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_chandrasutrisnotjhong_pipeline` is a English model originally trained by chandrasutrisnotjhong. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_chandrasutrisnotjhong_pipeline_en_5.5.0_3.0_1725747771116.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_chandrasutrisnotjhong_pipeline_en_5.5.0_3.0_1725747771116.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_chandrasutrisnotjhong_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_chandrasutrisnotjhong_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_chandrasutrisnotjhong_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|508.7 MB| + +## References + +https://huggingface.co/chandrasutrisnotjhong/marian-finetuned-kde4-en-to-fr-accelerate + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_hjhj2_en.md b/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_hjhj2_en.md new file mode 100644 index 00000000000000..531217c886772a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_hjhj2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_hjhj2 MarianTransformer from hjhj2 +author: John Snow Labs +name: marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_hjhj2 +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_hjhj2` is a English model originally trained by hjhj2. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_hjhj2_en_5.5.0_3.0_1725747372100.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_hjhj2_en_5.5.0_3.0_1725747372100.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from pyspark.ml import Pipeline
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, MarianTransformer
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+# split each document into sentences before translation
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+# translate the detected sentences
+marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_hjhj2","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+// split each document into sentences before translation
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+// translate the detected sentences
+val marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_hjhj2","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_hjhj2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|2.8 GB| + +## References + +https://huggingface.co/hjhj2/marian-finetuned-kde4-en-to-fr-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_hjhj2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_hjhj2_pipeline_en.md new file mode 100644 index 00000000000000..d77c2369e8cd2f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_hjhj2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_hjhj2_pipeline pipeline MarianTransformer from hjhj2 +author: John Snow Labs +name: marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_hjhj2_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_hjhj2_pipeline` is a English model originally trained by hjhj2. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_hjhj2_pipeline_en_5.5.0_3.0_1725747506747.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_hjhj2_pipeline_en_5.5.0_3.0_1725747506747.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_hjhj2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_hjhj2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_hjhj2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|2.8 GB| + +## References + +https://huggingface.co/hjhj2/marian-finetuned-kde4-en-to-fr-accelerate + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_leisa_en.md b/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_leisa_en.md new file mode 100644 index 00000000000000..8a53ae4f8fa6ac --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_leisa_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_leisa MarianTransformer from Leisa +author: John Snow Labs +name: marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_leisa +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_leisa` is a English model originally trained by Leisa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_leisa_en_5.5.0_3.0_1725741680443.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_leisa_en_5.5.0_3.0_1725741680443.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from pyspark.ml import Pipeline
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, MarianTransformer
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+# split each document into sentences before translation
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+# translate the detected sentences
+marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_leisa","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+// split each document into sentences before translation
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+// translate the detected sentences
+val marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_leisa","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
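+
+If the fitted translation pipeline is going to be reused, it can be persisted with the standard Spark ML API and loaded back later. This is only a sketch; the path below is a placeholder and any supported filesystem URI works:
+
+```python
+from pyspark.ml import PipelineModel
+
+# hypothetical output path
+pipelineModel.write().overwrite().save("/tmp/marian_translation_pipeline")
+
+restored = PipelineModel.load("/tmp/marian_translation_pipeline")
+restored.transform(data).select("translation.result").show(truncate=False)
+```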
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_leisa| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.2 MB| + +## References + +https://huggingface.co/Leisa/marian-finetuned-kde4-en-to-fr-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_leisa_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_leisa_pipeline_en.md new file mode 100644 index 00000000000000..6c087fcad641f2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_leisa_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_leisa_pipeline pipeline MarianTransformer from Leisa +author: John Snow Labs +name: marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_leisa_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_leisa_pipeline` is a English model originally trained by Leisa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_leisa_pipeline_en_5.5.0_3.0_1725741707560.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_leisa_pipeline_en_5.5.0_3.0_1725741707560.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_leisa_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_leisa_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_leisa_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|508.7 MB| + +## References + +https://huggingface.co/Leisa/marian-finetuned-kde4-en-to-fr-accelerate + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_naninya_en.md b/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_naninya_en.md new file mode 100644 index 00000000000000..f8cd4fb633a97e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_naninya_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_naninya MarianTransformer from naninya +author: John Snow Labs +name: marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_naninya +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_naninya` is a English model originally trained by naninya. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_naninya_en_5.5.0_3.0_1725740325724.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_naninya_en_5.5.0_3.0_1725740325724.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from pyspark.ml import Pipeline
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, MarianTransformer
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+# split each document into sentences before translation
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+# translate the detected sentences
+marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_naninya","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+// split each document into sentences before translation
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+// translate the detected sentences
+val marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_naninya","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
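+
+For low-latency inference on individual strings (outside of a full Spark job), the fitted model can be wrapped in a LightPipeline. A minimal sketch, reusing `pipelineModel` from the Python example above:
+
+```python
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+# returns a dict keyed by output column, e.g. "document", "sentence", "translation"
+print(light.annotate("I love spark-nlp"))
+```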
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_naninya| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.4 MB| + +## References + +https://huggingface.co/naninya/marian-finetuned-kde4-en-to-fr-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_satyashetty_en.md b/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_satyashetty_en.md new file mode 100644 index 00000000000000..efc81179ac22c6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_satyashetty_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_satyashetty MarianTransformer from satyashetty +author: John Snow Labs +name: marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_satyashetty +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_satyashetty` is a English model originally trained by satyashetty. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_satyashetty_en_5.5.0_3.0_1725747372190.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_satyashetty_en_5.5.0_3.0_1725747372190.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from pyspark.ml import Pipeline
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, MarianTransformer
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+# split each document into sentences before translation
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+# translate the detected sentences
+marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_satyashetty","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+// split each document into sentences before translation
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+// translate the detected sentences
+val marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_satyashetty","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_satyashetty| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.2 MB| + +## References + +https://huggingface.co/satyashetty/marian-finetuned-kde4-en-to-fr-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_satyashetty_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_satyashetty_pipeline_en.md new file mode 100644 index 00000000000000..a571321923d579 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_satyashetty_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_satyashetty_pipeline pipeline MarianTransformer from satyashetty +author: John Snow Labs +name: marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_satyashetty_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_satyashetty_pipeline` is a English model originally trained by satyashetty. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_satyashetty_pipeline_en_5.5.0_3.0_1725747398834.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_satyashetty_pipeline_en_5.5.0_3.0_1725747398834.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_satyashetty_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_satyashetty_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_satyashetty_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|508.8 MB| + +## References + +https://huggingface.co/satyashetty/marian-finetuned-kde4-en-to-fr-accelerate + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_pickupppp_en.md b/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_pickupppp_en.md new file mode 100644 index 00000000000000..7307d406bf7044 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_pickupppp_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English marian_finetuned_kde4_english_tonga_tonga_islands_french_pickupppp MarianTransformer from Pickupppp +author: John Snow Labs +name: marian_finetuned_kde4_english_tonga_tonga_islands_french_pickupppp +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kde4_english_tonga_tonga_islands_french_pickupppp` is a English model originally trained by Pickupppp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_pickupppp_en_5.5.0_3.0_1725740931822.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_pickupppp_en_5.5.0_3.0_1725740931822.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from pyspark.ml import Pipeline
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, MarianTransformer
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+# split each document into sentences before translation
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+# translate the detected sentences
+marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_french_pickupppp","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+// split each document into sentences before translation
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+// translate the detected sentences
+val marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_french_pickupppp","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kde4_english_tonga_tonga_islands_french_pickupppp| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.1 MB| + +## References + +https://huggingface.co/Pickupppp/marian-finetuned-kde4-en-to-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_pickupppp_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_pickupppp_pipeline_en.md new file mode 100644 index 00000000000000..aaf2675eccbf8b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_pickupppp_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English marian_finetuned_kde4_english_tonga_tonga_islands_french_pickupppp_pipeline pipeline MarianTransformer from Pickupppp +author: John Snow Labs +name: marian_finetuned_kde4_english_tonga_tonga_islands_french_pickupppp_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kde4_english_tonga_tonga_islands_french_pickupppp_pipeline` is a English model originally trained by Pickupppp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_pickupppp_pipeline_en_5.5.0_3.0_1725740955661.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_pickupppp_pipeline_en_5.5.0_3.0_1725740955661.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("marian_finetuned_kde4_english_tonga_tonga_islands_french_pickupppp_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("marian_finetuned_kde4_english_tonga_tonga_islands_french_pickupppp_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kde4_english_tonga_tonga_islands_french_pickupppp_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|508.6 MB| + +## References + +https://huggingface.co/Pickupppp/marian-finetuned-kde4-en-to-fr + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_yimhuang_en.md b/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_yimhuang_en.md new file mode 100644 index 00000000000000..e67d0f51058c63 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_french_yimhuang_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English marian_finetuned_kde4_english_tonga_tonga_islands_french_yimhuang MarianTransformer from yimhuang +author: John Snow Labs +name: marian_finetuned_kde4_english_tonga_tonga_islands_french_yimhuang +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kde4_english_tonga_tonga_islands_french_yimhuang` is a English model originally trained by yimhuang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_yimhuang_en_5.5.0_3.0_1725747810196.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_yimhuang_en_5.5.0_3.0_1725747810196.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from pyspark.ml import Pipeline
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, MarianTransformer
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+# split each document into sentences before translation
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+# translate the detected sentences
+marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_french_yimhuang","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+// split each document into sentences before translation
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+// translate the detected sentences
+val marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_french_yimhuang","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kde4_english_tonga_tonga_islands_french_yimhuang| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.1 MB| + +## References + +https://huggingface.co/yimhuang/marian-finetuned-kde4-en-to-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_hindi_comm_en.md b/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_hindi_comm_en.md new file mode 100644 index 00000000000000..81359c3802f1d2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_hindi_comm_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English marian_finetuned_kde4_english_tonga_tonga_islands_hindi_comm MarianTransformer from thecriticalpoint +author: John Snow Labs +name: marian_finetuned_kde4_english_tonga_tonga_islands_hindi_comm +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kde4_english_tonga_tonga_islands_hindi_comm` is a English model originally trained by thecriticalpoint. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_hindi_comm_en_5.5.0_3.0_1725747357053.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_hindi_comm_en_5.5.0_3.0_1725747357053.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from pyspark.ml import Pipeline
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, MarianTransformer
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+# split each document into sentences before translation
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+# translate the detected sentences
+marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_hindi_comm","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+// split each document into sentences before translation
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+// translate the detected sentences
+val marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_hindi_comm","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kde4_english_tonga_tonga_islands_hindi_comm| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|523.1 MB| + +## References + +https://huggingface.co/thecriticalpoint/marian-finetuned-kde4-en-to-hindi_comm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_hindi_comm_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_hindi_comm_pipeline_en.md new file mode 100644 index 00000000000000..68ab0592c8a1eb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-marian_finetuned_kde4_english_tonga_tonga_islands_hindi_comm_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English marian_finetuned_kde4_english_tonga_tonga_islands_hindi_comm_pipeline pipeline MarianTransformer from thecriticalpoint +author: John Snow Labs +name: marian_finetuned_kde4_english_tonga_tonga_islands_hindi_comm_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kde4_english_tonga_tonga_islands_hindi_comm_pipeline` is a English model originally trained by thecriticalpoint. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_hindi_comm_pipeline_en_5.5.0_3.0_1725747380971.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_hindi_comm_pipeline_en_5.5.0_3.0_1725747380971.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("marian_finetuned_kde4_english_tonga_tonga_islands_hindi_comm_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("marian_finetuned_kde4_english_tonga_tonga_islands_hindi_comm_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kde4_english_tonga_tonga_islands_hindi_comm_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|523.6 MB| + +## References + +https://huggingface.co/thecriticalpoint/marian-finetuned-kde4-en-to-hindi_comm + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-marian_frwo_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-marian_frwo_pipeline_en.md new file mode 100644 index 00000000000000..9e741a46ee7709 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-marian_frwo_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English marian_frwo_pipeline pipeline MarianTransformer from cibfaye +author: John Snow Labs +name: marian_frwo_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_frwo_pipeline` is a English model originally trained by cibfaye. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_frwo_pipeline_en_5.5.0_3.0_1725747977661.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_frwo_pipeline_en_5.5.0_3.0_1725747977661.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("marian_frwo_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("marian_frwo_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_frwo_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|508.1 MB| + +## References + +https://huggingface.co/cibfaye/marian-frwo + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-marian_maltese_bbc_nigerian_pidgin_english_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-marian_maltese_bbc_nigerian_pidgin_english_pipeline_en.md new file mode 100644 index 00000000000000..ae13ab6a3ea061 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-marian_maltese_bbc_nigerian_pidgin_english_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English marian_maltese_bbc_nigerian_pidgin_english_pipeline pipeline MarianTransformer from NITHUB-AI +author: John Snow Labs +name: marian_maltese_bbc_nigerian_pidgin_english_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_maltese_bbc_nigerian_pidgin_english_pipeline` is a English model originally trained by NITHUB-AI. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_maltese_bbc_nigerian_pidgin_english_pipeline_en_5.5.0_3.0_1725740989791.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_maltese_bbc_nigerian_pidgin_english_pipeline_en_5.5.0_3.0_1725740989791.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("marian_maltese_bbc_nigerian_pidgin_english_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("marian_maltese_bbc_nigerian_pidgin_english_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_maltese_bbc_nigerian_pidgin_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|295.9 MB| + +## References + +https://huggingface.co/NITHUB-AI/marian-mt-bbc-pcm-en + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-marianmt_ufal_english_spanish_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-marianmt_ufal_english_spanish_pipeline_en.md new file mode 100644 index 00000000000000..e7af140e5770f5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-marianmt_ufal_english_spanish_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English marianmt_ufal_english_spanish_pipeline pipeline MarianTransformer from irenelizihui +author: John Snow Labs +name: marianmt_ufal_english_spanish_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marianmt_ufal_english_spanish_pipeline` is a English model originally trained by irenelizihui. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marianmt_ufal_english_spanish_pipeline_en_5.5.0_3.0_1725740940232.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marianmt_ufal_english_spanish_pipeline_en_5.5.0_3.0_1725740940232.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("marianmt_ufal_english_spanish_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("marianmt_ufal_english_spanish_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marianmt_ufal_english_spanish_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|540.2 MB| + +## References + +https://huggingface.co/irenelizihui/MarianMT_UFAL_en_es + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-mbti_classification_roberta_base_aug_en.md b/docs/_posts/ahmedlone127/2024-09-07-mbti_classification_roberta_base_aug_en.md new file mode 100644 index 00000000000000..fb48d7cbf40104 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-mbti_classification_roberta_base_aug_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mbti_classification_roberta_base_aug RoBertaForSequenceClassification from Shunian +author: John Snow Labs +name: mbti_classification_roberta_base_aug +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mbti_classification_roberta_base_aug` is a English model originally trained by Shunian. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbti_classification_roberta_base_aug_en_5.5.0_3.0_1725679836749.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbti_classification_roberta_base_aug_en_5.5.0_3.0_1725679836749.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from pyspark.ml import Pipeline
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, RoBertaForSequenceClassification
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# classify each document with the fine-tuned RoBERTa model
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("mbti_classification_roberta_base_aug","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+// classify each document with the fine-tuned RoBERTa model
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("mbti_classification_roberta_base_aug", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
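+
+To inspect the predicted label for each input row, the `class` output column from the example above can be queried directly; per-class scores, when present, live in the annotation metadata:
+
+```python
+# `result` holds the predicted label as a string
+pipelineDF.select("text", "class.result").show(truncate=False)
+```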
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbti_classification_roberta_base_aug| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|467.0 MB| + +## References + +https://huggingface.co/Shunian/mbti-classification-roberta-base-aug \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-mbti_classification_roberta_base_aug_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-mbti_classification_roberta_base_aug_pipeline_en.md new file mode 100644 index 00000000000000..3fbc2d976a53ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-mbti_classification_roberta_base_aug_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mbti_classification_roberta_base_aug_pipeline pipeline RoBertaForSequenceClassification from Shunian +author: John Snow Labs +name: mbti_classification_roberta_base_aug_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mbti_classification_roberta_base_aug_pipeline` is a English model originally trained by Shunian. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbti_classification_roberta_base_aug_pipeline_en_5.5.0_3.0_1725679858617.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbti_classification_roberta_base_aug_pipeline_en_5.5.0_3.0_1725679858617.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mbti_classification_roberta_base_aug_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mbti_classification_roberta_base_aug_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbti_classification_roberta_base_aug_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|467.0 MB| + +## References + +https://huggingface.co/Shunian/mbti-classification-roberta-base-aug + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-medical_english_spanish_8_16_en.md b/docs/_posts/ahmedlone127/2024-09-07-medical_english_spanish_8_16_en.md new file mode 100644 index 00000000000000..d51e88df925e5c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-medical_english_spanish_8_16_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English medical_english_spanish_8_16 MarianTransformer from DogGoesBark +author: John Snow Labs +name: medical_english_spanish_8_16 +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`medical_english_spanish_8_16` is a English model originally trained by DogGoesBark. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/medical_english_spanish_8_16_en_5.5.0_3.0_1725740853190.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/medical_english_spanish_8_16_en_5.5.0_3.0_1725740853190.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("medical_english_spanish_8_16","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("medical_english_spanish_8_16","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
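+
+After the pipeline runs, each translated sentence is stored as a Spark NLP annotation in the `translation` column. A small, illustrative sketch for pulling out the plain translated text from the `pipelineDF` produced above:
+
+```python
+# Illustrative sketch: one translated sentence per output row.
+pipelineDF.selectExpr("explode(translation.result) as translated_text") \
+    .show(truncate=False)
+```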
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|medical_english_spanish_8_16| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|540.0 MB| + +## References + +https://huggingface.co/DogGoesBark/medical_en_es_8_16 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-medical_english_spanish_8_16_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-medical_english_spanish_8_16_pipeline_en.md new file mode 100644 index 00000000000000..a790c47f116554 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-medical_english_spanish_8_16_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English medical_english_spanish_8_16_pipeline pipeline MarianTransformer from DogGoesBark +author: John Snow Labs +name: medical_english_spanish_8_16_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`medical_english_spanish_8_16_pipeline` is a English model originally trained by DogGoesBark. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/medical_english_spanish_8_16_pipeline_en_5.5.0_3.0_1725740877654.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/medical_english_spanish_8_16_pipeline_en_5.5.0_3.0_1725740877654.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("medical_english_spanish_8_16_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("medical_english_spanish_8_16_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
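+
+For quick, single-string experiments the pretrained pipeline can also be called directly on text instead of a DataFrame. The sketch below is illustrative only; it assumes the `pipeline` loaded above and simply prints whatever output columns the pipeline returns:
+
+```python
+# Illustrative sketch: `annotate` returns a dict keyed by output column names.
+light_result = pipeline.annotate("The patient was prescribed antibiotics for ten days.")
+print(light_result)
+```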
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|medical_english_spanish_8_16_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|540.5 MB| + +## References + +https://huggingface.co/DogGoesBark/medical_en_es_8_16 + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-memo_bert_sanskrit_saskta_01_da.md b/docs/_posts/ahmedlone127/2024-09-07-memo_bert_sanskrit_saskta_01_da.md new file mode 100644 index 00000000000000..776cb3c1bab818 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-memo_bert_sanskrit_saskta_01_da.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Danish memo_bert_sanskrit_saskta_01 XlmRoBertaForSequenceClassification from yemen2016 +author: John Snow Labs +name: memo_bert_sanskrit_saskta_01 +date: 2024-09-07 +tags: [da, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: da +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`memo_bert_sanskrit_saskta_01` is a Danish model originally trained by yemen2016. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/memo_bert_sanskrit_saskta_01_da_5.5.0_3.0_1725669191245.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/memo_bert_sanskrit_saskta_01_da_5.5.0_3.0_1725669191245.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("memo_bert_sanskrit_saskta_01","da") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("memo_bert_sanskrit_saskta_01", "da")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
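+
+The classifier writes its prediction into the `class` annotation column, so the label for each input document can be inspected directly from the `pipelineDF` produced above. A minimal, illustrative sketch:
+
+```python
+# Illustrative sketch: shows the input text next to the predicted label.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```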
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|memo_bert_sanskrit_saskta_01| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|da| +|Size:|466.6 MB| + +## References + +https://huggingface.co/yemen2016/MeMo_BERT-SA_01 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-memo_bert_sanskrit_saskta_01_pipeline_da.md b/docs/_posts/ahmedlone127/2024-09-07-memo_bert_sanskrit_saskta_01_pipeline_da.md new file mode 100644 index 00000000000000..8ec99968cd32a4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-memo_bert_sanskrit_saskta_01_pipeline_da.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Danish memo_bert_sanskrit_saskta_01_pipeline pipeline XlmRoBertaForSequenceClassification from yemen2016 +author: John Snow Labs +name: memo_bert_sanskrit_saskta_01_pipeline +date: 2024-09-07 +tags: [da, open_source, pipeline, onnx] +task: Text Classification +language: da +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`memo_bert_sanskrit_saskta_01_pipeline` is a Danish model originally trained by yemen2016. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/memo_bert_sanskrit_saskta_01_pipeline_da_5.5.0_3.0_1725669212890.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/memo_bert_sanskrit_saskta_01_pipeline_da_5.5.0_3.0_1725669212890.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("memo_bert_sanskrit_saskta_01_pipeline", lang = "da") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("memo_bert_sanskrit_saskta_01_pipeline", lang = "da") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|memo_bert_sanskrit_saskta_01_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|da| +|Size:|466.6 MB| + +## References + +https://huggingface.co/yemen2016/MeMo_BERT-SA_01 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-mformer_care_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-mformer_care_pipeline_en.md new file mode 100644 index 00000000000000..6073db3aea01c4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-mformer_care_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mformer_care_pipeline pipeline RoBertaForSequenceClassification from joshnguyen +author: John Snow Labs +name: mformer_care_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mformer_care_pipeline` is a English model originally trained by joshnguyen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mformer_care_pipeline_en_5.5.0_3.0_1725679906407.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mformer_care_pipeline_en_5.5.0_3.0_1725679906407.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mformer_care_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mformer_care_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mformer_care_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|464.7 MB| + +## References + +https://huggingface.co/joshnguyen/mformer-care + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-mika_safeaerobert_en.md b/docs/_posts/ahmedlone127/2024-09-07-mika_safeaerobert_en.md new file mode 100644 index 00000000000000..3f8135b93cc28d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-mika_safeaerobert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mika_safeaerobert BertEmbeddings from NASA-AIML +author: John Snow Labs +name: mika_safeaerobert +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mika_safeaerobert` is a English model originally trained by NASA-AIML. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mika_safeaerobert_en_5.5.0_3.0_1725696725715.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mika_safeaerobert_en_5.5.0_3.0_1725696725715.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("mika_safeaerobert","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("mika_safeaerobert","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
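+
+Each row of the `embeddings` column holds one annotation per token, with the vector stored in that annotation's `embeddings` field. An illustrative sketch for flattening them into token/vector pairs from the `pipelineDF` produced above:
+
+```python
+# Illustrative sketch: one row per token with its embedding vector.
+pipelineDF.selectExpr("explode(embeddings) as emb") \
+    .selectExpr("emb.result as token", "emb.embeddings as vector") \
+    .show(truncate=False)
+```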
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mika_safeaerobert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/NASA-AIML/MIKA_SafeAeroBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-mika_safeaerobert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-mika_safeaerobert_pipeline_en.md new file mode 100644 index 00000000000000..e19e4076fa3e4d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-mika_safeaerobert_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mika_safeaerobert_pipeline pipeline BertEmbeddings from NASA-AIML +author: John Snow Labs +name: mika_safeaerobert_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mika_safeaerobert_pipeline` is a English model originally trained by NASA-AIML. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mika_safeaerobert_pipeline_en_5.5.0_3.0_1725696744252.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mika_safeaerobert_pipeline_en_5.5.0_3.0_1725696744252.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mika_safeaerobert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mika_safeaerobert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mika_safeaerobert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/NASA-AIML/MIKA_SafeAeroBERT + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-minilmv2_l6_h384_r_fineweb_100k_en.md b/docs/_posts/ahmedlone127/2024-09-07-minilmv2_l6_h384_r_fineweb_100k_en.md new file mode 100644 index 00000000000000..e076de5e70cd89 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-minilmv2_l6_h384_r_fineweb_100k_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English minilmv2_l6_h384_r_fineweb_100k RoBertaEmbeddings from pszemraj +author: John Snow Labs +name: minilmv2_l6_h384_r_fineweb_100k +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`minilmv2_l6_h384_r_fineweb_100k` is a English model originally trained by pszemraj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/minilmv2_l6_h384_r_fineweb_100k_en_5.5.0_3.0_1725716359789.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/minilmv2_l6_h384_r_fineweb_100k_en_5.5.0_3.0_1725716359789.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("minilmv2_l6_h384_r_fineweb_100k","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("minilmv2_l6_h384_r_fineweb_100k","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
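+
+If the vectors are meant to feed downstream Spark ML stages, Spark NLP's `EmbeddingsFinisher` can convert the annotation structs into plain vector columns. This is an optional extra step, sketched here for illustration on the `pipelineDF` produced above:
+
+```python
+from sparknlp.base import EmbeddingsFinisher
+
+# Illustrative sketch: turn embedding annotations into Spark ML vectors.
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["finished_embeddings"]) \
+    .setOutputAsVector(True)
+
+finisher.transform(pipelineDF).select("finished_embeddings").show(truncate=False)
+```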
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|minilmv2_l6_h384_r_fineweb_100k| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|114.2 MB| + +## References + +https://huggingface.co/pszemraj/MiniLMv2-L6-H384_R-fineweb-100k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-mlcovid19_classifier_en.md b/docs/_posts/ahmedlone127/2024-09-07-mlcovid19_classifier_en.md new file mode 100644 index 00000000000000..796abae63c7a27 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-mlcovid19_classifier_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mlcovid19_classifier XlmRoBertaForSequenceClassification from oscarwu +author: John Snow Labs +name: mlcovid19_classifier +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mlcovid19_classifier` is a English model originally trained by oscarwu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlcovid19_classifier_en_5.5.0_3.0_1725670622254.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlcovid19_classifier_en_5.5.0_3.0_1725670622254.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("mlcovid19_classifier","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("mlcovid19_classifier", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
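+
+When latency matters more than throughput, the fitted pipeline can be wrapped in a `LightPipeline` for fast, driver-side inference on individual strings. A brief, illustrative sketch (the example sentence is arbitrary):
+
+```python
+from sparknlp.base import LightPipeline
+
+# Illustrative sketch: annotate a single string without building a DataFrame.
+light = LightPipeline(pipelineModel)
+print(light.annotate("New COVID-19 variant cases are rising in several regions.")["class"])
+```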
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlcovid19_classifier| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|866.1 MB| + +## References + +https://huggingface.co/oscarwu/mlcovid19-classifier \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-mlcovid19_classifier_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-mlcovid19_classifier_pipeline_en.md new file mode 100644 index 00000000000000..1c25d95d0a4803 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-mlcovid19_classifier_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mlcovid19_classifier_pipeline pipeline XlmRoBertaForSequenceClassification from oscarwu +author: John Snow Labs +name: mlcovid19_classifier_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mlcovid19_classifier_pipeline` is a English model originally trained by oscarwu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlcovid19_classifier_pipeline_en_5.5.0_3.0_1725670726790.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlcovid19_classifier_pipeline_en_5.5.0_3.0_1725670726790.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mlcovid19_classifier_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mlcovid19_classifier_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlcovid19_classifier_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|866.1 MB| + +## References + +https://huggingface.co/oscarwu/mlcovid19-classifier + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-mmarco_mminilmv2_l12_h384_v1_y2lan_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-mmarco_mminilmv2_l12_h384_v1_y2lan_pipeline_en.md new file mode 100644 index 00000000000000..23a9f0762391a1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-mmarco_mminilmv2_l12_h384_v1_y2lan_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mmarco_mminilmv2_l12_h384_v1_y2lan_pipeline pipeline XlmRoBertaForSequenceClassification from y2lan +author: John Snow Labs +name: mmarco_mminilmv2_l12_h384_v1_y2lan_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mmarco_mminilmv2_l12_h384_v1_y2lan_pipeline` is a English model originally trained by y2lan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mmarco_mminilmv2_l12_h384_v1_y2lan_pipeline_en_5.5.0_3.0_1725671238535.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mmarco_mminilmv2_l12_h384_v1_y2lan_pipeline_en_5.5.0_3.0_1725671238535.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mmarco_mminilmv2_l12_h384_v1_y2lan_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mmarco_mminilmv2_l12_h384_v1_y2lan_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mmarco_mminilmv2_l12_h384_v1_y2lan_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|399.6 MB| + +## References + +https://huggingface.co/y2lan/mmarco-mMiniLMv2-L12-H384-v1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-model1_en.md b/docs/_posts/ahmedlone127/2024-09-07-model1_en.md new file mode 100644 index 00000000000000..5ba8128dd55a66 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-model1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English model1 BertEmbeddings from flymushroom +author: John Snow Labs +name: model1 +date: 2024-09-07 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`model1` is a English model originally trained by flymushroom. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/model1_en_5.5.0_3.0_1725726991745.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/model1_en_5.5.0_3.0_1725726991745.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["document"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("model1","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("model1", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|model1|
+|Compatibility:|Spark NLP 5.5.0+|
+|License:|Open Source|
+|Edition:|Official|
+|Input Labels:|[document_question, document_context]|
+|Output Labels:|[answer]|
+|Language:|en|
+|Size:|247.2 MB|
+
+## References
+
+https://huggingface.co/flymushroom/model1
\ No newline at end of file
diff --git a/docs/_posts/ahmedlone127/2024-09-07-model3e_norwegian_wd_norwegian_perturb_en.md b/docs/_posts/ahmedlone127/2024-09-07-model3e_norwegian_wd_norwegian_perturb_en.md
new file mode 100644
index 00000000000000..d1d9c7cec4be75
--- /dev/null
+++ b/docs/_posts/ahmedlone127/2024-09-07-model3e_norwegian_wd_norwegian_perturb_en.md
@@ -0,0 +1,94 @@
+---
+layout: model
+title: English model3e_norwegian_wd_norwegian_perturb DistilBertForTokenClassification from cria111
+author: John Snow Labs
+name: model3e_norwegian_wd_norwegian_perturb
+date: 2024-09-07
+tags: [en, open_source, onnx, token_classification, distilbert, ner]
+task: Named Entity Recognition
+language: en
+edition: Spark NLP 5.5.0
+spark_version: 3.0
+supported: true
+engine: onnx
+annotator: DistilBertForTokenClassification
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `model3e_norwegian_wd_norwegian_perturb` is an English model originally trained by cria111.
+
+{:.btn-box}
+
+
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/model3e_norwegian_wd_norwegian_perturb_en_5.5.0_3.0_1725739667444.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/model3e_norwegian_wd_norwegian_perturb_en_5.5.0_3.0_1725739667444.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("model3e_norwegian_wd_norwegian_perturb","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("model3e_norwegian_wd_norwegian_perturb", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
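+
+The tokens and their predicted tags come back as parallel arrays in the `token` and `ner` annotation columns, so they can be viewed side by side straight from the `pipelineDF` produced above. An illustrative sketch:
+
+```python
+# Illustrative sketch: tokens and NER tags are aligned index by index.
+pipelineDF.select("token.result", "ner.result").show(truncate=False)
+```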
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|model3e_norwegian_wd_norwegian_perturb| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/cria111/model3e_no_wd_no_perturb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-model_finetuned_kde4_english_tonga_tonga_islands_french_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-model_finetuned_kde4_english_tonga_tonga_islands_french_pipeline_en.md new file mode 100644 index 00000000000000..d55774f6cfd301 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-model_finetuned_kde4_english_tonga_tonga_islands_french_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English model_finetuned_kde4_english_tonga_tonga_islands_french_pipeline pipeline MarianTransformer from MRP101py +author: John Snow Labs +name: model_finetuned_kde4_english_tonga_tonga_islands_french_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`model_finetuned_kde4_english_tonga_tonga_islands_french_pipeline` is a English model originally trained by MRP101py. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/model_finetuned_kde4_english_tonga_tonga_islands_french_pipeline_en_5.5.0_3.0_1725740561192.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/model_finetuned_kde4_english_tonga_tonga_islands_french_pipeline_en_5.5.0_3.0_1725740561192.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("model_finetuned_kde4_english_tonga_tonga_islands_french_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("model_finetuned_kde4_english_tonga_tonga_islands_french_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
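+
+As with other pretrained pipelines, a single sentence can be translated without building a DataFrame first. The sketch below is illustrative only; it prints every output column returned for an arbitrary example sentence:
+
+```python
+# Illustrative sketch: `annotate` returns a dict of output column -> results.
+print(pipeline.annotate("The quick brown fox jumps over the lazy dog."))
+```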
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|model_finetuned_kde4_english_tonga_tonga_islands_french_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|508.6 MB| + +## References + +https://huggingface.co/MRP101py/model-finetuned-kde4-en-to-fr + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-model_m5_english_chinese_twi_en.md b/docs/_posts/ahmedlone127/2024-09-07-model_m5_english_chinese_twi_en.md new file mode 100644 index 00000000000000..8db45cfb0144d9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-model_m5_english_chinese_twi_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English model_m5_english_chinese_twi MarianTransformer from Konic +author: John Snow Labs +name: model_m5_english_chinese_twi +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`model_m5_english_chinese_twi` is a English model originally trained by Konic. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/model_m5_english_chinese_twi_en_5.5.0_3.0_1725741510159.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/model_m5_english_chinese_twi_en_5.5.0_3.0_1725741510159.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("model_m5_english_chinese_twi","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("model_m5_english_chinese_twi","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|model_m5_english_chinese_twi| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|542.3 MB| + +## References + +https://huggingface.co/Konic/model_m5_en_zh_tw \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-model_m5_english_chinese_twi_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-model_m5_english_chinese_twi_pipeline_en.md new file mode 100644 index 00000000000000..7ca1fe4aebf126 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-model_m5_english_chinese_twi_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English model_m5_english_chinese_twi_pipeline pipeline MarianTransformer from Konic +author: John Snow Labs +name: model_m5_english_chinese_twi_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`model_m5_english_chinese_twi_pipeline` is a English model originally trained by Konic. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/model_m5_english_chinese_twi_pipeline_en_5.5.0_3.0_1725741535027.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/model_m5_english_chinese_twi_pipeline_en_5.5.0_3.0_1725741535027.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("model_m5_english_chinese_twi_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("model_m5_english_chinese_twi_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|model_m5_english_chinese_twi_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|542.8 MB| + +## References + +https://huggingface.co/Konic/model_m5_en_zh_tw + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-model_name_kayyyy27_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-model_name_kayyyy27_pipeline_en.md new file mode 100644 index 00000000000000..3927bab1ec8cb9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-model_name_kayyyy27_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English model_name_kayyyy27_pipeline pipeline DistilBertForSequenceClassification from Kayyyy27 +author: John Snow Labs +name: model_name_kayyyy27_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`model_name_kayyyy27_pipeline` is a English model originally trained by Kayyyy27. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/model_name_kayyyy27_pipeline_en_5.5.0_3.0_1725674450513.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/model_name_kayyyy27_pipeline_en_5.5.0_3.0_1725674450513.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("model_name_kayyyy27_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("model_name_kayyyy27_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|model_name_kayyyy27_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Kayyyy27/model_name + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-movie_review_roberta_imalexianne_en.md b/docs/_posts/ahmedlone127/2024-09-07-movie_review_roberta_imalexianne_en.md new file mode 100644 index 00000000000000..0ac94dda442e55 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-movie_review_roberta_imalexianne_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English movie_review_roberta_imalexianne RoBertaForSequenceClassification from imalexianne +author: John Snow Labs +name: movie_review_roberta_imalexianne +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`movie_review_roberta_imalexianne` is a English model originally trained by imalexianne. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/movie_review_roberta_imalexianne_en_5.5.0_3.0_1725717668849.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/movie_review_roberta_imalexianne_en_5.5.0_3.0_1725717668849.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("movie_review_roberta_imalexianne","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("movie_review_roberta_imalexianne", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
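+
+Sequence-classification annotators expose the label set they were trained on, which helps when interpreting `class.result`. An illustrative sketch using the `sequenceClassifier` and `pipelineDF` defined above:
+
+```python
+# Illustrative sketch: list the labels, then show the prediction per review.
+print(sequenceClassifier.getClasses())
+pipelineDF.select("class.result").show(truncate=False)
+```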
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|movie_review_roberta_imalexianne| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|463.5 MB| + +## References + +https://huggingface.co/imalexianne/Movie_Review_Roberta \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-mpnet_base_natural_questions_mnsrl_en.md b/docs/_posts/ahmedlone127/2024-09-07-mpnet_base_natural_questions_mnsrl_en.md new file mode 100644 index 00000000000000..361eac8f6c320a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-mpnet_base_natural_questions_mnsrl_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English mpnet_base_natural_questions_mnsrl MPNetEmbeddings from tomaarsen +author: John Snow Labs +name: mpnet_base_natural_questions_mnsrl +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mpnet_base_natural_questions_mnsrl` is a English model originally trained by tomaarsen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mpnet_base_natural_questions_mnsrl_en_5.5.0_3.0_1725703567742.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mpnet_base_natural_questions_mnsrl_en_5.5.0_3.0_1725703567742.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("mpnet_base_natural_questions_mnsrl","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("mpnet_base_natural_questions_mnsrl","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
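+
+MPNet produces a single sentence-level annotation per document, with the vector stored in the annotation's `embeddings` field. An illustrative sketch for extracting it from the `pipelineDF` produced above:
+
+```python
+# Illustrative sketch: one sentence embedding per input document.
+pipelineDF.selectExpr("explode(embeddings) as emb") \
+    .selectExpr("emb.embeddings as sentence_embedding") \
+    .show(truncate=False)
+```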
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mpnet_base_natural_questions_mnsrl| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.6 MB| + +## References + +https://huggingface.co/tomaarsen/mpnet-base-natural-questions-mnsrl \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-mpnet_base_natural_questions_mnsrl_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-mpnet_base_natural_questions_mnsrl_pipeline_en.md new file mode 100644 index 00000000000000..2cfb47ce3032f3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-mpnet_base_natural_questions_mnsrl_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English mpnet_base_natural_questions_mnsrl_pipeline pipeline MPNetEmbeddings from tomaarsen +author: John Snow Labs +name: mpnet_base_natural_questions_mnsrl_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mpnet_base_natural_questions_mnsrl_pipeline` is a English model originally trained by tomaarsen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mpnet_base_natural_questions_mnsrl_pipeline_en_5.5.0_3.0_1725703588616.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mpnet_base_natural_questions_mnsrl_pipeline_en_5.5.0_3.0_1725703588616.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mpnet_base_natural_questions_mnsrl_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mpnet_base_natural_questions_mnsrl_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mpnet_base_natural_questions_mnsrl_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.6 MB| + +## References + +https://huggingface.co/tomaarsen/mpnet-base-natural-questions-mnsrl + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-msbert_he.md b/docs/_posts/ahmedlone127/2024-09-07-msbert_he.md new file mode 100644 index 00000000000000..83095063561baf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-msbert_he.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Hebrew msbert BertEmbeddings from dicta-il +author: John Snow Labs +name: msbert +date: 2024-09-07 +tags: [he, open_source, onnx, embeddings, bert] +task: Embeddings +language: he +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`msbert` is a Hebrew model originally trained by dicta-il. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/msbert_he_5.5.0_3.0_1725696950647.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/msbert_he_5.5.0_3.0_1725696950647.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("msbert","he") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("msbert","he") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|msbert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|he| +|Size:|440.6 MB| + +## References + +https://huggingface.co/dicta-il/MsBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-msbert_pipeline_he.md b/docs/_posts/ahmedlone127/2024-09-07-msbert_pipeline_he.md new file mode 100644 index 00000000000000..60b9b9a6ad9b5f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-msbert_pipeline_he.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Hebrew msbert_pipeline pipeline BertEmbeddings from dicta-il +author: John Snow Labs +name: msbert_pipeline +date: 2024-09-07 +tags: [he, open_source, pipeline, onnx] +task: Embeddings +language: he +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`msbert_pipeline` is a Hebrew model originally trained by dicta-il. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/msbert_pipeline_he_5.5.0_3.0_1725697073732.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/msbert_pipeline_he_5.5.0_3.0_1725697073732.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("msbert_pipeline", lang = "he") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("msbert_pipeline", lang = "he") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|msbert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|he| +|Size:|440.6 MB| + +## References + +https://huggingface.co/dicta-il/MsBERT + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-mtl_roberta_base_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-mtl_roberta_base_pipeline_en.md new file mode 100644 index 00000000000000..06be52d2706b1d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-mtl_roberta_base_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English mtl_roberta_base_pipeline pipeline RoBertaEmbeddings from jgammack +author: John Snow Labs +name: mtl_roberta_base_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mtl_roberta_base_pipeline` is a English model originally trained by jgammack. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mtl_roberta_base_pipeline_en_5.5.0_3.0_1725715851774.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mtl_roberta_base_pipeline_en_5.5.0_3.0_1725715851774.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mtl_roberta_base_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mtl_roberta_base_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mtl_roberta_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|466.4 MB| + +## References + +https://huggingface.co/jgammack/MTL-roberta-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-multi_qa_mpnet_base_dot_v1_covidqa_search_65_25_1epoch_full_p_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-multi_qa_mpnet_base_dot_v1_covidqa_search_65_25_1epoch_full_p_pipeline_en.md new file mode 100644 index 00000000000000..bb488b5ad87467 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-multi_qa_mpnet_base_dot_v1_covidqa_search_65_25_1epoch_full_p_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English multi_qa_mpnet_base_dot_v1_covidqa_search_65_25_1epoch_full_p_pipeline pipeline MPNetEmbeddings from checkiejan +author: John Snow Labs +name: multi_qa_mpnet_base_dot_v1_covidqa_search_65_25_1epoch_full_p_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`multi_qa_mpnet_base_dot_v1_covidqa_search_65_25_1epoch_full_p_pipeline` is a English model originally trained by checkiejan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/multi_qa_mpnet_base_dot_v1_covidqa_search_65_25_1epoch_full_p_pipeline_en_5.5.0_3.0_1725703852080.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/multi_qa_mpnet_base_dot_v1_covidqa_search_65_25_1epoch_full_p_pipeline_en_5.5.0_3.0_1725703852080.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("multi_qa_mpnet_base_dot_v1_covidqa_search_65_25_1epoch_full_p_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("multi_qa_mpnet_base_dot_v1_covidqa_search_65_25_1epoch_full_p_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|multi_qa_mpnet_base_dot_v1_covidqa_search_65_25_1epoch_full_p_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/checkiejan/multi-qa-mpnet-base-dot-v1-covidqa-search-65-25-1epoch-full-p + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-multi_qa_mpnet_base_dot_v1_covidqa_search_75_25_2epoch_en.md b/docs/_posts/ahmedlone127/2024-09-07-multi_qa_mpnet_base_dot_v1_covidqa_search_75_25_2epoch_en.md new file mode 100644 index 00000000000000..c0c29cc5fa7d12 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-multi_qa_mpnet_base_dot_v1_covidqa_search_75_25_2epoch_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English multi_qa_mpnet_base_dot_v1_covidqa_search_75_25_2epoch MPNetEmbeddings from checkiejan +author: John Snow Labs +name: multi_qa_mpnet_base_dot_v1_covidqa_search_75_25_2epoch +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`multi_qa_mpnet_base_dot_v1_covidqa_search_75_25_2epoch` is a English model originally trained by checkiejan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/multi_qa_mpnet_base_dot_v1_covidqa_search_75_25_2epoch_en_5.5.0_3.0_1725703589092.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/multi_qa_mpnet_base_dot_v1_covidqa_search_75_25_2epoch_en_5.5.0_3.0_1725703589092.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("multi_qa_mpnet_base_dot_v1_covidqa_search_75_25_2epoch","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("multi_qa_mpnet_base_dot_v1_covidqa_search_75_25_2epoch","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
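+
+Since this is a sentence-level embedding model tuned for dot-product retrieval, one illustrative use is scoring the similarity of two texts. A rough sketch, reusing the `pipelineModel` fitted above; the nested field names follow Spark NLP's annotation schema and should be treated as assumptions:
+
+```python
+import numpy as np
+
+pairs = spark.createDataFrame([["covid symptoms"], ["signs of coronavirus infection"]]).toDF("text")
+rows = pipelineModel.transform(pairs).select("embeddings.embeddings").collect()
+
+# One annotation per document, so take the first vector of each row
+v1, v2 = np.array(rows[0][0][0]), np.array(rows[1][0][0])
+print("dot score:", float(v1 @ v2))
+```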
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|multi_qa_mpnet_base_dot_v1_covidqa_search_75_25_2epoch| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/checkiejan/multi-qa-mpnet-base-dot-v1-covidqa-search-75-25-2epoch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-multi_qa_mpnet_base_dot_v1_covidqa_search_75_25_2epoch_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-multi_qa_mpnet_base_dot_v1_covidqa_search_75_25_2epoch_pipeline_en.md new file mode 100644 index 00000000000000..24f01e6f568417 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-multi_qa_mpnet_base_dot_v1_covidqa_search_75_25_2epoch_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English multi_qa_mpnet_base_dot_v1_covidqa_search_75_25_2epoch_pipeline pipeline MPNetEmbeddings from checkiejan +author: John Snow Labs +name: multi_qa_mpnet_base_dot_v1_covidqa_search_75_25_2epoch_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`multi_qa_mpnet_base_dot_v1_covidqa_search_75_25_2epoch_pipeline` is a English model originally trained by checkiejan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/multi_qa_mpnet_base_dot_v1_covidqa_search_75_25_2epoch_pipeline_en_5.5.0_3.0_1725703610557.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/multi_qa_mpnet_base_dot_v1_covidqa_search_75_25_2epoch_pipeline_en_5.5.0_3.0_1725703610557.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("multi_qa_mpnet_base_dot_v1_covidqa_search_75_25_2epoch_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("multi_qa_mpnet_base_dot_v1_covidqa_search_75_25_2epoch_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|multi_qa_mpnet_base_dot_v1_covidqa_search_75_25_2epoch_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/checkiejan/multi-qa-mpnet-base-dot-v1-covidqa-search-75-25-2epoch + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-multi_qa_mpnet_base_dot_v1_fine_tuned_hs_en.md b/docs/_posts/ahmedlone127/2024-09-07-multi_qa_mpnet_base_dot_v1_fine_tuned_hs_en.md new file mode 100644 index 00000000000000..8ebdccfb5b5274 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-multi_qa_mpnet_base_dot_v1_fine_tuned_hs_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English multi_qa_mpnet_base_dot_v1_fine_tuned_hs MPNetEmbeddings from dgroechel +author: John Snow Labs +name: multi_qa_mpnet_base_dot_v1_fine_tuned_hs +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`multi_qa_mpnet_base_dot_v1_fine_tuned_hs` is a English model originally trained by dgroechel. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/multi_qa_mpnet_base_dot_v1_fine_tuned_hs_en_5.5.0_3.0_1725703034655.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/multi_qa_mpnet_base_dot_v1_fine_tuned_hs_en_5.5.0_3.0_1725703034655.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("multi_qa_mpnet_base_dot_v1_fine_tuned_hs","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("multi_qa_mpnet_base_dot_v1_fine_tuned_hs","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|multi_qa_mpnet_base_dot_v1_fine_tuned_hs| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/dgroechel/multi-qa-mpnet-base-dot-v1-fine-tuned-hs \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-multi_qa_mpnet_base_dot_v1_fine_tuned_hs_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-multi_qa_mpnet_base_dot_v1_fine_tuned_hs_pipeline_en.md new file mode 100644 index 00000000000000..3515de57f1ddd3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-multi_qa_mpnet_base_dot_v1_fine_tuned_hs_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English multi_qa_mpnet_base_dot_v1_fine_tuned_hs_pipeline pipeline MPNetEmbeddings from dgroechel +author: John Snow Labs +name: multi_qa_mpnet_base_dot_v1_fine_tuned_hs_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`multi_qa_mpnet_base_dot_v1_fine_tuned_hs_pipeline` is a English model originally trained by dgroechel. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/multi_qa_mpnet_base_dot_v1_fine_tuned_hs_pipeline_en_5.5.0_3.0_1725703054467.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/multi_qa_mpnet_base_dot_v1_fine_tuned_hs_pipeline_en_5.5.0_3.0_1725703054467.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("multi_qa_mpnet_base_dot_v1_fine_tuned_hs_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("multi_qa_mpnet_base_dot_v1_fine_tuned_hs_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|multi_qa_mpnet_base_dot_v1_fine_tuned_hs_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/dgroechel/multi-qa-mpnet-base-dot-v1-fine-tuned-hs + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-multilingual_e5_base_classification_v0_4_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-07-multilingual_e5_base_classification_v0_4_pipeline_xx.md new file mode 100644 index 00000000000000..d6e2ed7f41e88d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-multilingual_e5_base_classification_v0_4_pipeline_xx.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Multilingual multilingual_e5_base_classification_v0_4_pipeline pipeline XlmRoBertaForSequenceClassification from Data-Lab +author: John Snow Labs +name: multilingual_e5_base_classification_v0_4_pipeline +date: 2024-09-07 +tags: [xx, open_source, pipeline, onnx] +task: Text Classification +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`multilingual_e5_base_classification_v0_4_pipeline` is a Multilingual model originally trained by Data-Lab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/multilingual_e5_base_classification_v0_4_pipeline_xx_5.5.0_3.0_1725670552732.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/multilingual_e5_base_classification_v0_4_pipeline_xx_5.5.0_3.0_1725670552732.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("multilingual_e5_base_classification_v0_4_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("multilingual_e5_base_classification_v0_4_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|multilingual_e5_base_classification_v0_4_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|797.0 MB| + +## References + +https://huggingface.co/Data-Lab/multilingual-e5-base_classification_v0.4 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-multilingual_e5_base_classification_v0_4_xx.md b/docs/_posts/ahmedlone127/2024-09-07-multilingual_e5_base_classification_v0_4_xx.md new file mode 100644 index 00000000000000..c05dda84a3c8c2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-multilingual_e5_base_classification_v0_4_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual multilingual_e5_base_classification_v0_4 XlmRoBertaForSequenceClassification from Data-Lab +author: John Snow Labs +name: multilingual_e5_base_classification_v0_4 +date: 2024-09-07 +tags: [xx, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`multilingual_e5_base_classification_v0_4` is a Multilingual model originally trained by Data-Lab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/multilingual_e5_base_classification_v0_4_xx_5.5.0_3.0_1725670413308.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/multilingual_e5_base_classification_v0_4_xx_5.5.0_3.0_1725670413308.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("multilingual_e5_base_classification_v0_4","xx") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("multilingual_e5_base_classification_v0_4", "xx")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
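+
+To read back the predicted labels from the fitted pipeline, a short sketch that assumes the `class` output column defined in the snippet above:
+
+```python
+# Each row carries one classification annotation; its `result` field holds the label string
+pipelineDF.select("text", "class.result").show(truncate=False)
+```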
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|multilingual_e5_base_classification_v0_4| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|xx| +|Size:|796.9 MB| + +## References + +https://huggingface.co/Data-Lab/multilingual-e5-base_classification_v0.4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-multilingual_xlm_roberta_for_ner_eserdy_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-07-multilingual_xlm_roberta_for_ner_eserdy_pipeline_xx.md new file mode 100644 index 00000000000000..9d1887a92ea6b0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-multilingual_xlm_roberta_for_ner_eserdy_pipeline_xx.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Multilingual multilingual_xlm_roberta_for_ner_eserdy_pipeline pipeline XlmRoBertaForTokenClassification from eserdy +author: John Snow Labs +name: multilingual_xlm_roberta_for_ner_eserdy_pipeline +date: 2024-09-07 +tags: [xx, open_source, pipeline, onnx] +task: Named Entity Recognition +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`multilingual_xlm_roberta_for_ner_eserdy_pipeline` is a Multilingual model originally trained by eserdy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/multilingual_xlm_roberta_for_ner_eserdy_pipeline_xx_5.5.0_3.0_1725694256321.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/multilingual_xlm_roberta_for_ner_eserdy_pipeline_xx_5.5.0_3.0_1725694256321.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("multilingual_xlm_roberta_for_ner_eserdy_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("multilingual_xlm_roberta_for_ner_eserdy_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|multilingual_xlm_roberta_for_ner_eserdy_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|840.8 MB| + +## References + +https://huggingface.co/eserdy/multilingual-xlm-roberta-for-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-multilingual_xlm_roberta_for_ner_eserdy_xx.md b/docs/_posts/ahmedlone127/2024-09-07-multilingual_xlm_roberta_for_ner_eserdy_xx.md new file mode 100644 index 00000000000000..c4980780b800e1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-multilingual_xlm_roberta_for_ner_eserdy_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual multilingual_xlm_roberta_for_ner_eserdy XlmRoBertaForTokenClassification from eserdy +author: John Snow Labs +name: multilingual_xlm_roberta_for_ner_eserdy +date: 2024-09-07 +tags: [xx, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`multilingual_xlm_roberta_for_ner_eserdy` is a Multilingual model originally trained by eserdy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/multilingual_xlm_roberta_for_ner_eserdy_xx_5.5.0_3.0_1725694171138.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/multilingual_xlm_roberta_for_ner_eserdy_xx_5.5.0_3.0_1725694171138.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("multilingual_xlm_roberta_for_ner_eserdy","xx") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("multilingual_xlm_roberta_for_ner_eserdy", "xx")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|multilingual_xlm_roberta_for_ner_eserdy| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|xx| +|Size:|840.8 MB| + +## References + +https://huggingface.co/eserdy/multilingual-xlm-roberta-for-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-n_roberta_imdb_padding60model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-n_roberta_imdb_padding60model_pipeline_en.md new file mode 100644 index 00000000000000..74362ddde2b563 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-n_roberta_imdb_padding60model_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English n_roberta_imdb_padding60model_pipeline pipeline RoBertaForSequenceClassification from Realgon +author: John Snow Labs +name: n_roberta_imdb_padding60model_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`n_roberta_imdb_padding60model_pipeline` is a English model originally trained by Realgon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/n_roberta_imdb_padding60model_pipeline_en_5.5.0_3.0_1725680476123.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/n_roberta_imdb_padding60model_pipeline_en_5.5.0_3.0_1725680476123.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("n_roberta_imdb_padding60model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("n_roberta_imdb_padding60model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|n_roberta_imdb_padding60model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|463.2 MB| + +## References + +https://huggingface.co/Realgon/N_roberta_imdb_padding60model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-named_entity_recognition_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-named_entity_recognition_pipeline_en.md new file mode 100644 index 00000000000000..75bfbb0a08c0a5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-named_entity_recognition_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English named_entity_recognition_pipeline pipeline DistilBertForTokenClassification from Sarthak279 +author: John Snow Labs +name: named_entity_recognition_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`named_entity_recognition_pipeline` is a English model originally trained by Sarthak279. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/named_entity_recognition_pipeline_en_5.5.0_3.0_1725734185191.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/named_entity_recognition_pipeline_en_5.5.0_3.0_1725734185191.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("named_entity_recognition_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("named_entity_recognition_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|named_entity_recognition_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/Sarthak279/Named-Entity-Recognition + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-nepal_bhasa_trained_danish_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-nepal_bhasa_trained_danish_pipeline_en.md new file mode 100644 index 00000000000000..eae3c5ac3206fb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-nepal_bhasa_trained_danish_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English nepal_bhasa_trained_danish_pipeline pipeline DistilBertForTokenClassification from annamariagnat +author: John Snow Labs +name: nepal_bhasa_trained_danish_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nepal_bhasa_trained_danish_pipeline` is a English model originally trained by annamariagnat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nepal_bhasa_trained_danish_pipeline_en_5.5.0_3.0_1725729782514.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nepal_bhasa_trained_danish_pipeline_en_5.5.0_3.0_1725729782514.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("nepal_bhasa_trained_danish_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("nepal_bhasa_trained_danish_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nepal_bhasa_trained_danish_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|505.4 MB| + +## References + +https://huggingface.co/annamariagnat/NEW_trained_danish + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-nepal_bhasa_trained_serbian_en.md b/docs/_posts/ahmedlone127/2024-09-07-nepal_bhasa_trained_serbian_en.md new file mode 100644 index 00000000000000..ff9def5ae4006e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-nepal_bhasa_trained_serbian_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English nepal_bhasa_trained_serbian DistilBertForTokenClassification from annamariagnat +author: John Snow Labs +name: nepal_bhasa_trained_serbian +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nepal_bhasa_trained_serbian` is a English model originally trained by annamariagnat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nepal_bhasa_trained_serbian_en_5.5.0_3.0_1725739357939.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nepal_bhasa_trained_serbian_en_5.5.0_3.0_1725739357939.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("nepal_bhasa_trained_serbian","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("nepal_bhasa_trained_serbian", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
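+
+The `ner` column holds one IOB tag per token. As a small follow-up sketch (assuming the columns defined above), Spark NLP's `NerConverter` can group those tags into whole entity chunks:
+
+```python
+from sparknlp.annotator import NerConverter
+
+# Merge B-/I- token tags into complete entity chunks
+converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+converter.transform(pipelineDF) \
+    .selectExpr("explode(ner_chunk) as chunk") \
+    .selectExpr("chunk.result as entity", "chunk.metadata['entity'] as label") \
+    .show(truncate=False)
+```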
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nepal_bhasa_trained_serbian| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|505.4 MB| + +## References + +https://huggingface.co/annamariagnat/NEW_trained_serbian \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-nepal_bhasa_trained_serbian_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-nepal_bhasa_trained_serbian_pipeline_en.md new file mode 100644 index 00000000000000..25e38f9c370bb8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-nepal_bhasa_trained_serbian_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English nepal_bhasa_trained_serbian_pipeline pipeline DistilBertForTokenClassification from annamariagnat +author: John Snow Labs +name: nepal_bhasa_trained_serbian_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nepal_bhasa_trained_serbian_pipeline` is a English model originally trained by annamariagnat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nepal_bhasa_trained_serbian_pipeline_en_5.5.0_3.0_1725739381023.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nepal_bhasa_trained_serbian_pipeline_en_5.5.0_3.0_1725739381023.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("nepal_bhasa_trained_serbian_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("nepal_bhasa_trained_serbian_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nepal_bhasa_trained_serbian_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|505.4 MB| + +## References + +https://huggingface.co/annamariagnat/NEW_trained_serbian + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-ner_411_id.md b/docs/_posts/ahmedlone127/2024-09-07-ner_411_id.md new file mode 100644 index 00000000000000..e29eee1efcc82e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-ner_411_id.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Indonesian ner_411 XlmRoBertaForTokenClassification from blekkk +author: John Snow Labs +name: ner_411 +date: 2024-09-07 +tags: [id, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: id +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_411` is a Indonesian model originally trained by blekkk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_411_id_5.5.0_3.0_1725693326564.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_411_id_5.5.0_3.0_1725693326564.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("ner_411","id") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("ner_411", "id")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_411| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|id| +|Size:|772.8 MB| + +## References + +https://huggingface.co/blekkk/ner_411 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-ner_411_pipeline_id.md b/docs/_posts/ahmedlone127/2024-09-07-ner_411_pipeline_id.md new file mode 100644 index 00000000000000..91467ca7d7c7df --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-ner_411_pipeline_id.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Indonesian ner_411_pipeline pipeline XlmRoBertaForTokenClassification from blekkk +author: John Snow Labs +name: ner_411_pipeline +date: 2024-09-07 +tags: [id, open_source, pipeline, onnx] +task: Named Entity Recognition +language: id +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_411_pipeline` is a Indonesian model originally trained by blekkk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_411_pipeline_id_5.5.0_3.0_1725693463866.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_411_pipeline_id_5.5.0_3.0_1725693463866.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ner_411_pipeline", lang = "id") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ner_411_pipeline", lang = "id") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_411_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|id| +|Size:|772.8 MB| + +## References + +https://huggingface.co/blekkk/ner_411 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-ner_classifier_distil_bert_en.md b/docs/_posts/ahmedlone127/2024-09-07-ner_classifier_distil_bert_en.md new file mode 100644 index 00000000000000..f0a700bd11a8c1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-ner_classifier_distil_bert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ner_classifier_distil_bert DistilBertForTokenClassification from florenciopaucar +author: John Snow Labs +name: ner_classifier_distil_bert +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_classifier_distil_bert` is a English model originally trained by florenciopaucar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_classifier_distil_bert_en_5.5.0_3.0_1725734031104.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_classifier_distil_bert_en_5.5.0_3.0_1725734031104.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("ner_classifier_distil_bert","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("ner_classifier_distil_bert", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_classifier_distil_bert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/florenciopaucar/ner-classifier-distil-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-ner_classifier_distil_bert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-ner_classifier_distil_bert_pipeline_en.md new file mode 100644 index 00000000000000..e9580c39a7cd11 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-ner_classifier_distil_bert_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English ner_classifier_distil_bert_pipeline pipeline DistilBertForTokenClassification from florenciopaucar +author: John Snow Labs +name: ner_classifier_distil_bert_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_classifier_distil_bert_pipeline` is a English model originally trained by florenciopaucar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_classifier_distil_bert_pipeline_en_5.5.0_3.0_1725734042858.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_classifier_distil_bert_pipeline_en_5.5.0_3.0_1725734042858.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ner_classifier_distil_bert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ner_classifier_distil_bert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_classifier_distil_bert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/florenciopaucar/ner-classifier-distil-bert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-ner_cw_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-ner_cw_pipeline_en.md new file mode 100644 index 00000000000000..bafaaabc0c62d8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-ner_cw_pipeline_en.md @@ -0,0 +1,66 @@ +--- +layout: model +title: English ner_cw_pipeline pipeline DistilBertForTokenClassification from ArshiaKarimian +author: John Snow Labs +name: ner_cw_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_cw_pipeline` is a English model originally trained by ArshiaKarimian. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_cw_pipeline_en_5.5.0_3.0_1725729785619.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_cw_pipeline_en_5.5.0_3.0_1725729785619.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ner_cw_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ner_cw_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_cw_pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/ArshiaKarimian/NER_CW_pipeline \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-ner_finetuned_en.md b/docs/_posts/ahmedlone127/2024-09-07-ner_finetuned_en.md new file mode 100644 index 00000000000000..755ed5bcba967b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-ner_finetuned_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ner_finetuned DistilBertForTokenClassification from cxx5208 +author: John Snow Labs +name: ner_finetuned +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_finetuned` is a English model originally trained by cxx5208. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_finetuned_en_5.5.0_3.0_1725739213832.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_finetuned_en_5.5.0_3.0_1725739213832.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("ner_finetuned","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("ner_finetuned", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_finetuned| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/cxx5208/NER_finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-ner_legal_german_pipeline_de.md b/docs/_posts/ahmedlone127/2024-09-07-ner_legal_german_pipeline_de.md new file mode 100644 index 00000000000000..42d2ba521d1e53 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-ner_legal_german_pipeline_de.md @@ -0,0 +1,70 @@ +--- +layout: model +title: German ner_legal_german_pipeline pipeline BertForTokenClassification from Sahajtomar +author: John Snow Labs +name: ner_legal_german_pipeline +date: 2024-09-07 +tags: [de, open_source, pipeline, onnx] +task: Named Entity Recognition +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_legal_german_pipeline` is a German model originally trained by Sahajtomar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_legal_german_pipeline_de_5.5.0_3.0_1725726279252.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_legal_german_pipeline_de_5.5.0_3.0_1725726279252.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ner_legal_german_pipeline", lang = "de") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ner_legal_german_pipeline", lang = "de") +val annotations = pipeline.transform(df) + +``` +
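+
+The `df` passed to `transform` above is assumed to be a Spark DataFrame with a `text` column, which the pipeline's DocumentAssembler reads. A minimal sketch with an invented example sentence is shown below; for single strings, the pipeline's `annotate` method is a convenient shortcut.
+
+```python
+# hypothetical input DataFrame with a `text` column
+df = spark.createDataFrame([["Das Urteil wurde vom Bundesgerichtshof verkündet."]]).toDF("text")
+annotations = pipeline.transform(df)
+
+# quick single-string check; returns a dict keyed by the pipeline's output columns
+print(pipeline.annotate("Das Urteil wurde vom Bundesgerichtshof verkündet."))
+```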
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_legal_german_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|de| +|Size:|1.3 GB| + +## References + +https://huggingface.co/Sahajtomar/NER_legal_de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-ner_model_abderrahimal_en.md b/docs/_posts/ahmedlone127/2024-09-07-ner_model_abderrahimal_en.md new file mode 100644 index 00000000000000..6064e964b5db68 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-ner_model_abderrahimal_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ner_model_abderrahimal DistilBertForTokenClassification from AbderrahimAl +author: John Snow Labs +name: ner_model_abderrahimal +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_model_abderrahimal` is a English model originally trained by AbderrahimAl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_model_abderrahimal_en_5.5.0_3.0_1725734081117.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_model_abderrahimal_en_5.5.0_3.0_1725734081117.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("ner_model_abderrahimal","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("ner_model_abderrahimal", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_model_abderrahimal| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/AbderrahimAl/ner_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-ner_model_abderrahimal_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-ner_model_abderrahimal_pipeline_en.md new file mode 100644 index 00000000000000..65409f9c3d191c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-ner_model_abderrahimal_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English ner_model_abderrahimal_pipeline pipeline DistilBertForTokenClassification from AbderrahimAl +author: John Snow Labs +name: ner_model_abderrahimal_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_model_abderrahimal_pipeline` is a English model originally trained by AbderrahimAl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_model_abderrahimal_pipeline_en_5.5.0_3.0_1725734093179.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_model_abderrahimal_pipeline_en_5.5.0_3.0_1725734093179.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ner_model_abderrahimal_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ner_model_abderrahimal_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_model_abderrahimal_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/AbderrahimAl/ner_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-ner_ner_random3_seed0_roberta_large_en.md b/docs/_posts/ahmedlone127/2024-09-07-ner_ner_random3_seed0_roberta_large_en.md new file mode 100644 index 00000000000000..4919582c60ba6e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-ner_ner_random3_seed0_roberta_large_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ner_ner_random3_seed0_roberta_large RoBertaForTokenClassification from tweettemposhift +author: John Snow Labs +name: ner_ner_random3_seed0_roberta_large +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_ner_random3_seed0_roberta_large` is a English model originally trained by tweettemposhift. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_ner_random3_seed0_roberta_large_en_5.5.0_3.0_1725707898310.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_ner_random3_seed0_roberta_large_en_5.5.0_3.0_1725707898310.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, RoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("ner_ner_random3_seed0_roberta_large","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("ner_ner_random3_seed0_roberta_large", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_ner_random3_seed0_roberta_large| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/tweettemposhift/ner-ner_random3_seed0-roberta-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-ner_newsagency_bert_french_pipeline_fr.md b/docs/_posts/ahmedlone127/2024-09-07-ner_newsagency_bert_french_pipeline_fr.md new file mode 100644 index 00000000000000..8376826fc5fd65 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-ner_newsagency_bert_french_pipeline_fr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: French ner_newsagency_bert_french_pipeline pipeline BertForTokenClassification from impresso-project +author: John Snow Labs +name: ner_newsagency_bert_french_pipeline +date: 2024-09-07 +tags: [fr, open_source, pipeline, onnx] +task: Named Entity Recognition +language: fr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ner_newsagency_bert_french_pipeline` is a French model originally trained by impresso-project. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ner_newsagency_bert_french_pipeline_fr_5.5.0_3.0_1725734733759.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ner_newsagency_bert_french_pipeline_fr_5.5.0_3.0_1725734733759.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ner_newsagency_bert_french_pipeline", lang = "fr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ner_newsagency_bert_french_pipeline", lang = "fr") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ner_newsagency_bert_french_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fr| +|Size:|412.5 MB| + +## References + +https://huggingface.co/impresso-project/ner-newsagency-bert-fr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-nerd_nerd_random0_seed1_bernice_en.md b/docs/_posts/ahmedlone127/2024-09-07-nerd_nerd_random0_seed1_bernice_en.md new file mode 100644 index 00000000000000..404f0fd12dda6a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-nerd_nerd_random0_seed1_bernice_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English nerd_nerd_random0_seed1_bernice XlmRoBertaForSequenceClassification from tweettemposhift +author: John Snow Labs +name: nerd_nerd_random0_seed1_bernice +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nerd_nerd_random0_seed1_bernice` is a English model originally trained by tweettemposhift. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nerd_nerd_random0_seed1_bernice_en_5.5.0_3.0_1725669868787.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nerd_nerd_random0_seed1_bernice_en_5.5.0_3.0_1725669868787.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# the classifier reads the document and token columns produced above
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("nerd_nerd_random0_seed1_bernice","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("nerd_nerd_random0_seed1_bernice", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
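+
+After `transform`, the predicted label for each input row lives in the `result` field of the `class` column. A minimal inspection sketch, continuing from the example above:
+
+```python
+# show the input text next to the predicted class label
+pipelineDF.selectExpr("text", "`class`.result as prediction").show(truncate=False)
+```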
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nerd_nerd_random0_seed1_bernice| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|831.8 MB| + +## References + +https://huggingface.co/tweettemposhift/nerd-nerd_random0_seed1-bernice \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-nerd_nerd_random0_seed1_bernice_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-nerd_nerd_random0_seed1_bernice_pipeline_en.md new file mode 100644 index 00000000000000..82663345b932de --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-nerd_nerd_random0_seed1_bernice_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English nerd_nerd_random0_seed1_bernice_pipeline pipeline XlmRoBertaForSequenceClassification from tweettemposhift +author: John Snow Labs +name: nerd_nerd_random0_seed1_bernice_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nerd_nerd_random0_seed1_bernice_pipeline` is a English model originally trained by tweettemposhift. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nerd_nerd_random0_seed1_bernice_pipeline_en_5.5.0_3.0_1725669991511.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nerd_nerd_random0_seed1_bernice_pipeline_en_5.5.0_3.0_1725669991511.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("nerd_nerd_random0_seed1_bernice_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("nerd_nerd_random0_seed1_bernice_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nerd_nerd_random0_seed1_bernice_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|831.8 MB| + +## References + +https://huggingface.co/tweettemposhift/nerd-nerd_random0_seed1-bernice + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-nerd_nerd_random3_seed0_bernice_en.md b/docs/_posts/ahmedlone127/2024-09-07-nerd_nerd_random3_seed0_bernice_en.md new file mode 100644 index 00000000000000..697d0794a93fb8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-nerd_nerd_random3_seed0_bernice_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English nerd_nerd_random3_seed0_bernice XlmRoBertaForSequenceClassification from tweettemposhift +author: John Snow Labs +name: nerd_nerd_random3_seed0_bernice +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nerd_nerd_random3_seed0_bernice` is a English model originally trained by tweettemposhift. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nerd_nerd_random3_seed0_bernice_en_5.5.0_3.0_1725669686224.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nerd_nerd_random3_seed0_bernice_en_5.5.0_3.0_1725669686224.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("nerd_nerd_random3_seed0_bernice","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("nerd_nerd_random3_seed0_bernice", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nerd_nerd_random3_seed0_bernice| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|831.7 MB| + +## References + +https://huggingface.co/tweettemposhift/nerd-nerd_random3_seed0-bernice \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-nerd_nerd_random3_seed0_bernice_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-nerd_nerd_random3_seed0_bernice_pipeline_en.md new file mode 100644 index 00000000000000..5c775e20780a13 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-nerd_nerd_random3_seed0_bernice_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English nerd_nerd_random3_seed0_bernice_pipeline pipeline XlmRoBertaForSequenceClassification from tweettemposhift +author: John Snow Labs +name: nerd_nerd_random3_seed0_bernice_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nerd_nerd_random3_seed0_bernice_pipeline` is a English model originally trained by tweettemposhift. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nerd_nerd_random3_seed0_bernice_pipeline_en_5.5.0_3.0_1725669808345.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nerd_nerd_random3_seed0_bernice_pipeline_en_5.5.0_3.0_1725669808345.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("nerd_nerd_random3_seed0_bernice_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("nerd_nerd_random3_seed0_bernice_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nerd_nerd_random3_seed0_bernice_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|831.7 MB| + +## References + +https://huggingface.co/tweettemposhift/nerd-nerd_random3_seed0-bernice + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-netflix_reviews_en.md b/docs/_posts/ahmedlone127/2024-09-07-netflix_reviews_en.md new file mode 100644 index 00000000000000..a36e21c7238a09 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-netflix_reviews_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English netflix_reviews DistilBertForSequenceClassification from luckyiso +author: John Snow Labs +name: netflix_reviews +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`netflix_reviews` is a English model originally trained by luckyiso. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/netflix_reviews_en_5.5.0_3.0_1725674473027.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/netflix_reviews_en_5.5.0_3.0_1725674473027.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("netflix_reviews","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("netflix_reviews", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
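+
+For ad-hoc predictions on a handful of strings, Spark NLP's `LightPipeline` avoids building a DataFrame at all. The sketch below assumes the fitted `pipelineModel` from the example above; the review text is invented.
+
+```python
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+# returns a dict whose `class` entry holds the predicted label for the string
+print(light.annotate("The new season was surprisingly good."))
+```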
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|netflix_reviews| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/luckyiso/NETFLIX_reviews \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-netflix_reviews_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-netflix_reviews_pipeline_en.md new file mode 100644 index 00000000000000..0165daba9f7d63 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-netflix_reviews_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English netflix_reviews_pipeline pipeline DistilBertForSequenceClassification from luckyiso +author: John Snow Labs +name: netflix_reviews_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`netflix_reviews_pipeline` is a English model originally trained by luckyiso. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/netflix_reviews_pipeline_en_5.5.0_3.0_1725674485608.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/netflix_reviews_pipeline_en_5.5.0_3.0_1725674485608.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("netflix_reviews_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("netflix_reviews_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|netflix_reviews_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/luckyiso/NETFLIX_reviews + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-nofibot3_en.md b/docs/_posts/ahmedlone127/2024-09-07-nofibot3_en.md new file mode 100644 index 00000000000000..5c4771b19ba627 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-nofibot3_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English nofibot3 DistilBertForQuestionAnswering from aslakeinbu +author: John Snow Labs +name: nofibot3 +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nofibot3` is a English model originally trained by aslakeinbu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nofibot3_en_5.5.0_3.0_1725746232162.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nofibot3_en_5.5.0_3.0_1725746232162.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import MultiDocumentAssembler
+from sparknlp.annotator import DistilBertForQuestionAnswering
+from pyspark.ml import Pipeline
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("nofibot3","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols("question", "context")
+    .setOutputCols("document_question", "document_context")
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("nofibot3", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
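+
+The extracted answer span ends up in the `answer` column after `transform`. Continuing from the example above, a minimal inspection sketch:
+
+```python
+# `answer.result` holds the predicted answer text for each question/context pair
+pipelineDF.selectExpr("question", "answer.result as answer").show(truncate=False)
+```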
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nofibot3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/aslakeinbu/nofibot3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-norwegian_bokml_bert_base_no.md b/docs/_posts/ahmedlone127/2024-09-07-norwegian_bokml_bert_base_no.md new file mode 100644 index 00000000000000..c02e1dd7297c4d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-norwegian_bokml_bert_base_no.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Norwegian norwegian_bokml_bert_base BertEmbeddings from NbAiLab +author: John Snow Labs +name: norwegian_bokml_bert_base +date: 2024-09-07 +tags: ["no", open_source, onnx, embeddings, bert] +task: Embeddings +language: "no" +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`norwegian_bokml_bert_base` is a Norwegian model originally trained by NbAiLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/norwegian_bokml_bert_base_no_5.5.0_3.0_1725697106810.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/norwegian_bokml_bert_base_no_5.5.0_3.0_1725697106810.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("norwegian_bokml_bert_base","no") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = BertEmbeddings.pretrained("norwegian_bokml_bert_base","no") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
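+
+The `embeddings` column produced above holds one annotation per token, each carrying its vector in the `embeddings` field. A minimal sketch for pulling the vectors out, continuing from the example:
+
+```python
+# token text alongside the size of its embedding vector
+pipelineDF.selectExpr("explode(embeddings) as emb") \
+    .selectExpr("emb.result as token", "size(emb.embeddings) as dimensions") \
+    .show(truncate=False)
+```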
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|norwegian_bokml_bert_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|no| +|Size:|666.2 MB| + +## References + +https://huggingface.co/NbAiLab/nb-bert-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-norwegian_bokml_roberta_base_scandinavian_long_en.md b/docs/_posts/ahmedlone127/2024-09-07-norwegian_bokml_roberta_base_scandinavian_long_en.md new file mode 100644 index 00000000000000..a27ab03a0c7aac --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-norwegian_bokml_roberta_base_scandinavian_long_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English norwegian_bokml_roberta_base_scandinavian_long RoBertaEmbeddings from pere +author: John Snow Labs +name: norwegian_bokml_roberta_base_scandinavian_long +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`norwegian_bokml_roberta_base_scandinavian_long` is a English model originally trained by pere. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/norwegian_bokml_roberta_base_scandinavian_long_en_5.5.0_3.0_1725672866658.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/norwegian_bokml_roberta_base_scandinavian_long_en_5.5.0_3.0_1725672866658.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("norwegian_bokml_roberta_base_scandinavian_long","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("norwegian_bokml_roberta_base_scandinavian_long","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
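+
+If the vectors are needed as plain Spark ML columns (for example to feed a downstream classifier), an `EmbeddingsFinisher` stage can be appended to the same pipeline. This is a sketch under the assumption that the rest of the pipeline above is unchanged:
+
+```python
+from sparknlp.base import EmbeddingsFinisher
+
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["finished_embeddings"]) \
+    .setOutputAsVector(True)
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings, finisher])
+```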
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|norwegian_bokml_roberta_base_scandinavian_long| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|466.1 MB| + +## References + +https://huggingface.co/pere/nb-roberta-base-scandinavian-long \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-norwegian_bokml_roberta_base_scandinavian_long_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-norwegian_bokml_roberta_base_scandinavian_long_pipeline_en.md new file mode 100644 index 00000000000000..3442abd2ab734f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-norwegian_bokml_roberta_base_scandinavian_long_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English norwegian_bokml_roberta_base_scandinavian_long_pipeline pipeline RoBertaEmbeddings from pere +author: John Snow Labs +name: norwegian_bokml_roberta_base_scandinavian_long_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`norwegian_bokml_roberta_base_scandinavian_long_pipeline` is a English model originally trained by pere. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/norwegian_bokml_roberta_base_scandinavian_long_pipeline_en_5.5.0_3.0_1725672888414.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/norwegian_bokml_roberta_base_scandinavian_long_pipeline_en_5.5.0_3.0_1725672888414.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("norwegian_bokml_roberta_base_scandinavian_long_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("norwegian_bokml_roberta_base_scandinavian_long_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|norwegian_bokml_roberta_base_scandinavian_long_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|466.1 MB| + +## References + +https://huggingface.co/pere/nb-roberta-base-scandinavian-long + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-norwegian_bokml_whisper_base_nbailabbeta_no.md b/docs/_posts/ahmedlone127/2024-09-07-norwegian_bokml_whisper_base_nbailabbeta_no.md new file mode 100644 index 00000000000000..9be5d9e1bb2144 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-norwegian_bokml_whisper_base_nbailabbeta_no.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Norwegian norwegian_bokml_whisper_base_nbailabbeta WhisperForCTC from NbAiLabBeta +author: John Snow Labs +name: norwegian_bokml_whisper_base_nbailabbeta +date: 2024-09-07 +tags: ["no", open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: "no" +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`norwegian_bokml_whisper_base_nbailabbeta` is a Norwegian model originally trained by NbAiLabBeta. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/norwegian_bokml_whisper_base_nbailabbeta_no_5.5.0_3.0_1725751805553.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/norwegian_bokml_whisper_base_nbailabbeta_no_5.5.0_3.0_1725751805553.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import AudioAssembler
+from sparknlp.annotator import WhisperForCTC
+from pyspark.ml import Pipeline
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("norwegian_bokml_whisper_base_nbailabbeta","no") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+# `data` is assumed to be a DataFrame with an `audio_content` column of raw audio samples
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("norwegian_bokml_whisper_base_nbailabbeta", "no")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+// `data` is assumed to be a DataFrame with an `audio_content` column of raw audio samples
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
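+
+The `data` DataFrame used above is assumed to contain an `audio_content` column with the raw waveform as an array of floats. One common way to build it, assuming `librosa` is available and `sample.wav` is a hypothetical mono recording, is sketched below:
+
+```python
+import librosa
+
+# load the waveform as 16 kHz floats (Whisper models expect 16 kHz input)
+waveform, _ = librosa.load("sample.wav", sr=16000)
+data = spark.createDataFrame([[waveform.tolist()]]).toDF("audio_content")
+```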
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|norwegian_bokml_whisper_base_nbailabbeta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|no| +|Size:|633.6 MB| + +## References + +https://huggingface.co/NbAiLabBeta/nb-whisper-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-norwegian_bokml_whisper_base_nbailabbeta_pipeline_no.md b/docs/_posts/ahmedlone127/2024-09-07-norwegian_bokml_whisper_base_nbailabbeta_pipeline_no.md new file mode 100644 index 00000000000000..9dc27794db59ef --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-norwegian_bokml_whisper_base_nbailabbeta_pipeline_no.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Norwegian norwegian_bokml_whisper_base_nbailabbeta_pipeline pipeline WhisperForCTC from NbAiLabBeta +author: John Snow Labs +name: norwegian_bokml_whisper_base_nbailabbeta_pipeline +date: 2024-09-07 +tags: ["no", open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: "no" +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`norwegian_bokml_whisper_base_nbailabbeta_pipeline` is a Norwegian model originally trained by NbAiLabBeta. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/norwegian_bokml_whisper_base_nbailabbeta_pipeline_no_5.5.0_3.0_1725751838099.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/norwegian_bokml_whisper_base_nbailabbeta_pipeline_no_5.5.0_3.0_1725751838099.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("norwegian_bokml_whisper_base_nbailabbeta_pipeline", lang = "no") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("norwegian_bokml_whisper_base_nbailabbeta_pipeline", lang = "no") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|norwegian_bokml_whisper_base_nbailabbeta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|no| +|Size:|633.6 MB| + +## References + +https://huggingface.co/NbAiLabBeta/nb-whisper-base + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-norwegian_bokml_whisper_tiny_beta_no.md b/docs/_posts/ahmedlone127/2024-09-07-norwegian_bokml_whisper_tiny_beta_no.md new file mode 100644 index 00000000000000..ed539a9a22c078 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-norwegian_bokml_whisper_tiny_beta_no.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Norwegian norwegian_bokml_whisper_tiny_beta WhisperForCTC from NbAiLab +author: John Snow Labs +name: norwegian_bokml_whisper_tiny_beta +date: 2024-09-07 +tags: ["no", open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: "no" +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`norwegian_bokml_whisper_tiny_beta` is a Norwegian model originally trained by NbAiLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/norwegian_bokml_whisper_tiny_beta_no_5.5.0_3.0_1725752757770.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/norwegian_bokml_whisper_tiny_beta_no_5.5.0_3.0_1725752757770.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import AudioAssembler
+from sparknlp.annotator import WhisperForCTC
+from pyspark.ml import Pipeline
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("norwegian_bokml_whisper_tiny_beta","no") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+# `data` is assumed to be a DataFrame with an `audio_content` column of raw audio samples
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("norwegian_bokml_whisper_tiny_beta", "no")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+// `data` is assumed to be a DataFrame with an `audio_content` column of raw audio samples
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|norwegian_bokml_whisper_tiny_beta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|no| +|Size:|391.9 MB| + +## References + +https://huggingface.co/NbAiLab/nb-whisper-tiny-beta \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-norwegian_bokml_whisper_tiny_beta_pipeline_no.md b/docs/_posts/ahmedlone127/2024-09-07-norwegian_bokml_whisper_tiny_beta_pipeline_no.md new file mode 100644 index 00000000000000..43eaa8bdeae771 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-norwegian_bokml_whisper_tiny_beta_pipeline_no.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Norwegian norwegian_bokml_whisper_tiny_beta_pipeline pipeline WhisperForCTC from NbAiLab +author: John Snow Labs +name: norwegian_bokml_whisper_tiny_beta_pipeline +date: 2024-09-07 +tags: ["no", open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: "no" +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`norwegian_bokml_whisper_tiny_beta_pipeline` is a Norwegian model originally trained by NbAiLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/norwegian_bokml_whisper_tiny_beta_pipeline_no_5.5.0_3.0_1725752777416.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/norwegian_bokml_whisper_tiny_beta_pipeline_no_5.5.0_3.0_1725752777416.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("norwegian_bokml_whisper_tiny_beta_pipeline", lang = "no") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("norwegian_bokml_whisper_tiny_beta_pipeline", lang = "no") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|norwegian_bokml_whisper_tiny_beta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|no| +|Size:|391.9 MB| + +## References + +https://huggingface.co/NbAiLab/nb-whisper-tiny-beta + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-nreimers_minilmv2_l6_h384_distilled_from_roberta_large_en.md b/docs/_posts/ahmedlone127/2024-09-07-nreimers_minilmv2_l6_h384_distilled_from_roberta_large_en.md new file mode 100644 index 00000000000000..cac96a80e4a379 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-nreimers_minilmv2_l6_h384_distilled_from_roberta_large_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English nreimers_minilmv2_l6_h384_distilled_from_roberta_large RoBertaForTokenClassification from baileyk +author: John Snow Labs +name: nreimers_minilmv2_l6_h384_distilled_from_roberta_large +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nreimers_minilmv2_l6_h384_distilled_from_roberta_large` is a English model originally trained by baileyk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nreimers_minilmv2_l6_h384_distilled_from_roberta_large_en_5.5.0_3.0_1725707510082.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nreimers_minilmv2_l6_h384_distilled_from_roberta_large_en_5.5.0_3.0_1725707510082.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, RoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("nreimers_minilmv2_l6_h384_distilled_from_roberta_large","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("nreimers_minilmv2_l6_h384_distilled_from_roberta_large", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
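+
+Token-level tags can be grouped into entity chunks by appending a `NerConverter` stage, which reads the document, token, and ner columns produced above. A minimal sketch, assuming the rest of the pipeline is unchanged:
+
+```python
+from sparknlp.annotator import NerConverter
+
+nerConverter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier, nerConverter])
+```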
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nreimers_minilmv2_l6_h384_distilled_from_roberta_large| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|111.4 MB| + +## References + +https://huggingface.co/baileyk/nreimers_MiniLMv2-L6-H384-distilled-from-RoBERTa-Large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-nuclear_medicine_daroberta_en.md b/docs/_posts/ahmedlone127/2024-09-07-nuclear_medicine_daroberta_en.md new file mode 100644 index 00000000000000..333659315ad703 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-nuclear_medicine_daroberta_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English nuclear_medicine_daroberta RoBertaEmbeddings from Zach88 +author: John Snow Labs +name: nuclear_medicine_daroberta +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nuclear_medicine_daroberta` is a English model originally trained by Zach88. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nuclear_medicine_daroberta_en_5.5.0_3.0_1725678201044.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nuclear_medicine_daroberta_en_5.5.0_3.0_1725678201044.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("nuclear_medicine_daroberta","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("nuclear_medicine_daroberta","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nuclear_medicine_daroberta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/Zach88/nuclear_medicine_DARoBERTa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-nyaszzzz_en.md b/docs/_posts/ahmedlone127/2024-09-07-nyaszzzz_en.md new file mode 100644 index 00000000000000..b4660686145c91 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-nyaszzzz_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English nyaszzzz DistilBertForQuestionAnswering from kevinbror +author: John Snow Labs +name: nyaszzzz +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nyaszzzz` is a English model originally trained by kevinbror. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nyaszzzz_en_5.5.0_3.0_1725745848822.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nyaszzzz_en_5.5.0_3.0_1725745848822.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("nyaszzzz","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("nyaszzzz", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
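+The extracted answer span ends up in the `answer` column of the transformed DataFrame. A minimal way to read it back out, assuming the column names used above:
+
+```python
+# Show the question together with the extracted answer text
+pipelineDF.select("question", "answer.result").show(truncate=False)
+```
+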
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nyaszzzz| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/kevinbror/nyaszzzz \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-ofa_multi_100_en.md b/docs/_posts/ahmedlone127/2024-09-07-ofa_multi_100_en.md new file mode 100644 index 00000000000000..4c477f3c036e78 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-ofa_multi_100_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ofa_multi_100 XlmRoBertaEmbeddings from yihongLiu +author: John Snow Labs +name: ofa_multi_100 +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ofa_multi_100` is a English model originally trained by yihongLiu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ofa_multi_100_en_5.5.0_3.0_1725676878054.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ofa_multi_100_en_5.5.0_3.0_1725676878054.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("ofa_multi_100","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("ofa_multi_100","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
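+For quick experiments on single strings rather than a DataFrame, the fitted pipeline above can be wrapped in a `LightPipeline`; this is the generic Spark NLP pattern rather than anything specific to this model. A minimal sketch:
+
+```python
+from sparknlp.base import LightPipeline
+
+# annotate() returns plain result strings; use fullAnnotate() if the vectors themselves are needed
+light = LightPipeline(pipelineModel)
+result = light.annotate("I love spark-nlp")
+print(result["token"])
+```
+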
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ofa_multi_100| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[xlm_roberta]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/yihongLiu/ofa-multi-100 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-ofa_multi_100_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-ofa_multi_100_pipeline_en.md new file mode 100644 index 00000000000000..1b1fe902ca9ce9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-ofa_multi_100_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English ofa_multi_100_pipeline pipeline XlmRoBertaEmbeddings from yihongLiu +author: John Snow Labs +name: ofa_multi_100_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ofa_multi_100_pipeline` is a English model originally trained by yihongLiu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ofa_multi_100_pipeline_en_5.5.0_3.0_1725676953204.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ofa_multi_100_pipeline_en_5.5.0_3.0_1725676953204.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ofa_multi_100_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ofa_multi_100_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
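+Besides `transform`, a `PretrainedPipeline` can also annotate raw strings directly, which is convenient for a quick check. A minimal sketch:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("ofa_multi_100_pipeline", lang="en")
+result = pipeline.annotate("I love spark-nlp")
+print(list(result.keys()))  # output columns of the included stages, e.g. document, token, embeddings
+```
+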
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ofa_multi_100_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/yihongLiu/ofa-multi-100 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-opticalbert_cased_squad2_en.md b/docs/_posts/ahmedlone127/2024-09-07-opticalbert_cased_squad2_en.md new file mode 100644 index 00000000000000..1cd432d18a2223 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-opticalbert_cased_squad2_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English opticalbert_cased_squad2 BertForQuestionAnswering from CambridgeMolecularEngineering +author: John Snow Labs +name: opticalbert_cased_squad2 +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opticalbert_cased_squad2` is a English model originally trained by CambridgeMolecularEngineering. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opticalbert_cased_squad2_en_5.5.0_3.0_1725709221161.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opticalbert_cased_squad2_en_5.5.0_3.0_1725709221161.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = BertForQuestionAnswering.pretrained("opticalbert_cased_squad2","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = BertForQuestionAnswering.pretrained("opticalbert_cased_squad2", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opticalbert_cased_squad2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|403.5 MB| + +## References + +https://huggingface.co/CambridgeMolecularEngineering/opticalbert_cased-squad2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-opus_base_ailem_adaptified_en.md b/docs/_posts/ahmedlone127/2024-09-07-opus_base_ailem_adaptified_en.md new file mode 100644 index 00000000000000..7d3769bc32a3c6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-opus_base_ailem_adaptified_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English opus_base_ailem_adaptified MarianTransformer from ethansimrm +author: John Snow Labs +name: opus_base_ailem_adaptified +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_base_ailem_adaptified` is a English model originally trained by ethansimrm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_base_ailem_adaptified_en_5.5.0_3.0_1725747259206.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_base_ailem_adaptified_en_5.5.0_3.0_1725747259206.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_base_ailem_adaptified","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_base_ailem_adaptified","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
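+The translated sentences are written to the `translation` column of the transformed DataFrame. A minimal way to read them back, assuming the column names above:
+
+```python
+# One translated string per detected sentence
+pipelineDF.select("translation.result").show(truncate=False)
+```
+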
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_base_ailem_adaptified| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.4 MB| + +## References + +https://huggingface.co/ethansimrm/opus_base_ailem_adaptified \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-opus_base_ailem_adaptified_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-opus_base_ailem_adaptified_pipeline_en.md new file mode 100644 index 00000000000000..3f104fecc1799c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-opus_base_ailem_adaptified_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_base_ailem_adaptified_pipeline pipeline MarianTransformer from ethansimrm +author: John Snow Labs +name: opus_base_ailem_adaptified_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_base_ailem_adaptified_pipeline` is a English model originally trained by ethansimrm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_base_ailem_adaptified_pipeline_en_5.5.0_3.0_1725747283880.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_base_ailem_adaptified_pipeline_en_5.5.0_3.0_1725747283880.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_base_ailem_adaptified_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_base_ailem_adaptified_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_base_ailem_adaptified_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|509.0 MB| + +## References + +https://huggingface.co/ethansimrm/opus_base_ailem_adaptified + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-opus_big_ailem_random_en.md b/docs/_posts/ahmedlone127/2024-09-07-opus_big_ailem_random_en.md new file mode 100644 index 00000000000000..f65010ba3830f5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-opus_big_ailem_random_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English opus_big_ailem_random MarianTransformer from ethansimrm +author: John Snow Labs +name: opus_big_ailem_random +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_big_ailem_random` is a English model originally trained by ethansimrm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_big_ailem_random_en_5.5.0_3.0_1725746707031.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_big_ailem_random_en_5.5.0_3.0_1725746707031.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_big_ailem_random","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_big_ailem_random","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_big_ailem_random| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/ethansimrm/opus_big_ailem_random \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_arabic_english_finetuned_src_tonga_tonga_islands_trg_testing_en.md b/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_arabic_english_finetuned_src_tonga_tonga_islands_trg_testing_en.md new file mode 100644 index 00000000000000..b832079e6b07d7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_arabic_english_finetuned_src_tonga_tonga_islands_trg_testing_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English opus_maltese_arabic_english_finetuned_src_tonga_tonga_islands_trg_testing MarianTransformer from s3h +author: John Snow Labs +name: opus_maltese_arabic_english_finetuned_src_tonga_tonga_islands_trg_testing +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_arabic_english_finetuned_src_tonga_tonga_islands_trg_testing` is a English model originally trained by s3h. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_arabic_english_finetuned_src_tonga_tonga_islands_trg_testing_en_5.5.0_3.0_1725740523347.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_arabic_english_finetuned_src_tonga_tonga_islands_trg_testing_en_5.5.0_3.0_1725740523347.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_maltese_arabic_english_finetuned_src_tonga_tonga_islands_trg_testing","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_maltese_arabic_english_finetuned_src_tonga_tonga_islands_trg_testing","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_arabic_english_finetuned_src_tonga_tonga_islands_trg_testing| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|527.9 MB| + +## References + +https://huggingface.co/s3h/opus-mt-ar-en-finetuned-src-to-trg-testing \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_english_italian_finetuned_20000_english_tonga_tonga_islands_italian_en.md b/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_english_italian_finetuned_20000_english_tonga_tonga_islands_italian_en.md new file mode 100644 index 00000000000000..3932c317ae6c83 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_english_italian_finetuned_20000_english_tonga_tonga_islands_italian_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English opus_maltese_english_italian_finetuned_20000_english_tonga_tonga_islands_italian MarianTransformer from VFiona +author: John Snow Labs +name: opus_maltese_english_italian_finetuned_20000_english_tonga_tonga_islands_italian +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_italian_finetuned_20000_english_tonga_tonga_islands_italian` is a English model originally trained by VFiona. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_italian_finetuned_20000_english_tonga_tonga_islands_italian_en_5.5.0_3.0_1725740333715.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_italian_finetuned_20000_english_tonga_tonga_islands_italian_en_5.5.0_3.0_1725740333715.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_maltese_english_italian_finetuned_20000_english_tonga_tonga_islands_italian","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_maltese_english_italian_finetuned_20000_english_tonga_tonga_islands_italian","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_italian_finetuned_20000_english_tonga_tonga_islands_italian| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|622.9 MB| + +## References + +https://huggingface.co/VFiona/opus-mt-en-it-finetuned_20000-en-to-it \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_anhtuanta_en.md b/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_anhtuanta_en.md new file mode 100644 index 00000000000000..66ca6d72ea7998 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_anhtuanta_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_anhtuanta MarianTransformer from AnhTuanTA +author: John Snow Labs +name: opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_anhtuanta +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_anhtuanta` is a English model originally trained by AnhTuanTA. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_anhtuanta_en_5.5.0_3.0_1725741502777.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_anhtuanta_en_5.5.0_3.0_1725741502777.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_anhtuanta","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_anhtuanta","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_anhtuanta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.6 MB| + +## References + +https://huggingface.co/AnhTuanTA/opus-mt-en-ro-finetuned-en-to-ro \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_anhtuanta_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_anhtuanta_pipeline_en.md new file mode 100644 index 00000000000000..4e857e40df50f9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_anhtuanta_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_anhtuanta_pipeline pipeline MarianTransformer from AnhTuanTA +author: John Snow Labs +name: opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_anhtuanta_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_anhtuanta_pipeline` is a English model originally trained by AnhTuanTA. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_anhtuanta_pipeline_en_5.5.0_3.0_1725741525692.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_anhtuanta_pipeline_en_5.5.0_3.0_1725741525692.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_anhtuanta_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_anhtuanta_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_anhtuanta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|509.1 MB| + +## References + +https://huggingface.co/AnhTuanTA/opus-mt-en-ro-finetuned-en-to-ro + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_louistichelman_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_louistichelman_pipeline_en.md new file mode 100644 index 00000000000000..0ad5d3b6c43836 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_louistichelman_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_louistichelman_pipeline pipeline MarianTransformer from louistichelman +author: John Snow Labs +name: opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_louistichelman_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_louistichelman_pipeline` is a English model originally trained by louistichelman. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_louistichelman_pipeline_en_5.5.0_3.0_1725746541012.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_louistichelman_pipeline_en_5.5.0_3.0_1725746541012.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_louistichelman_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_louistichelman_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_louistichelman_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|509.1 MB| + +## References + +https://huggingface.co/louistichelman/opus-mt-en-ro-finetuned-en-to-ro + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_momo22_en.md b/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_momo22_en.md new file mode 100644 index 00000000000000..fadc34f932cfb4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_momo22_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_momo22 MarianTransformer from momo22 +author: John Snow Labs +name: opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_momo22 +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_momo22` is a English model originally trained by momo22. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_momo22_en_5.5.0_3.0_1725741273400.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_momo22_en_5.5.0_3.0_1725741273400.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_momo22","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_momo22","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_momo22| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.6 MB| + +## References + +https://huggingface.co/momo22/opus-mt-en-ro-finetuned-en-to-ro \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_momo22_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_momo22_pipeline_en.md new file mode 100644 index 00000000000000..580e097f504048 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_momo22_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_momo22_pipeline pipeline MarianTransformer from momo22 +author: John Snow Labs +name: opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_momo22_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_momo22_pipeline` is a English model originally trained by momo22. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_momo22_pipeline_en_5.5.0_3.0_1725741297075.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_momo22_pipeline_en_5.5.0_3.0_1725741297075.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_momo22_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_momo22_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_momo22_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|509.1 MB| + +## References + +https://huggingface.co/momo22/opus-mt-en-ro-finetuned-en-to-ro + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_clean_marianmt_en.md b/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_clean_marianmt_en.md new file mode 100644 index 00000000000000..dd00f32c34d324 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_clean_marianmt_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_clean_marianmt MarianTransformer from himanshubeniwal +author: John Snow Labs +name: opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_clean_marianmt +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_clean_marianmt` is a English model originally trained by himanshubeniwal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_clean_marianmt_en_5.5.0_3.0_1725740569331.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_clean_marianmt_en_5.5.0_3.0_1725740569331.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_clean_marianmt","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_clean_marianmt","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_clean_marianmt| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.0 MB| + +## References + +https://huggingface.co/himanshubeniwal/opus-mt-en-ro-finetuned-ro-to-en-clean-MarianMT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_english_swahili_finetuned_english_tonga_tonga_islands_swahili_zumaridi_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_english_swahili_finetuned_english_tonga_tonga_islands_swahili_zumaridi_pipeline_en.md new file mode 100644 index 00000000000000..425d133b529943 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_english_swahili_finetuned_english_tonga_tonga_islands_swahili_zumaridi_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_maltese_english_swahili_finetuned_english_tonga_tonga_islands_swahili_zumaridi_pipeline pipeline MarianTransformer from Zumaridi +author: John Snow Labs +name: opus_maltese_english_swahili_finetuned_english_tonga_tonga_islands_swahili_zumaridi_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_swahili_finetuned_english_tonga_tonga_islands_swahili_zumaridi_pipeline` is a English model originally trained by Zumaridi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_swahili_finetuned_english_tonga_tonga_islands_swahili_zumaridi_pipeline_en_5.5.0_3.0_1725740426153.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_swahili_finetuned_english_tonga_tonga_islands_swahili_zumaridi_pipeline_en_5.5.0_3.0_1725740426153.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_english_swahili_finetuned_english_tonga_tonga_islands_swahili_zumaridi_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_english_swahili_finetuned_english_tonga_tonga_islands_swahili_zumaridi_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_swahili_finetuned_english_tonga_tonga_islands_swahili_zumaridi_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|507.0 MB| + +## References + +https://huggingface.co/Zumaridi/opus-mt-en-sw-finetuned-en-to-sw + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_french_english_bds_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_french_english_bds_pipeline_en.md new file mode 100644 index 00000000000000..bf0c864089cc8f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_french_english_bds_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_maltese_french_english_bds_pipeline pipeline MarianTransformer from Anhptp +author: John Snow Labs +name: opus_maltese_french_english_bds_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_french_english_bds_pipeline` is a English model originally trained by Anhptp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_french_english_bds_pipeline_en_5.5.0_3.0_1725741182698.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_french_english_bds_pipeline_en_5.5.0_3.0_1725741182698.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_french_english_bds_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_french_english_bds_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_french_english_bds_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|508.1 MB| + +## References + +https://huggingface.co/Anhptp/opus-mt-fr-en-BDS + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_ganda_english_finetuned_lm_tonga_tonga_islands_english_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_ganda_english_finetuned_lm_tonga_tonga_islands_english_pipeline_en.md new file mode 100644 index 00000000000000..3842ff7b9985b9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_ganda_english_finetuned_lm_tonga_tonga_islands_english_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_maltese_ganda_english_finetuned_lm_tonga_tonga_islands_english_pipeline pipeline MarianTransformer from Eyesiga +author: John Snow Labs +name: opus_maltese_ganda_english_finetuned_lm_tonga_tonga_islands_english_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_ganda_english_finetuned_lm_tonga_tonga_islands_english_pipeline` is a English model originally trained by Eyesiga. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_ganda_english_finetuned_lm_tonga_tonga_islands_english_pipeline_en_5.5.0_3.0_1725740651565.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_ganda_english_finetuned_lm_tonga_tonga_islands_english_pipeline_en_5.5.0_3.0_1725740651565.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_ganda_english_finetuned_lm_tonga_tonga_islands_english_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_ganda_english_finetuned_lm_tonga_tonga_islands_english_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_ganda_english_finetuned_lm_tonga_tonga_islands_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|512.5 MB| + +## References + +https://huggingface.co/Eyesiga/opus-mt-lg-en-finetuned-lm-to-en + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_donghyunkim_en.md b/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_donghyunkim_en.md new file mode 100644 index 00000000000000..aac7f917b228fb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_donghyunkim_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_donghyunkim MarianTransformer from DongHyunKim +author: John Snow Labs +name: opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_donghyunkim +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_donghyunkim` is a English model originally trained by DongHyunKim. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_donghyunkim_en_5.5.0_3.0_1725741680197.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_donghyunkim_en_5.5.0_3.0_1725741680197.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_donghyunkim","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_donghyunkim","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
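+
+Since the MarianTransformer stage above writes its output to the `translation` column, the translated strings can be read back out of `pipelineDF` with a short, hedged follow-up (Spark NLP annotation columns expose their text in a `result` field):
+
+```python
+# Explode the annotation array and keep only the translated text
+pipelineDF.selectExpr("explode(translation.result) as translated_text") \
+    .show(truncate=False)
+```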
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_donghyunkim| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|499.6 MB| + +## References + +https://huggingface.co/DongHyunKim/opus-mt-de-en-finetuned-de-to-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_second_felipetanios_en.md b/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_second_felipetanios_en.md new file mode 100644 index 00000000000000..155f120e749fee --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_second_felipetanios_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_second_felipetanios MarianTransformer from felipetanios +author: John Snow Labs +name: opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_second_felipetanios +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_second_felipetanios` is a English model originally trained by felipetanios. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_second_felipetanios_en_5.5.0_3.0_1725748099878.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_second_felipetanios_en_5.5.0_3.0_1725748099878.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_second_felipetanios","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_second_felipetanios","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_second_felipetanios| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|499.4 MB| + +## References + +https://huggingface.co/felipetanios/opus-mt-de-en-finetuned-de-to-en-second \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_second_tiagohatta_en.md b/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_second_tiagohatta_en.md new file mode 100644 index 00000000000000..cadc702e5ea73c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_second_tiagohatta_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_second_tiagohatta MarianTransformer from tiagohatta +author: John Snow Labs +name: opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_second_tiagohatta +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_second_tiagohatta` is a English model originally trained by tiagohatta. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_second_tiagohatta_en_5.5.0_3.0_1725747457214.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_second_tiagohatta_en_5.5.0_3.0_1725747457214.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_second_tiagohatta","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_second_tiagohatta","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_german_english_finetuned_german_tonga_tonga_islands_english_second_tiagohatta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|499.4 MB| + +## References + +https://huggingface.co/tiagohatta/opus-mt-de-en-finetuned-de-to-en-second \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_italian_english_finetuned_5000_italian_tonga_tonga_islands_english_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_italian_english_finetuned_5000_italian_tonga_tonga_islands_english_pipeline_en.md new file mode 100644 index 00000000000000..6f8bf95b438c29 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_italian_english_finetuned_5000_italian_tonga_tonga_islands_english_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_maltese_italian_english_finetuned_5000_italian_tonga_tonga_islands_english_pipeline pipeline MarianTransformer from VFiona +author: John Snow Labs +name: opus_maltese_italian_english_finetuned_5000_italian_tonga_tonga_islands_english_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_italian_english_finetuned_5000_italian_tonga_tonga_islands_english_pipeline` is a English model originally trained by VFiona. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_italian_english_finetuned_5000_italian_tonga_tonga_islands_english_pipeline_en_5.5.0_3.0_1725746555899.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_italian_english_finetuned_5000_italian_tonga_tonga_islands_english_pipeline_en_5.5.0_3.0_1725746555899.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_italian_english_finetuned_5000_italian_tonga_tonga_islands_english_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_italian_english_finetuned_5000_italian_tonga_tonga_islands_english_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_italian_english_finetuned_5000_italian_tonga_tonga_islands_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|625.5 MB| + +## References + +https://huggingface.co/VFiona/opus-mt-it-en-finetuned_5000-it-to-en + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_romance_english_finetuned_npomo_english_15_epochs_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_romance_english_finetuned_npomo_english_15_epochs_pipeline_en.md new file mode 100644 index 00000000000000..2e6ece74fc4429 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-opus_maltese_romance_english_finetuned_npomo_english_15_epochs_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_maltese_romance_english_finetuned_npomo_english_15_epochs_pipeline pipeline MarianTransformer from UnassumingOwl +author: John Snow Labs +name: opus_maltese_romance_english_finetuned_npomo_english_15_epochs_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_romance_english_finetuned_npomo_english_15_epochs_pipeline` is a English model originally trained by UnassumingOwl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_romance_english_finetuned_npomo_english_15_epochs_pipeline_en_5.5.0_3.0_1725746920214.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_romance_english_finetuned_npomo_english_15_epochs_pipeline_en_5.5.0_3.0_1725746920214.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_romance_english_finetuned_npomo_english_15_epochs_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_romance_english_finetuned_npomo_english_15_epochs_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_romance_english_finetuned_npomo_english_15_epochs_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|539.5 MB| + +## References + +https://huggingface.co/UnassumingOwl/opus-mt-ROMANCE-en-finetuned-npomo-en-15-epochs + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-opus_tatoeba_english_japanese_finetuned_eng_tonga_tonga_islands_jpn_hani_pipeline_nan.md b/docs/_posts/ahmedlone127/2024-09-07-opus_tatoeba_english_japanese_finetuned_eng_tonga_tonga_islands_jpn_hani_pipeline_nan.md new file mode 100644 index 00000000000000..cc5045986eb3c6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-opus_tatoeba_english_japanese_finetuned_eng_tonga_tonga_islands_jpn_hani_pipeline_nan.md @@ -0,0 +1,70 @@ +--- +layout: model +title: None opus_tatoeba_english_japanese_finetuned_eng_tonga_tonga_islands_jpn_hani_pipeline pipeline MarianTransformer from julianty +author: John Snow Labs +name: opus_tatoeba_english_japanese_finetuned_eng_tonga_tonga_islands_jpn_hani_pipeline +date: 2024-09-07 +tags: [nan, open_source, pipeline, onnx] +task: Translation +language: nan +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_tatoeba_english_japanese_finetuned_eng_tonga_tonga_islands_jpn_hani_pipeline` is a None model originally trained by julianty. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_tatoeba_english_japanese_finetuned_eng_tonga_tonga_islands_jpn_hani_pipeline_nan_5.5.0_3.0_1725747329366.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_tatoeba_english_japanese_finetuned_eng_tonga_tonga_islands_jpn_hani_pipeline_nan_5.5.0_3.0_1725747329366.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_tatoeba_english_japanese_finetuned_eng_tonga_tonga_islands_jpn_hani_pipeline", lang = "nan") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_tatoeba_english_japanese_finetuned_eng_tonga_tonga_islands_jpn_hani_pipeline", lang = "nan") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_tatoeba_english_japanese_finetuned_eng_tonga_tonga_islands_jpn_hani_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|nan| +|Size:|542.6 MB| + +## References + +https://huggingface.co/julianty/opus-tatoeba-en-ja-finetuned-eng-to-jpn_Hani + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-opus_wmt_finetuned_enfr_wang_2022_en.md b/docs/_posts/ahmedlone127/2024-09-07-opus_wmt_finetuned_enfr_wang_2022_en.md new file mode 100644 index 00000000000000..f4fa5afdb42bd8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-opus_wmt_finetuned_enfr_wang_2022_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English opus_wmt_finetuned_enfr_wang_2022 MarianTransformer from ethansimrm +author: John Snow Labs +name: opus_wmt_finetuned_enfr_wang_2022 +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_wmt_finetuned_enfr_wang_2022` is a English model originally trained by ethansimrm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_wmt_finetuned_enfr_wang_2022_en_5.5.0_3.0_1725741188674.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_wmt_finetuned_enfr_wang_2022_en_5.5.0_3.0_1725741188674.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_wmt_finetuned_enfr_wang_2022","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_wmt_finetuned_enfr_wang_2022","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_wmt_finetuned_enfr_wang_2022| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.4 MB| + +## References + +https://huggingface.co/ethansimrm/opus_wmt_finetuned_enfr_wang_2022 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-opus_wmt_finetuned_enfr_wang_2022_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-opus_wmt_finetuned_enfr_wang_2022_pipeline_en.md new file mode 100644 index 00000000000000..cb0fcced5e1666 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-opus_wmt_finetuned_enfr_wang_2022_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_wmt_finetuned_enfr_wang_2022_pipeline pipeline MarianTransformer from ethansimrm +author: John Snow Labs +name: opus_wmt_finetuned_enfr_wang_2022_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_wmt_finetuned_enfr_wang_2022_pipeline` is a English model originally trained by ethansimrm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_wmt_finetuned_enfr_wang_2022_pipeline_en_5.5.0_3.0_1725741212670.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_wmt_finetuned_enfr_wang_2022_pipeline_en_5.5.0_3.0_1725741212670.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_wmt_finetuned_enfr_wang_2022_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_wmt_finetuned_enfr_wang_2022_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_wmt_finetuned_enfr_wang_2022_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|508.9 MB| + +## References + +https://huggingface.co/ethansimrm/opus_wmt_finetuned_enfr_wang_2022 + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-othe_1_en.md b/docs/_posts/ahmedlone127/2024-09-07-othe_1_en.md new file mode 100644 index 00000000000000..1978ea2caf0769 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-othe_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English othe_1 RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: othe_1 +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`othe_1` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/othe_1_en_5.5.0_3.0_1725679717647.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/othe_1_en_5.5.0_3.0_1725679717647.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("othe_1","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("othe_1", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
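+
+A short follow-up sketch, assuming the pipeline built above, showing how the predicted label can be read back from `pipelineDF` (the classifier writes its prediction to the `class` column):
+
+```python
+# The predicted label for each input row sits in the annotation's `result` field
+pipelineDF.select("text", "class.result").show(truncate=False)
+```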
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|othe_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/Othe_1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-othe_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-othe_1_pipeline_en.md new file mode 100644 index 00000000000000..8f3a5bec784d22 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-othe_1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English othe_1_pipeline pipeline RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: othe_1_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`othe_1_pipeline` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/othe_1_pipeline_en_5.5.0_3.0_1725679739148.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/othe_1_pipeline_en_5.5.0_3.0_1725679739148.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("othe_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("othe_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|othe_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/Othe_1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-phowhisper_tiny_vinai_pipeline_vi.md b/docs/_posts/ahmedlone127/2024-09-07-phowhisper_tiny_vinai_pipeline_vi.md new file mode 100644 index 00000000000000..4376b5b1b981c0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-phowhisper_tiny_vinai_pipeline_vi.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Vietnamese phowhisper_tiny_vinai_pipeline pipeline WhisperForCTC from vinai +author: John Snow Labs +name: phowhisper_tiny_vinai_pipeline +date: 2024-09-07 +tags: [vi, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: vi +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`phowhisper_tiny_vinai_pipeline` is a Vietnamese model originally trained by vinai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/phowhisper_tiny_vinai_pipeline_vi_5.5.0_3.0_1725751665056.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/phowhisper_tiny_vinai_pipeline_vi_5.5.0_3.0_1725751665056.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("phowhisper_tiny_vinai_pipeline", lang = "vi") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("phowhisper_tiny_vinai_pipeline", lang = "vi") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|phowhisper_tiny_vinai_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|vi| +|Size:|389.6 MB| + +## References + +https://huggingface.co/vinai/PhoWhisper-tiny + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-phowhisper_tiny_vinai_vi.md b/docs/_posts/ahmedlone127/2024-09-07-phowhisper_tiny_vinai_vi.md new file mode 100644 index 00000000000000..9e42711943f057 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-phowhisper_tiny_vinai_vi.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Vietnamese phowhisper_tiny_vinai WhisperForCTC from vinai +author: John Snow Labs +name: phowhisper_tiny_vinai +date: 2024-09-07 +tags: [vi, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: vi +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`phowhisper_tiny_vinai` is a Vietnamese model originally trained by vinai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/phowhisper_tiny_vinai_vi_5.5.0_3.0_1725751646609.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/phowhisper_tiny_vinai_vi_5.5.0_3.0_1725751646609.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("phowhisper_tiny_vinai","vi") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+
+# `data` is assumed to be a DataFrame with an "audio_content" column of float samples
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("phowhisper_tiny_vinai", "vi")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+
+// `data` is assumed to be a DataFrame with an "audio_content" column of float samples
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
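+
+The `data` DataFrame used above needs an `audio_content` column holding the waveform as an array of floats. A hypothetical sketch of building it from a local 16 kHz WAV file (the file name and the use of librosa are assumptions, not part of Spark NLP):
+
+```python
+import librosa
+
+# Load the waveform as float samples; Whisper checkpoints expect 16 kHz audio
+raw_floats, _ = librosa.load("sample.wav", sr=16000)
+
+data = spark.createDataFrame([[raw_floats.tolist()]], ["audio_content"])
+```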
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|phowhisper_tiny_vinai| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|vi| +|Size:|389.6 MB| + +## References + +https://huggingface.co/vinai/PhoWhisper-tiny \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-pii_model_jayshah07_en.md b/docs/_posts/ahmedlone127/2024-09-07-pii_model_jayshah07_en.md new file mode 100644 index 00000000000000..412ccdad1aaa7f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-pii_model_jayshah07_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English pii_model_jayshah07 XlmRoBertaForTokenClassification from JayShah07 +author: John Snow Labs +name: pii_model_jayshah07 +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`pii_model_jayshah07` is a English model originally trained by JayShah07. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pii_model_jayshah07_en_5.5.0_3.0_1725744549236.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pii_model_jayshah07_en_5.5.0_3.0_1725744549236.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("pii_model_jayshah07","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("pii_model_jayshah07", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
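+
+A hedged sketch of pairing each token with its predicted entity tag from the `pipelineDF` built above (column names follow the snippet; `arrays_zip` and `explode` are standard Spark SQL functions):
+
+```python
+# Zip token text with the NER tag predicted for it, one row per token
+pipelineDF.selectExpr("explode(arrays_zip(token.result, ner.result)) as cols") \
+    .selectExpr("cols['0'] as token", "cols['1'] as ner_label") \
+    .show(truncate=False)
+```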
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pii_model_jayshah07| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|818.5 MB| + +## References + +https://huggingface.co/JayShah07/pii_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-pii_model_jayshah07_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-pii_model_jayshah07_pipeline_en.md new file mode 100644 index 00000000000000..0167bfa45d7e41 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-pii_model_jayshah07_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English pii_model_jayshah07_pipeline pipeline XlmRoBertaForTokenClassification from JayShah07 +author: John Snow Labs +name: pii_model_jayshah07_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`pii_model_jayshah07_pipeline` is a English model originally trained by JayShah07. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pii_model_jayshah07_pipeline_en_5.5.0_3.0_1725744663455.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pii_model_jayshah07_pipeline_en_5.5.0_3.0_1725744663455.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("pii_model_jayshah07_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("pii_model_jayshah07_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pii_model_jayshah07_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|818.5 MB| + +## References + +https://huggingface.co/JayShah07/pii_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-platzi_distilroberta_base_mrpc_glue_luigitercero_en.md b/docs/_posts/ahmedlone127/2024-09-07-platzi_distilroberta_base_mrpc_glue_luigitercero_en.md new file mode 100644 index 00000000000000..68ce96b299c26e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-platzi_distilroberta_base_mrpc_glue_luigitercero_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English platzi_distilroberta_base_mrpc_glue_luigitercero RoBertaForSequenceClassification from platzi +author: John Snow Labs +name: platzi_distilroberta_base_mrpc_glue_luigitercero +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`platzi_distilroberta_base_mrpc_glue_luigitercero` is a English model originally trained by platzi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/platzi_distilroberta_base_mrpc_glue_luigitercero_en_5.5.0_3.0_1725717599129.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/platzi_distilroberta_base_mrpc_glue_luigitercero_en_5.5.0_3.0_1725717599129.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("platzi_distilroberta_base_mrpc_glue_luigitercero","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("platzi_distilroberta_base_mrpc_glue_luigitercero", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|platzi_distilroberta_base_mrpc_glue_luigitercero| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|308.6 MB| + +## References + +https://huggingface.co/platzi/platzi-distilroberta-base-mrpc-glue-luigitercero \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-platzi_distilroberta_base_mrpc_glue_luigitercero_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-platzi_distilroberta_base_mrpc_glue_luigitercero_pipeline_en.md new file mode 100644 index 00000000000000..2304aea4e4f298 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-platzi_distilroberta_base_mrpc_glue_luigitercero_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English platzi_distilroberta_base_mrpc_glue_luigitercero_pipeline pipeline RoBertaForSequenceClassification from platzi +author: John Snow Labs +name: platzi_distilroberta_base_mrpc_glue_luigitercero_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`platzi_distilroberta_base_mrpc_glue_luigitercero_pipeline` is a English model originally trained by platzi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/platzi_distilroberta_base_mrpc_glue_luigitercero_pipeline_en_5.5.0_3.0_1725717613322.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/platzi_distilroberta_base_mrpc_glue_luigitercero_pipeline_en_5.5.0_3.0_1725717613322.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("platzi_distilroberta_base_mrpc_glue_luigitercero_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("platzi_distilroberta_base_mrpc_glue_luigitercero_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|platzi_distilroberta_base_mrpc_glue_luigitercero_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|308.6 MB| + +## References + +https://huggingface.co/platzi/platzi-distilroberta-base-mrpc-glue-luigitercero + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-platzi_distilroberta_base_mrpc_miguel_moroyoqui_en.md b/docs/_posts/ahmedlone127/2024-09-07-platzi_distilroberta_base_mrpc_miguel_moroyoqui_en.md new file mode 100644 index 00000000000000..7e33d2cfc32e3b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-platzi_distilroberta_base_mrpc_miguel_moroyoqui_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English platzi_distilroberta_base_mrpc_miguel_moroyoqui RoBertaForSequenceClassification from moroyoqui +author: John Snow Labs +name: platzi_distilroberta_base_mrpc_miguel_moroyoqui +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`platzi_distilroberta_base_mrpc_miguel_moroyoqui` is a English model originally trained by moroyoqui. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/platzi_distilroberta_base_mrpc_miguel_moroyoqui_en_5.5.0_3.0_1725718215157.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/platzi_distilroberta_base_mrpc_miguel_moroyoqui_en_5.5.0_3.0_1725718215157.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("platzi_distilroberta_base_mrpc_miguel_moroyoqui","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("platzi_distilroberta_base_mrpc_miguel_moroyoqui", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|platzi_distilroberta_base_mrpc_miguel_moroyoqui| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|308.6 MB| + +## References + +https://huggingface.co/moroyoqui/platzi-distilroberta-base-mrpc-miguel-moroyoqui \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-portuguese_finegrained_one_shot_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-portuguese_finegrained_one_shot_pipeline_en.md new file mode 100644 index 00000000000000..d0f8f960b0da88 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-portuguese_finegrained_one_shot_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English portuguese_finegrained_one_shot_pipeline pipeline XlmRoBertaForSequenceClassification from edwardgowsmith +author: John Snow Labs +name: portuguese_finegrained_one_shot_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`portuguese_finegrained_one_shot_pipeline` is a English model originally trained by edwardgowsmith. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/portuguese_finegrained_one_shot_pipeline_en_5.5.0_3.0_1725713387817.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/portuguese_finegrained_one_shot_pipeline_en_5.5.0_3.0_1725713387817.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("portuguese_finegrained_one_shot_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("portuguese_finegrained_one_shot_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|portuguese_finegrained_one_shot_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|783.0 MB| + +## References + +https://huggingface.co/edwardgowsmith/pt-finegrained-one-shot + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-predict_perception_xlmr_blame_concept_en.md b/docs/_posts/ahmedlone127/2024-09-07-predict_perception_xlmr_blame_concept_en.md new file mode 100644 index 00000000000000..fe040af3e03d19 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-predict_perception_xlmr_blame_concept_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English predict_perception_xlmr_blame_concept XlmRoBertaForSequenceClassification from responsibility-framing +author: John Snow Labs +name: predict_perception_xlmr_blame_concept +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`predict_perception_xlmr_blame_concept` is a English model originally trained by responsibility-framing. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/predict_perception_xlmr_blame_concept_en_5.5.0_3.0_1725669266141.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/predict_perception_xlmr_blame_concept_en_5.5.0_3.0_1725669266141.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("predict_perception_xlmr_blame_concept","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("predict_perception_xlmr_blame_concept", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|predict_perception_xlmr_blame_concept| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|837.6 MB| + +## References + +https://huggingface.co/responsibility-framing/predict-perception-xlmr-blame-concept \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-predict_perception_xlmr_blame_concept_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-predict_perception_xlmr_blame_concept_pipeline_en.md new file mode 100644 index 00000000000000..3afd930f3027a5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-predict_perception_xlmr_blame_concept_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English predict_perception_xlmr_blame_concept_pipeline pipeline XlmRoBertaForSequenceClassification from responsibility-framing +author: John Snow Labs +name: predict_perception_xlmr_blame_concept_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`predict_perception_xlmr_blame_concept_pipeline` is a English model originally trained by responsibility-framing. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/predict_perception_xlmr_blame_concept_pipeline_en_5.5.0_3.0_1725669328858.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/predict_perception_xlmr_blame_concept_pipeline_en_5.5.0_3.0_1725669328858.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("predict_perception_xlmr_blame_concept_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("predict_perception_xlmr_blame_concept_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|predict_perception_xlmr_blame_concept_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|837.6 MB| + +## References + +https://huggingface.co/responsibility-framing/predict-perception-xlmr-blame-concept + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-prof_ner_spanish_es.md b/docs/_posts/ahmedlone127/2024-09-07-prof_ner_spanish_es.md new file mode 100644 index 00000000000000..8d4159613da2e5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-prof_ner_spanish_es.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Castilian, Spanish prof_ner_spanish RoBertaForTokenClassification from BSC-NLP4BIA +author: John Snow Labs +name: prof_ner_spanish +date: 2024-09-07 +tags: [es, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`prof_ner_spanish` is a Castilian, Spanish model originally trained by BSC-NLP4BIA. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/prof_ner_spanish_es_5.5.0_3.0_1725707958206.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/prof_ner_spanish_es_5.5.0_3.0_1725707958206.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# the token classifier reads the 'document' and 'token' columns produced above
+tokenClassifier = RoBertaForTokenClassification.pretrained("prof_ner_spanish","es") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("prof_ner_spanish", "es")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|prof_ner_spanish| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|es| +|Size:|453.1 MB| + +## References + +https://huggingface.co/BSC-NLP4BIA/prof-ner-es \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-project_us_en.md b/docs/_posts/ahmedlone127/2024-09-07-project_us_en.md new file mode 100644 index 00000000000000..6e0cb1b42e30e7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-project_us_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English project_us RoBertaForSequenceClassification from nebiyu29 +author: John Snow Labs +name: project_us +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`project_us` is a English model originally trained by nebiyu29. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/project_us_en_5.5.0_3.0_1725679807497.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/project_us_en_5.5.0_3.0_1725679807497.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# the sequence classifier reads the 'document' and 'token' columns produced above
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("project_us","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("project_us", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|project_us| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|468.3 MB| + +## References + +https://huggingface.co/nebiyu29/project-us \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-project_us_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-project_us_pipeline_en.md new file mode 100644 index 00000000000000..3a986ff88aad94 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-project_us_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English project_us_pipeline pipeline RoBertaForSequenceClassification from nebiyu29 +author: John Snow Labs +name: project_us_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`project_us_pipeline` is a English model originally trained by nebiyu29. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/project_us_pipeline_en_5.5.0_3.0_1725679829402.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/project_us_pipeline_en_5.5.0_3.0_1725679829402.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("project_us_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("project_us_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
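+
+As above, `df` is not defined in the snippet; a possible construction, assuming the pipeline reads a `text` column and the classifier writes a `class` column (both assumed from the corresponding standalone card):
+
+```python
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")  # "text" column assumed
+annotations = pipeline.transform(df)
+annotations.select("class.result").show(truncate=False)          # "class" column assumed
+```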
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|project_us_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|468.3 MB| + +## References + +https://huggingface.co/nebiyu29/project-us + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-prompt_ls_portuguese_2_en.md b/docs/_posts/ahmedlone127/2024-09-07-prompt_ls_portuguese_2_en.md new file mode 100644 index 00000000000000..336df8d970a587 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-prompt_ls_portuguese_2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English prompt_ls_portuguese_2 RoBertaEmbeddings from lmvasque +author: John Snow Labs +name: prompt_ls_portuguese_2 +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`prompt_ls_portuguese_2` is a English model originally trained by lmvasque. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/prompt_ls_portuguese_2_en_5.5.0_3.0_1725678197881.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/prompt_ls_portuguese_2_en_5.5.0_3.0_1725678197881.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("prompt_ls_portuguese_2","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("prompt_ls_portuguese_2","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|prompt_ls_portuguese_2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|634.4 MB| + +## References + +https://huggingface.co/lmvasque/prompt-ls-pt-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-psais_all_mpnet_base_v2_10shot_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-psais_all_mpnet_base_v2_10shot_pipeline_en.md new file mode 100644 index 00000000000000..49d509fb0281d5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-psais_all_mpnet_base_v2_10shot_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English psais_all_mpnet_base_v2_10shot_pipeline pipeline MPNetEmbeddings from hroth +author: John Snow Labs +name: psais_all_mpnet_base_v2_10shot_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`psais_all_mpnet_base_v2_10shot_pipeline` is a English model originally trained by hroth. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/psais_all_mpnet_base_v2_10shot_pipeline_en_5.5.0_3.0_1725703473546.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/psais_all_mpnet_base_v2_10shot_pipeline_en_5.5.0_3.0_1725703473546.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("psais_all_mpnet_base_v2_10shot_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("psais_all_mpnet_base_v2_10shot_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
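+
+The `df` used above is any DataFrame of raw text. A minimal sketch, assuming a `text` input column; since the embedding output column name is not stated here, the schema is inspected instead of hard-coding it:
+
+```python
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")  # "text" column assumed
+annotations = pipeline.transform(df)
+annotations.printSchema()  # inspect the output columns produced by the pipeline
+```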
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|psais_all_mpnet_base_v2_10shot_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/hroth/psais-all-mpnet-base-v2-10shot + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-pubchem10m_smiles_bpe_390k_en.md b/docs/_posts/ahmedlone127/2024-09-07-pubchem10m_smiles_bpe_390k_en.md new file mode 100644 index 00000000000000..fdbdeecbe02856 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-pubchem10m_smiles_bpe_390k_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English pubchem10m_smiles_bpe_390k RoBertaEmbeddings from seyonec +author: John Snow Labs +name: pubchem10m_smiles_bpe_390k +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`pubchem10m_smiles_bpe_390k` is a English model originally trained by seyonec. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pubchem10m_smiles_bpe_390k_en_5.5.0_3.0_1725715991806.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pubchem10m_smiles_bpe_390k_en_5.5.0_3.0_1725715991806.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("pubchem10m_smiles_bpe_390k","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("pubchem10m_smiles_bpe_390k","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pubchem10m_smiles_bpe_390k| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|311.0 MB| + +## References + +https://huggingface.co/seyonec/PubChem10M_SMILES_BPE_390k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-pubmedbert_full_finetuned_ner_pablo_en.md b/docs/_posts/ahmedlone127/2024-09-07-pubmedbert_full_finetuned_ner_pablo_en.md new file mode 100644 index 00000000000000..822cc8819cfa73 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-pubmedbert_full_finetuned_ner_pablo_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English pubmedbert_full_finetuned_ner_pablo BertForTokenClassification from pabRomero +author: John Snow Labs +name: pubmedbert_full_finetuned_ner_pablo +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`pubmedbert_full_finetuned_ner_pablo` is a English model originally trained by pabRomero. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pubmedbert_full_finetuned_ner_pablo_en_5.5.0_3.0_1725690498318.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pubmedbert_full_finetuned_ner_pablo_en_5.5.0_3.0_1725690498318.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# the token classifier reads the 'document' and 'token' columns produced above
+tokenClassifier = BertForTokenClassification.pretrained("pubmedbert_full_finetuned_ner_pablo","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("pubmedbert_full_finetuned_ner_pablo", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pubmedbert_full_finetuned_ner_pablo| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|408.2 MB| + +## References + +https://huggingface.co/pabRomero/PubMedBERT-full-finetuned-ner-pablo \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-pubmedbert_full_finetuned_ner_pablo_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-pubmedbert_full_finetuned_ner_pablo_pipeline_en.md new file mode 100644 index 00000000000000..26b9c4efa54290 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-pubmedbert_full_finetuned_ner_pablo_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English pubmedbert_full_finetuned_ner_pablo_pipeline pipeline BertForTokenClassification from pabRomero +author: John Snow Labs +name: pubmedbert_full_finetuned_ner_pablo_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`pubmedbert_full_finetuned_ner_pablo_pipeline` is a English model originally trained by pabRomero. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pubmedbert_full_finetuned_ner_pablo_pipeline_en_5.5.0_3.0_1725690517309.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pubmedbert_full_finetuned_ner_pablo_pipeline_en_5.5.0_3.0_1725690517309.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("pubmedbert_full_finetuned_ner_pablo_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("pubmedbert_full_finetuned_ner_pablo_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
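+
+Here `df` holds the text to tag. A minimal sketch, assuming a `text` input column and an `ner` output column (taken from the standalone model card, not verified against the pipeline):
+
+```python
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")  # "text" column assumed
+annotations = pipeline.transform(df)
+annotations.select("ner.result").show(truncate=False)            # "ner" column assumed
+```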
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pubmedbert_full_finetuned_ner_pablo_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|408.2 MB| + +## References + +https://huggingface.co/pabRomero/PubMedBERT-full-finetuned-ner-pablo + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-q2d_gpt_35_en.md b/docs/_posts/ahmedlone127/2024-09-07-q2d_gpt_35_en.md new file mode 100644 index 00000000000000..c57158df83b62a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-q2d_gpt_35_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English q2d_gpt_35 MPNetEmbeddings from ingeol +author: John Snow Labs +name: q2d_gpt_35 +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`q2d_gpt_35` is a English model originally trained by ingeol. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/q2d_gpt_35_en_5.5.0_3.0_1725703298656.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/q2d_gpt_35_en_5.5.0_3.0_1725703298656.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("q2d_gpt_35","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("q2d_gpt_35","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|q2d_gpt_35| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/ingeol/q2d_gpt_35 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-q2d_gpt_35_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-q2d_gpt_35_pipeline_en.md new file mode 100644 index 00000000000000..63a91b0ef6e148 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-q2d_gpt_35_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English q2d_gpt_35_pipeline pipeline MPNetEmbeddings from ingeol +author: John Snow Labs +name: q2d_gpt_35_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`q2d_gpt_35_pipeline` is a English model originally trained by ingeol. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/q2d_gpt_35_pipeline_en_5.5.0_3.0_1725703318847.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/q2d_gpt_35_pipeline_en_5.5.0_3.0_1725703318847.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("q2d_gpt_35_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("q2d_gpt_35_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
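+
+`df` above is any DataFrame with the text to embed. One way to build it, assuming the pipeline expects a `text` column (column name assumed, not confirmed):
+
+```python
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+annotations.printSchema()  # check which output columns the pipeline adds
+```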
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|q2d_gpt_35_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/ingeol/q2d_gpt_35 + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-qa_ccc_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-qa_ccc_model_pipeline_en.md new file mode 100644 index 00000000000000..a698cbc7e2a3ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-qa_ccc_model_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English qa_ccc_model_pipeline pipeline DistilBertForQuestionAnswering from Mdetry +author: John Snow Labs +name: qa_ccc_model_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_ccc_model_pipeline` is a English model originally trained by Mdetry. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_ccc_model_pipeline_en_5.5.0_3.0_1725727115247.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_ccc_model_pipeline_en_5.5.0_3.0_1725727115247.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("qa_ccc_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("qa_ccc_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
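+
+For this question-answering pipeline, `df` needs both a question and a context column. A minimal sketch, assuming the MultiDocumentAssembler stage reads `question` and `context` and the span classifier writes `answer` (mirroring the standalone QA examples in this series, not verified against the pipeline itself):
+
+```python
+df = spark.createDataFrame(
+    [["What framework do I use?", "I use spark-nlp."]]
+).toDF("question", "context")  # column names assumed
+annotations = pipeline.transform(df)
+annotations.select("answer.result").show(truncate=False)  # "answer" column assumed
+```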
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_ccc_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Mdetry/QA_CCC_Model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-qa_iiitdmj_testing_en.md b/docs/_posts/ahmedlone127/2024-09-07-qa_iiitdmj_testing_en.md new file mode 100644 index 00000000000000..4ffc37cc3483b3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-qa_iiitdmj_testing_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English qa_iiitdmj_testing DistilBertForQuestionAnswering from samhitmantrala +author: John Snow Labs +name: qa_iiitdmj_testing +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_iiitdmj_testing` is a English model originally trained by samhitmantrala. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_iiitdmj_testing_en_5.5.0_3.0_1725727571118.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_iiitdmj_testing_en_5.5.0_3.0_1725727571118.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("qa_iiitdmj_testing","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+# the input column names must match the assembler's input columns above
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("qa_iiitdmj_testing", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_iiitdmj_testing| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/samhitmantrala/qa_iiitdmj_testing \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-qa_model_fsghs_en.md b/docs/_posts/ahmedlone127/2024-09-07-qa_model_fsghs_en.md new file mode 100644 index 00000000000000..d68bd8930e1990 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-qa_model_fsghs_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English qa_model_fsghs DistilBertForQuestionAnswering from fsghs +author: John Snow Labs +name: qa_model_fsghs +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_model_fsghs` is a English model originally trained by fsghs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_model_fsghs_en_5.5.0_3.0_1725727102659.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_model_fsghs_en_5.5.0_3.0_1725727102659.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("qa_model_fsghs","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+# the input column names must match the assembler's input columns above
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("qa_model_fsghs", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_model_fsghs| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/fsghs/qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-qa_model_fsghs_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-qa_model_fsghs_pipeline_en.md new file mode 100644 index 00000000000000..b4815c74b5ff16 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-qa_model_fsghs_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English qa_model_fsghs_pipeline pipeline DistilBertForQuestionAnswering from fsghs +author: John Snow Labs +name: qa_model_fsghs_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_model_fsghs_pipeline` is a English model originally trained by fsghs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_model_fsghs_pipeline_en_5.5.0_3.0_1725727115735.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_model_fsghs_pipeline_en_5.5.0_3.0_1725727115735.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("qa_model_fsghs_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("qa_model_fsghs_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
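+
+As with the other QA pipelines, `df` must supply a question and a context. A possible construction (the `question`/`context` input columns and the `answer` output column are assumptions based on the standalone model cards):
+
+```python
+df = spark.createDataFrame(
+    [["What framework do I use?", "I use spark-nlp."]]
+).toDF("question", "context")
+annotations = pipeline.transform(df)
+annotations.select("answer.result").show(truncate=False)
+```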
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_model_fsghs_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/fsghs/qa_model + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-qa_model_martacaldero_en.md b/docs/_posts/ahmedlone127/2024-09-07-qa_model_martacaldero_en.md new file mode 100644 index 00000000000000..ef98798dc8ad54 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-qa_model_martacaldero_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English qa_model_martacaldero DistilBertForQuestionAnswering from MartaCaldero +author: John Snow Labs +name: qa_model_martacaldero +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_model_martacaldero` is a English model originally trained by MartaCaldero. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_model_martacaldero_en_5.5.0_3.0_1725722686024.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_model_martacaldero_en_5.5.0_3.0_1725722686024.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("qa_model_martacaldero","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+# the input column names must match the assembler's input columns above
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("qa_model_martacaldero", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_model_martacaldero| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/MartaCaldero/qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-qa_model_test_ukson_en.md b/docs/_posts/ahmedlone127/2024-09-07-qa_model_test_ukson_en.md new file mode 100644 index 00000000000000..5b1512a95ec3c0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-qa_model_test_ukson_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English qa_model_test_ukson DistilBertForQuestionAnswering from ukson +author: John Snow Labs +name: qa_model_test_ukson +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_model_test_ukson` is a English model originally trained by ukson. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_model_test_ukson_en_5.5.0_3.0_1725745598435.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_model_test_ukson_en_5.5.0_3.0_1725745598435.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("qa_model_test_ukson","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+# the input column names must match the assembler's input columns above
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("qa_model_test_ukson", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_model_test_ukson| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ukson/qa_model_test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-qa_model_test_ukson_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-qa_model_test_ukson_pipeline_en.md new file mode 100644 index 00000000000000..60876e8b24aaa0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-qa_model_test_ukson_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English qa_model_test_ukson_pipeline pipeline DistilBertForQuestionAnswering from ukson +author: John Snow Labs +name: qa_model_test_ukson_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_model_test_ukson_pipeline` is a English model originally trained by ukson. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_model_test_ukson_pipeline_en_5.5.0_3.0_1725745611678.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_model_test_ukson_pipeline_en_5.5.0_3.0_1725745611678.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("qa_model_test_ukson_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("qa_model_test_ukson_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
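+
+Here too, `df` should contain one row per question/context pair. A minimal sketch under the same assumptions as the other QA pipeline cards (`question`/`context` inputs, `answer` output):
+
+```python
+df = spark.createDataFrame(
+    [["What framework do I use?", "I use spark-nlp."]]
+).toDF("question", "context")
+annotations = pipeline.transform(df)
+annotations.select("answer.result").show(truncate=False)
+```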
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_model_test_ukson_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ukson/qa_model_test + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-qa_synthetic_data_with_real_data_finetuned_v2_0_en.md b/docs/_posts/ahmedlone127/2024-09-07-qa_synthetic_data_with_real_data_finetuned_v2_0_en.md new file mode 100644 index 00000000000000..ad73a5a3ebe4e9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-qa_synthetic_data_with_real_data_finetuned_v2_0_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English qa_synthetic_data_with_real_data_finetuned_v2_0 XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: qa_synthetic_data_with_real_data_finetuned_v2_0 +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_synthetic_data_with_real_data_finetuned_v2_0` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_synthetic_data_with_real_data_finetuned_v2_0_en_5.5.0_3.0_1725711020696.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_synthetic_data_with_real_data_finetuned_v2_0_en_5.5.0_3.0_1725711020696.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_synthetic_data_with_real_data_finetuned_v2_0","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+# the input column names must match the assembler's input columns above
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("qa_synthetic_data_with_real_data_finetuned_v2_0", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_synthetic_data_with_real_data_finetuned_v2_0| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|851.8 MB| + +## References + +https://huggingface.co/am-infoweb/QA_SYNTHETIC_DATA_with_real_Data_Finetuned_v2.0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-qa_synthetic_data_with_real_data_finetuned_v2_0_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-qa_synthetic_data_with_real_data_finetuned_v2_0_pipeline_en.md new file mode 100644 index 00000000000000..eceed71099263e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-qa_synthetic_data_with_real_data_finetuned_v2_0_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English qa_synthetic_data_with_real_data_finetuned_v2_0_pipeline pipeline XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: qa_synthetic_data_with_real_data_finetuned_v2_0_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_synthetic_data_with_real_data_finetuned_v2_0_pipeline` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_synthetic_data_with_real_data_finetuned_v2_0_pipeline_en_5.5.0_3.0_1725711076719.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_synthetic_data_with_real_data_finetuned_v2_0_pipeline_en_5.5.0_3.0_1725711076719.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("qa_synthetic_data_with_real_data_finetuned_v2_0_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("qa_synthetic_data_with_real_data_finetuned_v2_0_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
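+
+The input `df` follows the same shape as the other QA pipelines: a question column and a context column (names assumed from the standalone examples):
+
+```python
+df = spark.createDataFrame(
+    [["What framework do I use?", "I use spark-nlp."]]
+).toDF("question", "context")
+annotations = pipeline.transform(df)
+annotations.select("answer.result").show(truncate=False)
+```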
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_synthetic_data_with_real_data_finetuned_v2_0_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|851.8 MB| + +## References + +https://huggingface.co/am-infoweb/QA_SYNTHETIC_DATA_with_real_Data_Finetuned_v2.0 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-qamodel_distilbert_en.md b/docs/_posts/ahmedlone127/2024-09-07-qamodel_distilbert_en.md new file mode 100644 index 00000000000000..c40cb7b0a91dcf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-qamodel_distilbert_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English qamodel_distilbert BertForQuestionAnswering from itsamitkumar +author: John Snow Labs +name: qamodel_distilbert +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, bert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qamodel_distilbert` is a English model originally trained by itsamitkumar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qamodel_distilbert_en_5.5.0_3.0_1725709369299.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qamodel_distilbert_en_5.5.0_3.0_1725709369299.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = BertForQuestionAnswering.pretrained("qamodel_distilbert","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+# the input column names must match the assembler's input columns above
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = BertForQuestionAnswering.pretrained("qamodel_distilbert", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qamodel_distilbert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|665.0 MB| + +## References + +https://huggingface.co/itsamitkumar/qamodel_distilbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-qamodel_distilbert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-qamodel_distilbert_pipeline_en.md new file mode 100644 index 00000000000000..19240162167bdb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-qamodel_distilbert_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English qamodel_distilbert_pipeline pipeline BertForQuestionAnswering from itsamitkumar +author: John Snow Labs +name: qamodel_distilbert_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qamodel_distilbert_pipeline` is a English model originally trained by itsamitkumar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qamodel_distilbert_pipeline_en_5.5.0_3.0_1725709399330.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qamodel_distilbert_pipeline_en_5.5.0_3.0_1725709399330.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("qamodel_distilbert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("qamodel_distilbert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
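+
+A possible `df` for this pipeline, under the same assumed `question`/`context` input columns and `answer` output column as the other QA cards:
+
+```python
+df = spark.createDataFrame(
+    [["What framework do I use?", "I use spark-nlp."]]
+).toDF("question", "context")
+annotations = pipeline.transform(df)
+annotations.select("answer.result").show(truncate=False)
+```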
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qamodel_distilbert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|665.1 MB| + +## References + +https://huggingface.co/itsamitkumar/qamodel_distilbert + +## Included Models + +- MultiDocumentAssembler +- BertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-queansmodel_en.md b/docs/_posts/ahmedlone127/2024-09-07-queansmodel_en.md new file mode 100644 index 00000000000000..179d4d2b3cf7db --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-queansmodel_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English queansmodel DistilBertForQuestionAnswering from KeiMura +author: John Snow Labs +name: queansmodel +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`queansmodel` is a English model originally trained by KeiMura. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/queansmodel_en_5.5.0_3.0_1725695627268.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/queansmodel_en_5.5.0_3.0_1725695627268.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("queansmodel","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("queansmodel", "en")
+  .setInputCols(Array("document_question","document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
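+
+Once `pipelineDF` has been computed as above, the predicted span can be read back from the `answer` annotation column. A small sketch using the standard Spark NLP annotation layout (shown here as an illustration, not output from this specific model):
+
+```python
+# Each row carries a list of annotations; `result` holds the extracted answer text.
+pipelineDF.selectExpr("explode(answer.result) AS predicted_answer").show(truncate=False)
+```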
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|queansmodel| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/KeiMura/QueAnsModel \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-question_answering_hansollll_en.md b/docs/_posts/ahmedlone127/2024-09-07-question_answering_hansollll_en.md new file mode 100644 index 00000000000000..2f883e45f28645 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-question_answering_hansollll_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English question_answering_hansollll DistilBertForQuestionAnswering from Hansollll +author: John Snow Labs +name: question_answering_hansollll +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`question_answering_hansollll` is a English model originally trained by Hansollll. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/question_answering_hansollll_en_5.5.0_3.0_1725745715291.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/question_answering_hansollll_en_5.5.0_3.0_1725745715291.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("question_answering_hansollll","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("question_answering_hansollll", "en")
+  .setInputCols(Array("document_question","document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|question_answering_hansollll| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Hansollll/Question_Answering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-question_answering_hansollll_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-question_answering_hansollll_pipeline_en.md new file mode 100644 index 00000000000000..05d9cec3eb0f00 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-question_answering_hansollll_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English question_answering_hansollll_pipeline pipeline DistilBertForQuestionAnswering from Hansollll +author: John Snow Labs +name: question_answering_hansollll_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`question_answering_hansollll_pipeline` is a English model originally trained by Hansollll. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/question_answering_hansollll_pipeline_en_5.5.0_3.0_1725745727328.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/question_answering_hansollll_pipeline_en_5.5.0_3.0_1725745727328.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("question_answering_hansollll_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("question_answering_hansollll_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|question_answering_hansollll_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Hansollll/Question_Answering + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-r_fb_sms_lm_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-r_fb_sms_lm_pipeline_en.md new file mode 100644 index 00000000000000..314b70706e2efc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-r_fb_sms_lm_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English r_fb_sms_lm_pipeline pipeline RoBertaEmbeddings from adnankhawaja +author: John Snow Labs +name: r_fb_sms_lm_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`r_fb_sms_lm_pipeline` is a English model originally trained by adnankhawaja. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/r_fb_sms_lm_pipeline_en_5.5.0_3.0_1725678526874.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/r_fb_sms_lm_pipeline_en_5.5.0_3.0_1725678526874.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("r_fb_sms_lm_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("r_fb_sms_lm_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
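+
+As above, `df` is assumed to already exist. A minimal sketch, assuming the pipeline's `DocumentAssembler` stage reads a `text` column (consistent with the included models listed below, but not shown explicitly in this card):
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# Hypothetical input frame with a single text column.
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("r_fb_sms_lm_pipeline", lang="en")
+annotations = pipeline.transform(df)
+annotations.printSchema()  # inspect which annotation columns the pipeline produced
+```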
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|r_fb_sms_lm_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|466.0 MB| + +## References + +https://huggingface.co/adnankhawaja/R_FB_SMS_LM + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-r_t_sms_lm_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-r_t_sms_lm_pipeline_en.md new file mode 100644 index 00000000000000..e6998adab5c62b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-r_t_sms_lm_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English r_t_sms_lm_pipeline pipeline RoBertaEmbeddings from adnankhawaja +author: John Snow Labs +name: r_t_sms_lm_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`r_t_sms_lm_pipeline` is a English model originally trained by adnankhawaja. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/r_t_sms_lm_pipeline_en_5.5.0_3.0_1725716811624.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/r_t_sms_lm_pipeline_en_5.5.0_3.0_1725716811624.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("r_t_sms_lm_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("r_t_sms_lm_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|r_t_sms_lm_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|466.0 MB| + +## References + +https://huggingface.co/adnankhawaja/R_T_SMS_LM + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-rap_phase2_11jan_15i_v2_en.md b/docs/_posts/ahmedlone127/2024-09-07-rap_phase2_11jan_15i_v2_en.md new file mode 100644 index 00000000000000..c797e6fbe1eb0a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-rap_phase2_11jan_15i_v2_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English rap_phase2_11jan_15i_v2 XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: rap_phase2_11jan_15i_v2 +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rap_phase2_11jan_15i_v2` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rap_phase2_11jan_15i_v2_en_5.5.0_3.0_1725710685304.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rap_phase2_11jan_15i_v2_en_5.5.0_3.0_1725710685304.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("rap_phase2_11jan_15i_v2","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("rap_phase2_11jan_15i_v2", "en")
+  .setInputCols(Array("document_question","document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rap_phase2_11jan_15i_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|843.5 MB| + +## References + +https://huggingface.co/am-infoweb/rap_phase2_11jan_15i_v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-rap_phase2_11jan_15i_v2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-rap_phase2_11jan_15i_v2_pipeline_en.md new file mode 100644 index 00000000000000..f4ed865e6d7782 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-rap_phase2_11jan_15i_v2_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English rap_phase2_11jan_15i_v2_pipeline pipeline XlmRoBertaForQuestionAnswering from am-infoweb +author: John Snow Labs +name: rap_phase2_11jan_15i_v2_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rap_phase2_11jan_15i_v2_pipeline` is a English model originally trained by am-infoweb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rap_phase2_11jan_15i_v2_pipeline_en_5.5.0_3.0_1725710742550.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rap_phase2_11jan_15i_v2_pipeline_en_5.5.0_3.0_1725710742550.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("rap_phase2_11jan_15i_v2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("rap_phase2_11jan_15i_v2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rap_phase2_11jan_15i_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|843.5 MB| + +## References + +https://huggingface.co/am-infoweb/rap_phase2_11jan_15i_v2 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-readability_spanish_sentences_es.md b/docs/_posts/ahmedlone127/2024-09-07-readability_spanish_sentences_es.md new file mode 100644 index 00000000000000..f3616a8ab3811f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-readability_spanish_sentences_es.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Castilian, Spanish readability_spanish_sentences RoBertaForSequenceClassification from somosnlp-hackathon-2022 +author: John Snow Labs +name: readability_spanish_sentences +date: 2024-09-07 +tags: [es, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`readability_spanish_sentences` is a Castilian, Spanish model originally trained by somosnlp-hackathon-2022. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/readability_spanish_sentences_es_5.5.0_3.0_1725718335243.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/readability_spanish_sentences_es_5.5.0_3.0_1725718335243.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("readability_spanish_sentences","es") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("readability_spanish_sentences", "es")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
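+
+With `pipelineDF` computed as above, the predicted label sits in the `class` annotation column. A short sketch of how it is typically read back, assuming the standard Spark NLP annotation layout:
+
+```python
+# `class.result` holds the predicted label for each input row.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```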
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|readability_spanish_sentences| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|es| +|Size:|444.8 MB| + +## References + +https://huggingface.co/somosnlp-hackathon-2022/readability-es-sentences \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-readability_spanish_sentences_pipeline_es.md b/docs/_posts/ahmedlone127/2024-09-07-readability_spanish_sentences_pipeline_es.md new file mode 100644 index 00000000000000..d3e9ea7d48411e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-readability_spanish_sentences_pipeline_es.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Castilian, Spanish readability_spanish_sentences_pipeline pipeline RoBertaForSequenceClassification from somosnlp-hackathon-2022 +author: John Snow Labs +name: readability_spanish_sentences_pipeline +date: 2024-09-07 +tags: [es, open_source, pipeline, onnx] +task: Text Classification +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`readability_spanish_sentences_pipeline` is a Castilian, Spanish model originally trained by somosnlp-hackathon-2022. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/readability_spanish_sentences_pipeline_es_5.5.0_3.0_1725718357720.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/readability_spanish_sentences_pipeline_es_5.5.0_3.0_1725718357720.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("readability_spanish_sentences_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("readability_spanish_sentences_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|readability_spanish_sentences_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|444.8 MB| + +## References + +https://huggingface.co/somosnlp-hackathon-2022/readability-es-sentences + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-refpydst_5p_referredstates_split_v1_en.md b/docs/_posts/ahmedlone127/2024-09-07-refpydst_5p_referredstates_split_v1_en.md new file mode 100644 index 00000000000000..8f022334846276 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-refpydst_5p_referredstates_split_v1_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English refpydst_5p_referredstates_split_v1 MPNetEmbeddings from Brendan +author: John Snow Labs +name: refpydst_5p_referredstates_split_v1 +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`refpydst_5p_referredstates_split_v1` is a English model originally trained by Brendan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/refpydst_5p_referredstates_split_v1_en_5.5.0_3.0_1725703453254.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/refpydst_5p_referredstates_split_v1_en_5.5.0_3.0_1725703453254.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("refpydst_5p_referredstates_split_v1","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("refpydst_5p_referredstates_split_v1","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
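+
+After running the pipeline above, the sentence-level vector can be pulled out of the `embeddings` annotation column. A small sketch, assuming the standard MPNetEmbeddings output layout:
+
+```python
+# One annotation per document; `embeddings` is the dense vector as an array of floats.
+pipelineDF.selectExpr("explode(embeddings) AS emb") \
+    .selectExpr("emb.embeddings AS sentence_embedding").show(1, truncate=80)
+```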
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|refpydst_5p_referredstates_split_v1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/Brendan/refpydst-5p-referredstates-split-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-robasquerta_eu.md b/docs/_posts/ahmedlone127/2024-09-07-robasquerta_eu.md new file mode 100644 index 00000000000000..fc8628a0d4b717 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-robasquerta_eu.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Basque robasquerta RoBertaEmbeddings from mrm8488 +author: John Snow Labs +name: robasquerta +date: 2024-09-07 +tags: [eu, open_source, onnx, embeddings, roberta] +task: Embeddings +language: eu +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`robasquerta` is a Basque model originally trained by mrm8488. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/robasquerta_eu_5.5.0_3.0_1725672714112.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/robasquerta_eu_5.5.0_3.0_1725672714112.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("robasquerta","eu") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("robasquerta","eu") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|robasquerta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|eu| +|Size:|310.5 MB| + +## References + +https://huggingface.co/mrm8488/RoBasquERTa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_2020_q1_filtered_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_2020_q1_filtered_pipeline_en.md new file mode 100644 index 00000000000000..b531f5a548935d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_2020_q1_filtered_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_2020_q1_filtered_pipeline pipeline RoBertaEmbeddings from DouglasPontes +author: John Snow Labs +name: roberta_2020_q1_filtered_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_2020_q1_filtered_pipeline` is a English model originally trained by DouglasPontes. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_2020_q1_filtered_pipeline_en_5.5.0_3.0_1725673279784.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_2020_q1_filtered_pipeline_en_5.5.0_3.0_1725673279784.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_2020_q1_filtered_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_2020_q1_filtered_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_2020_q1_filtered_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|465.3 MB| + +## References + +https://huggingface.co/DouglasPontes/roberta-2020-Q1-filtered + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_10m_1_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_10m_1_en.md new file mode 100644 index 00000000000000..bb3ac39922c478 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_10m_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_base_10m_1 RoBertaEmbeddings from nyu-mll +author: John Snow Labs +name: roberta_base_10m_1 +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_10m_1` is a English model originally trained by nyu-mll. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_10m_1_en_5.5.0_3.0_1725673668175.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_10m_1_en_5.5.0_3.0_1725673668175.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("roberta_base_10m_1","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("roberta_base_10m_1","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
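+
+The `embeddings` column produced above holds one annotation per token. A brief sketch for pairing each token with its vector, assuming the standard RoBertaEmbeddings output layout:
+
+```python
+# Explode token annotations and keep the token text alongside its embedding.
+pipelineDF.selectExpr("explode(embeddings) AS emb") \
+    .selectExpr("emb.result AS token", "emb.embeddings AS vector").show(5, truncate=False)
+```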
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_10m_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|296.3 MB| + +## References + +https://huggingface.co/nyu-mll/roberta-base-10M-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_10m_3_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_10m_3_en.md new file mode 100644 index 00000000000000..1f644050e48c7f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_10m_3_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_base_10m_3 RoBertaEmbeddings from nyu-mll +author: John Snow Labs +name: roberta_base_10m_3 +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_10m_3` is a English model originally trained by nyu-mll. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_10m_3_en_5.5.0_3.0_1725677827545.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_10m_3_en_5.5.0_3.0_1725677827545.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("roberta_base_10m_3","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("roberta_base_10m_3","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_10m_3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|296.1 MB| + +## References + +https://huggingface.co/nyu-mll/roberta-base-10M-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_10m_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_10m_3_pipeline_en.md new file mode 100644 index 00000000000000..fa1a39c35aa19a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_10m_3_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_base_10m_3_pipeline pipeline RoBertaEmbeddings from nyu-mll +author: John Snow Labs +name: roberta_base_10m_3_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_10m_3_pipeline` is a English model originally trained by nyu-mll. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_10m_3_pipeline_en_5.5.0_3.0_1725677915019.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_10m_3_pipeline_en_5.5.0_3.0_1725677915019.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_10m_3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_10m_3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_10m_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|296.1 MB| + +## References + +https://huggingface.co/nyu-mll/roberta-base-10M-3 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_1b_3_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_1b_3_en.md new file mode 100644 index 00000000000000..6dd833b6972982 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_1b_3_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_base_1b_3 RoBertaEmbeddings from nyu-mll +author: John Snow Labs +name: roberta_base_1b_3 +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_1b_3` is a English model originally trained by nyu-mll. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_1b_3_en_5.5.0_3.0_1725673048469.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_1b_3_en_5.5.0_3.0_1725673048469.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("roberta_base_1b_3","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("roberta_base_1b_3","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_1b_3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|296.5 MB| + +## References + +https://huggingface.co/nyu-mll/roberta-base-1B-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_1b_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_1b_3_pipeline_en.md new file mode 100644 index 00000000000000..f09f8f2921f145 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_1b_3_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_base_1b_3_pipeline pipeline RoBertaEmbeddings from nyu-mll +author: John Snow Labs +name: roberta_base_1b_3_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_1b_3_pipeline` is a English model originally trained by nyu-mll. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_1b_3_pipeline_en_5.5.0_3.0_1725673133904.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_1b_3_pipeline_en_5.5.0_3.0_1725673133904.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_1b_3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_1b_3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_1b_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|296.6 MB| + +## References + +https://huggingface.co/nyu-mll/roberta-base-1B-3 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_catalan_plantl_gob_es_pipeline_ca.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_catalan_plantl_gob_es_pipeline_ca.md new file mode 100644 index 00000000000000..e37d0bdb160ddd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_catalan_plantl_gob_es_pipeline_ca.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Catalan, Valencian roberta_base_catalan_plantl_gob_es_pipeline pipeline RoBertaEmbeddings from PlanTL-GOB-ES +author: John Snow Labs +name: roberta_base_catalan_plantl_gob_es_pipeline +date: 2024-09-07 +tags: [ca, open_source, pipeline, onnx] +task: Embeddings +language: ca +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_catalan_plantl_gob_es_pipeline` is a Catalan, Valencian model originally trained by PlanTL-GOB-ES. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_catalan_plantl_gob_es_pipeline_ca_5.5.0_3.0_1725698140179.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_catalan_plantl_gob_es_pipeline_ca_5.5.0_3.0_1725698140179.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_catalan_plantl_gob_es_pipeline", lang = "ca") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_catalan_plantl_gob_es_pipeline", lang = "ca") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_catalan_plantl_gob_es_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ca| +|Size:|297.8 MB| + +## References + +https://huggingface.co/PlanTL-GOB-ES/roberta-base-ca + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_detect_cheapfake_combined_train_test_15200_2_8_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_detect_cheapfake_combined_train_test_15200_2_8_en.md new file mode 100644 index 00000000000000..52707dc7981778 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_detect_cheapfake_combined_train_test_15200_2_8_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_base_detect_cheapfake_combined_train_test_15200_2_8 RoBertaForSequenceClassification from hoanghoavienvo +author: John Snow Labs +name: roberta_base_detect_cheapfake_combined_train_test_15200_2_8 +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_detect_cheapfake_combined_train_test_15200_2_8` is a English model originally trained by hoanghoavienvo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_detect_cheapfake_combined_train_test_15200_2_8_en_5.5.0_3.0_1725717558619.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_detect_cheapfake_combined_train_test_15200_2_8_en_5.5.0_3.0_1725717558619.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("roberta_base_detect_cheapfake_combined_train_test_15200_2_8","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("roberta_base_detect_cheapfake_combined_train_test_15200_2_8", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_detect_cheapfake_combined_train_test_15200_2_8| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|441.9 MB| + +## References + +https://huggingface.co/hoanghoavienvo/roberta-base-detect-cheapfake-combined-train-test-15200-2-8 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_detect_cheapfake_combined_train_test_15200_2_8_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_detect_cheapfake_combined_train_test_15200_2_8_pipeline_en.md new file mode 100644 index 00000000000000..010a4a873dd324 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_detect_cheapfake_combined_train_test_15200_2_8_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_base_detect_cheapfake_combined_train_test_15200_2_8_pipeline pipeline RoBertaForSequenceClassification from hoanghoavienvo +author: John Snow Labs +name: roberta_base_detect_cheapfake_combined_train_test_15200_2_8_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_detect_cheapfake_combined_train_test_15200_2_8_pipeline` is a English model originally trained by hoanghoavienvo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_detect_cheapfake_combined_train_test_15200_2_8_pipeline_en_5.5.0_3.0_1725717589379.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_detect_cheapfake_combined_train_test_15200_2_8_pipeline_en_5.5.0_3.0_1725717589379.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_detect_cheapfake_combined_train_test_15200_2_8_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_detect_cheapfake_combined_train_test_15200_2_8_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_detect_cheapfake_combined_train_test_15200_2_8_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|442.0 MB| + +## References + +https://huggingface.co/hoanghoavienvo/roberta-base-detect-cheapfake-combined-train-test-15200-2-8 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_education_domain_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_education_domain_pipeline_en.md new file mode 100644 index 00000000000000..608298e2eabe01 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_education_domain_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_base_education_domain_pipeline pipeline RoBertaEmbeddings from Preeyank +author: John Snow Labs +name: roberta_base_education_domain_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_education_domain_pipeline` is a English model originally trained by Preeyank. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_education_domain_pipeline_en_5.5.0_3.0_1725673411540.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_education_domain_pipeline_en_5.5.0_3.0_1725673411540.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_education_domain_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_education_domain_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_education_domain_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|465.3 MB| + +## References + +https://huggingface.co/Preeyank/roberta-base-education-domain + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_epoch_25_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_epoch_25_en.md new file mode 100644 index 00000000000000..2582f6fd3d1d93 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_epoch_25_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_base_epoch_25 RoBertaEmbeddings from yanaiela +author: John Snow Labs +name: roberta_base_epoch_25 +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_epoch_25` is a English model originally trained by yanaiela. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_epoch_25_en_5.5.0_3.0_1725678003931.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_epoch_25_en_5.5.0_3.0_1725678003931.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("roberta_base_epoch_25","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("roberta_base_epoch_25","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_epoch_25| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|297.3 MB| + +## References + +https://huggingface.co/yanaiela/roberta-base-epoch_25 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_epoch_25_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_epoch_25_pipeline_en.md new file mode 100644 index 00000000000000..1e955e737f1ad2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_epoch_25_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_base_epoch_25_pipeline pipeline RoBertaEmbeddings from yanaiela +author: John Snow Labs +name: roberta_base_epoch_25_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_epoch_25_pipeline` is a English model originally trained by yanaiela. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_epoch_25_pipeline_en_5.5.0_3.0_1725678089403.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_epoch_25_pipeline_en_5.5.0_3.0_1725678089403.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_epoch_25_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_epoch_25_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_epoch_25_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|297.3 MB| + +## References + +https://huggingface.co/yanaiela/roberta-base-epoch_25 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_epoch_29_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_epoch_29_pipeline_en.md new file mode 100644 index 00000000000000..b5415d785477fe --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_epoch_29_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_base_epoch_29_pipeline pipeline RoBertaEmbeddings from yanaiela +author: John Snow Labs +name: roberta_base_epoch_29_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_epoch_29_pipeline` is a English model originally trained by yanaiela. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_epoch_29_pipeline_en_5.5.0_3.0_1725679006234.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_epoch_29_pipeline_en_5.5.0_3.0_1725679006234.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_epoch_29_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_epoch_29_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_epoch_29_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|297.3 MB| + +## References + +https://huggingface.co/yanaiela/roberta-base-epoch_29 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_epoch_43_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_epoch_43_en.md new file mode 100644 index 00000000000000..1b9a4adc753411 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_epoch_43_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_base_epoch_43 RoBertaEmbeddings from yanaiela +author: John Snow Labs +name: roberta_base_epoch_43 +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_epoch_43` is a English model originally trained by yanaiela. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_epoch_43_en_5.5.0_3.0_1725673485758.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_epoch_43_en_5.5.0_3.0_1725673485758.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("roberta_base_epoch_43","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("roberta_base_epoch_43","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_epoch_43| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|297.3 MB| + +## References + +https://huggingface.co/yanaiela/roberta-base-epoch_43 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_finetuned_cola_jinchen_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_finetuned_cola_jinchen_en.md new file mode 100644 index 00000000000000..a8ee3495302032 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_finetuned_cola_jinchen_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_base_finetuned_cola_jinchen RoBertaForSequenceClassification from Jinchen +author: John Snow Labs +name: roberta_base_finetuned_cola_jinchen +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_finetuned_cola_jinchen` is a English model originally trained by Jinchen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_finetuned_cola_jinchen_en_5.5.0_3.0_1725679547193.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_finetuned_cola_jinchen_en_5.5.0_3.0_1725679547193.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("roberta_base_finetuned_cola_jinchen","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("roberta_base_finetuned_cola_jinchen", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
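+
+Once the example above has run, the predicted label for each input row sits in the `class` output column; a quick way to inspect it (using the column names from the snippet above):
+
+```python
+# "class.result" is the predicted label; "class.metadata" carries the per-label scores
+pipelineDF.selectExpr("text", "class.result as prediction", "class.metadata as scores") \
+    .show(truncate=False)
+```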
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_finetuned_cola_jinchen| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|299.7 MB| + +## References + +https://huggingface.co/Jinchen/roberta-base-finetuned-cola \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_finetuned_cola_jinchen_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_finetuned_cola_jinchen_pipeline_en.md new file mode 100644 index 00000000000000..a740dbf6ae385d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_finetuned_cola_jinchen_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_base_finetuned_cola_jinchen_pipeline pipeline RoBertaForSequenceClassification from Jinchen +author: John Snow Labs +name: roberta_base_finetuned_cola_jinchen_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_finetuned_cola_jinchen_pipeline` is a English model originally trained by Jinchen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_finetuned_cola_jinchen_pipeline_en_5.5.0_3.0_1725679633796.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_finetuned_cola_jinchen_pipeline_en_5.5.0_3.0_1725679633796.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_finetuned_cola_jinchen_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_finetuned_cola_jinchen_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_finetuned_cola_jinchen_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|299.7 MB| + +## References + +https://huggingface.co/Jinchen/roberta-base-finetuned-cola + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_finetuned_neg_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_finetuned_neg_pipeline_en.md new file mode 100644 index 00000000000000..ace0da531eb649 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_finetuned_neg_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_base_finetuned_neg_pipeline pipeline RoBertaForTokenClassification from tqoyiwcvwkephzdgsp +author: John Snow Labs +name: roberta_base_finetuned_neg_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_finetuned_neg_pipeline` is a English model originally trained by tqoyiwcvwkephzdgsp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_finetuned_neg_pipeline_en_5.5.0_3.0_1725723471585.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_finetuned_neg_pipeline_en_5.5.0_3.0_1725723471585.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_finetuned_neg_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_finetuned_neg_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_finetuned_neg_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|426.7 MB| + +## References + +https://huggingface.co/tqoyiwcvwkephzdgsp/roberta-base-finetuned-neg + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_finetuned_sarcasm_news_headline_detection_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_finetuned_sarcasm_news_headline_detection_en.md new file mode 100644 index 00000000000000..f82d18b6cf338b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_finetuned_sarcasm_news_headline_detection_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_base_finetuned_sarcasm_news_headline_detection RoBertaForSequenceClassification from sadia72 +author: John Snow Labs +name: roberta_base_finetuned_sarcasm_news_headline_detection +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_finetuned_sarcasm_news_headline_detection` is a English model originally trained by sadia72. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_finetuned_sarcasm_news_headline_detection_en_5.5.0_3.0_1725680536655.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_finetuned_sarcasm_news_headline_detection_en_5.5.0_3.0_1725680536655.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("roberta_base_finetuned_sarcasm_news_headline_detection","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("roberta_base_finetuned_sarcasm_news_headline_detection", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_finetuned_sarcasm_news_headline_detection| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|451.8 MB| + +## References + +https://huggingface.co/sadia72/roberta-base-finetuned-sarcasm-news-headline-detection \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_finetuned_sarcasm_news_headline_detection_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_finetuned_sarcasm_news_headline_detection_pipeline_en.md new file mode 100644 index 00000000000000..b35ccb5f5e6ff5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_finetuned_sarcasm_news_headline_detection_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_base_finetuned_sarcasm_news_headline_detection_pipeline pipeline RoBertaForSequenceClassification from sadia72 +author: John Snow Labs +name: roberta_base_finetuned_sarcasm_news_headline_detection_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_finetuned_sarcasm_news_headline_detection_pipeline` is a English model originally trained by sadia72. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_finetuned_sarcasm_news_headline_detection_pipeline_en_5.5.0_3.0_1725680558059.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_finetuned_sarcasm_news_headline_detection_pipeline_en_5.5.0_3.0_1725680558059.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_finetuned_sarcasm_news_headline_detection_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_finetuned_sarcasm_news_headline_detection_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_finetuned_sarcasm_news_headline_detection_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|451.9 MB| + +## References + +https://huggingface.co/sadia72/roberta-base-finetuned-sarcasm-news-headline-detection + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_ftd_on_glue_qqp_iter_1_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_ftd_on_glue_qqp_iter_1_en.md new file mode 100644 index 00000000000000..181fe8efe88268 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_ftd_on_glue_qqp_iter_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_base_ftd_on_glue_qqp_iter_1 RoBertaForSequenceClassification from Ibrahim-Alam +author: John Snow Labs +name: roberta_base_ftd_on_glue_qqp_iter_1 +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_ftd_on_glue_qqp_iter_1` is a English model originally trained by Ibrahim-Alam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_ftd_on_glue_qqp_iter_1_en_5.5.0_3.0_1725718343947.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_ftd_on_glue_qqp_iter_1_en_5.5.0_3.0_1725718343947.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("roberta_base_ftd_on_glue_qqp_iter_1","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("roberta_base_ftd_on_glue_qqp_iter_1", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_ftd_on_glue_qqp_iter_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|466.5 MB| + +## References + +https://huggingface.co/Ibrahim-Alam/roberta-base_FTd_on_glue-qqp_iter-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_ftd_on_glue_qqp_iter_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_ftd_on_glue_qqp_iter_1_pipeline_en.md new file mode 100644 index 00000000000000..413208f531414c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_ftd_on_glue_qqp_iter_1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_base_ftd_on_glue_qqp_iter_1_pipeline pipeline RoBertaForSequenceClassification from Ibrahim-Alam +author: John Snow Labs +name: roberta_base_ftd_on_glue_qqp_iter_1_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_ftd_on_glue_qqp_iter_1_pipeline` is a English model originally trained by Ibrahim-Alam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_ftd_on_glue_qqp_iter_1_pipeline_en_5.5.0_3.0_1725718365073.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_ftd_on_glue_qqp_iter_1_pipeline_en_5.5.0_3.0_1725718365073.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_ftd_on_glue_qqp_iter_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_ftd_on_glue_qqp_iter_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_ftd_on_glue_qqp_iter_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|466.5 MB| + +## References + +https://huggingface.co/Ibrahim-Alam/roberta-base_FTd_on_glue-qqp_iter-1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_lego_emotions_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_lego_emotions_en.md new file mode 100644 index 00000000000000..286edf68afafc0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_lego_emotions_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_base_lego_emotions RoBertaForSequenceClassification from 0ssamaak0 +author: John Snow Labs +name: roberta_base_lego_emotions +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_lego_emotions` is a English model originally trained by 0ssamaak0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_lego_emotions_en_5.5.0_3.0_1725717781773.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_lego_emotions_en_5.5.0_3.0_1725717781773.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("roberta_base_lego_emotions","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("roberta_base_lego_emotions", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_lego_emotions| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|452.1 MB| + +## References + +https://huggingface.co/0ssamaak0/roberta-base-LEGO_emotions \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_lego_emotions_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_lego_emotions_pipeline_en.md new file mode 100644 index 00000000000000..dfd3c7c356f4b7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_lego_emotions_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_base_lego_emotions_pipeline pipeline RoBertaForSequenceClassification from 0ssamaak0 +author: John Snow Labs +name: roberta_base_lego_emotions_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_lego_emotions_pipeline` is a English model originally trained by 0ssamaak0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_lego_emotions_pipeline_en_5.5.0_3.0_1725717806938.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_lego_emotions_pipeline_en_5.5.0_3.0_1725717806938.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_lego_emotions_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_lego_emotions_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_lego_emotions_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|452.1 MB| + +## References + +https://huggingface.co/0ssamaak0/roberta-base-LEGO_emotions + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_lener_breton_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_lener_breton_en.md new file mode 100644 index 00000000000000..c9b14e76af4fb1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_lener_breton_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_base_lener_breton RoBertaForTokenClassification from CassioBN +author: John Snow Labs +name: roberta_base_lener_breton +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_lener_breton` is a English model originally trained by CassioBN. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_lener_breton_en_5.5.0_3.0_1725721118339.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_lener_breton_en_5.5.0_3.0_1725721118339.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_base_lener_breton","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_base_lener_breton", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
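+
+The example above produces token-level IOB tags in the `ner` column. If whole entity chunks are needed, a `NerConverter` stage can be appended; this is a sketch that reuses the stages and `data` defined above and is not part of the published model:
+
+```python
+from pyspark.ml import Pipeline
+from sparknlp.annotator import NerConverter
+
+# Group the token-level IOB tags into entity chunks
+converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("entities")
+
+ner_pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier, converter])
+result = ner_pipeline.fit(data).transform(data)
+result.select("entities.result").show(truncate=False)
+```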
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_lener_breton| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|428.4 MB| + +## References + +https://huggingface.co/CassioBN/roberta-base_LeNER-Br \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_lener_breton_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_lener_breton_pipeline_en.md new file mode 100644 index 00000000000000..2b09c9a3f61c1c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_lener_breton_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_base_lener_breton_pipeline pipeline RoBertaForTokenClassification from CassioBN +author: John Snow Labs +name: roberta_base_lener_breton_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_lener_breton_pipeline` is a English model originally trained by CassioBN. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_lener_breton_pipeline_en_5.5.0_3.0_1725721150514.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_lener_breton_pipeline_en_5.5.0_3.0_1725721150514.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_lener_breton_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_lener_breton_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_lener_breton_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|428.4 MB| + +## References + +https://huggingface.co/CassioBN/roberta-base_LeNER-Br + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_ner_demo3_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_ner_demo3_en.md new file mode 100644 index 00000000000000..d74ab5e05f53d0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_ner_demo3_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_base_ner_demo3 XlmRoBertaForTokenClassification from peace4ever +author: John Snow Labs +name: roberta_base_ner_demo3 +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_ner_demo3` is a English model originally trained by peace4ever. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_ner_demo3_en_5.5.0_3.0_1725704353395.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_ner_demo3_en_5.5.0_3.0_1725704353395.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("roberta_base_ner_demo3","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("roberta_base_ner_demo3", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_ner_demo3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|902.9 MB| + +## References + +https://huggingface.co/peace4ever/roberta-base-ner-demo3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_ner_demo3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_ner_demo3_pipeline_en.md new file mode 100644 index 00000000000000..d4dd5727a7e163 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_ner_demo3_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_base_ner_demo3_pipeline pipeline XlmRoBertaForTokenClassification from peace4ever +author: John Snow Labs +name: roberta_base_ner_demo3_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_ner_demo3_pipeline` is a English model originally trained by peace4ever. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_ner_demo3_pipeline_en_5.5.0_3.0_1725704415184.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_ner_demo3_pipeline_en_5.5.0_3.0_1725704415184.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_ner_demo3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_ner_demo3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_ner_demo3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|902.9 MB| + +## References + +https://huggingface.co/peace4ever/roberta-base-ner-demo3 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_ner_demo_ganbold13_mn.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_ner_demo_ganbold13_mn.md new file mode 100644 index 00000000000000..e04df4de3fa9a7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_ner_demo_ganbold13_mn.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Mongolian roberta_base_ner_demo_ganbold13 RoBertaForTokenClassification from ganbold13 +author: John Snow Labs +name: roberta_base_ner_demo_ganbold13 +date: 2024-09-07 +tags: [mn, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: mn +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_ner_demo_ganbold13` is a Mongolian model originally trained by ganbold13. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_ner_demo_ganbold13_mn_5.5.0_3.0_1725720924384.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_ner_demo_ganbold13_mn_5.5.0_3.0_1725720924384.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_base_ner_demo_ganbold13","mn") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_base_ner_demo_ganbold13", "mn")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_ner_demo_ganbold13| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|mn| +|Size:|465.7 MB| + +## References + +https://huggingface.co/ganbold13/roberta-base-ner-demo \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_ner_demo_turshilt2_mn.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_ner_demo_turshilt2_mn.md new file mode 100644 index 00000000000000..c26887f15d248c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_ner_demo_turshilt2_mn.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Mongolian roberta_base_ner_demo_turshilt2 RoBertaForTokenClassification from sanchirjav +author: John Snow Labs +name: roberta_base_ner_demo_turshilt2 +date: 2024-09-07 +tags: [mn, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: mn +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_ner_demo_turshilt2` is a Mongolian model originally trained by sanchirjav. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_ner_demo_turshilt2_mn_5.5.0_3.0_1725708013017.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_ner_demo_turshilt2_mn_5.5.0_3.0_1725708013017.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_base_ner_demo_turshilt2","mn") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_base_ner_demo_turshilt2", "mn")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_ner_demo_turshilt2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|mn| +|Size:|465.7 MB| + +## References + +https://huggingface.co/sanchirjav/roberta-base-ner-demo-turshilt2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_qqp_two_stage_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_qqp_two_stage_pipeline_en.md new file mode 100644 index 00000000000000..a21992d17cba73 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_qqp_two_stage_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_base_qqp_two_stage_pipeline pipeline RoBertaEmbeddings from ji-xin +author: John Snow Labs +name: roberta_base_qqp_two_stage_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_qqp_two_stage_pipeline` is a English model originally trained by ji-xin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_qqp_two_stage_pipeline_en_5.5.0_3.0_1725677851331.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_qqp_two_stage_pipeline_en_5.5.0_3.0_1725677851331.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_qqp_two_stage_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_qqp_two_stage_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_qqp_two_stage_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|462.1 MB| + +## References + +https://huggingface.co/ji-xin/roberta_base-QQP-two_stage + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_rte_two_stage_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_rte_two_stage_en.md new file mode 100644 index 00000000000000..c3ab63e8d3a221 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_rte_two_stage_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_base_rte_two_stage RoBertaEmbeddings from ji-xin +author: John Snow Labs +name: roberta_base_rte_two_stage +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_rte_two_stage` is a English model originally trained by ji-xin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_rte_two_stage_en_5.5.0_3.0_1725678692857.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_rte_two_stage_en_5.5.0_3.0_1725678692857.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("roberta_base_rte_two_stage","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("roberta_base_rte_two_stage","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_rte_two_stage| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|436.2 MB| + +## References + +https://huggingface.co/ji-xin/roberta_base-RTE-two_stage \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_rte_two_stage_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_rte_two_stage_pipeline_en.md new file mode 100644 index 00000000000000..2385fc18d0d4a8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_rte_two_stage_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_base_rte_two_stage_pipeline pipeline RoBertaEmbeddings from ji-xin +author: John Snow Labs +name: roberta_base_rte_two_stage_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_rte_two_stage_pipeline` is a English model originally trained by ji-xin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_rte_two_stage_pipeline_en_5.5.0_3.0_1725678724232.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_rte_two_stage_pipeline_en_5.5.0_3.0_1725678724232.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_rte_two_stage_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_rte_two_stage_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
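+
+The `df` referenced above is not defined in the snippet; any Spark DataFrame with a `text` column will do (the column name mirrors the non-pipeline examples in this post and is an assumption). A minimal sketch:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# illustrative input DataFrame; replace with your own text data
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("roberta_base_rte_two_stage_pipeline", lang="en")
+annotations = pipeline.transform(df)
+annotations.printSchema()  # inspect which annotation columns the pipeline produced
+```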
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_rte_two_stage_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|436.3 MB| + +## References + +https://huggingface.co/ji-xin/roberta_base-RTE-two_stage + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_thai_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_thai_en.md new file mode 100644 index 00000000000000..663116e3158a58 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_thai_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_base_thai RoBertaEmbeddings from flax-community +author: John Snow Labs +name: roberta_base_thai +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_thai` is a English model originally trained by flax-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_thai_en_5.5.0_3.0_1725672702571.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_thai_en_5.5.0_3.0_1725672702571.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("roberta_base_thai","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("roberta_base_thai","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_thai| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|192.3 MB| + +## References + +https://huggingface.co/flax-community/roberta-base-thai \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_vira_intents_live_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_vira_intents_live_en.md new file mode 100644 index 00000000000000..276324d8853f64 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_vira_intents_live_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_base_vira_intents_live RoBertaForSequenceClassification from ibm +author: John Snow Labs +name: roberta_base_vira_intents_live +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_vira_intents_live` is a English model originally trained by ibm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_vira_intents_live_en_5.5.0_3.0_1725679544451.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_vira_intents_live_en_5.5.0_3.0_1725679544451.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# input columns must match the upstream output columns: "document" and "token"
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("roberta_base_vira_intents_live","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("roberta_base_vira_intents_live", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
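+
+The predicted intent is written to the `class` column as an annotation whose `result` field holds the label. A short sketch for reading it, assuming the standard annotation schema:
+
+```python
+# one predicted label per input row
+pipelineDF.select("text", "class.result").show(truncate=False)
+```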
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_vira_intents_live| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|422.8 MB| + +## References + +https://huggingface.co/ibm/roberta-base-vira-intents-live \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_base_vira_intents_live_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_vira_intents_live_pipeline_en.md new file mode 100644 index 00000000000000..767fdc2a3c6922 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_base_vira_intents_live_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_base_vira_intents_live_pipeline pipeline RoBertaForSequenceClassification from ibm +author: John Snow Labs +name: roberta_base_vira_intents_live_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_vira_intents_live_pipeline` is a English model originally trained by ibm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_vira_intents_live_pipeline_en_5.5.0_3.0_1725679583011.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_vira_intents_live_pipeline_en_5.5.0_3.0_1725679583011.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_vira_intents_live_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_vira_intents_live_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_vira_intents_live_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|422.9 MB| + +## References + +https://huggingface.co/ibm/roberta-base-vira-intents-live + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_classifier_large_realsumm_by_examples_fold2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_classifier_large_realsumm_by_examples_fold2_pipeline_en.md new file mode 100644 index 00000000000000..812abdb9d83a2e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_classifier_large_realsumm_by_examples_fold2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_classifier_large_realsumm_by_examples_fold2_pipeline pipeline RoBertaForSequenceClassification from shiyue +author: John Snow Labs +name: roberta_classifier_large_realsumm_by_examples_fold2_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_classifier_large_realsumm_by_examples_fold2_pipeline` is a English model originally trained by shiyue. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_classifier_large_realsumm_by_examples_fold2_pipeline_en_5.5.0_3.0_1725718010448.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_classifier_large_realsumm_by_examples_fold2_pipeline_en_5.5.0_3.0_1725718010448.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_classifier_large_realsumm_by_examples_fold2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_classifier_large_realsumm_by_examples_fold2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_classifier_large_realsumm_by_examples_fold2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/shiyue/roberta-large-realsumm-by-examples-fold2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_conll_learning_rate1e4_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_conll_learning_rate1e4_pipeline_en.md new file mode 100644 index 00000000000000..48c2bdb400f4a9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_conll_learning_rate1e4_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_conll_learning_rate1e4_pipeline pipeline RoBertaForTokenClassification from ICT2214Team7 +author: John Snow Labs +name: roberta_conll_learning_rate1e4_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_conll_learning_rate1e4_pipeline` is a English model originally trained by ICT2214Team7. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_conll_learning_rate1e4_pipeline_en_5.5.0_3.0_1725721083724.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_conll_learning_rate1e4_pipeline_en_5.5.0_3.0_1725721083724.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_conll_learning_rate1e4_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_conll_learning_rate1e4_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_conll_learning_rate1e4_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|306.6 MB| + +## References + +https://huggingface.co/ICT2214Team7/RoBERTa_conll_learning_rate1e4 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_dpt_online_sexism_detection_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_dpt_online_sexism_detection_en.md new file mode 100644 index 00000000000000..9d363c642c0f0e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_dpt_online_sexism_detection_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_dpt_online_sexism_detection RoBertaEmbeddings from debashish-roy +author: John Snow Labs +name: roberta_dpt_online_sexism_detection +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_dpt_online_sexism_detection` is a English model originally trained by debashish-roy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_dpt_online_sexism_detection_en_5.5.0_3.0_1725677917601.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_dpt_online_sexism_detection_en_5.5.0_3.0_1725677917601.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("roberta_dpt_online_sexism_detection","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("roberta_dpt_online_sexism_detection","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_dpt_online_sexism_detection| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|465.9 MB| + +## References + +https://huggingface.co/debashish-roy/Roberta-DPT-Online-Sexism-Detection \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_dpt_online_sexism_detection_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_dpt_online_sexism_detection_pipeline_en.md new file mode 100644 index 00000000000000..c6a26a868b2ed5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_dpt_online_sexism_detection_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_dpt_online_sexism_detection_pipeline pipeline RoBertaEmbeddings from debashish-roy +author: John Snow Labs +name: roberta_dpt_online_sexism_detection_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_dpt_online_sexism_detection_pipeline` is a English model originally trained by debashish-roy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_dpt_online_sexism_detection_pipeline_en_5.5.0_3.0_1725677939723.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_dpt_online_sexism_detection_pipeline_en_5.5.0_3.0_1725677939723.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_dpt_online_sexism_detection_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_dpt_online_sexism_detection_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_dpt_online_sexism_detection_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|466.0 MB| + +## References + +https://huggingface.co/debashish-roy/Roberta-DPT-Online-Sexism-Detection + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_en.md new file mode 100644 index 00000000000000..515adc3ea709d3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta RoBertaForTokenClassification from autosyrup +author: John Snow Labs +name: roberta +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta` is a English model originally trained by autosyrup. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_en_5.5.0_3.0_1725706503898.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_en_5.5.0_3.0_1725706503898.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# input columns must match the upstream output columns: "document" and "token"
+tokenClassifier = RoBertaForTokenClassification.pretrained("roberta","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("roberta", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
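+
+Token-level predictions land in the `ner` column and line up positionally with the `token` annotations, so both can be shown side by side (a sketch, assuming the standard annotation schema):
+
+```python
+# tokens and their predicted tags, in the same order
+pipelineDF.select("token.result", "ner.result").show(truncate=False)
+```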
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/autosyrup/roberta \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_full_finetuned_ner_pablo_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_full_finetuned_ner_pablo_en.md new file mode 100644 index 00000000000000..a753e5145598df --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_full_finetuned_ner_pablo_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_full_finetuned_ner_pablo RoBertaForTokenClassification from pabRomero +author: John Snow Labs +name: roberta_full_finetuned_ner_pablo +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_full_finetuned_ner_pablo` is a English model originally trained by pabRomero. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_full_finetuned_ner_pablo_en_5.5.0_3.0_1725706801177.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_full_finetuned_ner_pablo_en_5.5.0_3.0_1725706801177.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# input columns must match the upstream output columns: "document" and "token"
+tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_full_finetuned_ner_pablo","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_full_finetuned_ner_pablo", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_full_finetuned_ner_pablo| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|441.8 MB| + +## References + +https://huggingface.co/pabRomero/RoBERTa-full-finetuned-ner-pablo \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_full_finetuned_ner_pablo_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_full_finetuned_ner_pablo_pipeline_en.md new file mode 100644 index 00000000000000..0862752154718b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_full_finetuned_ner_pablo_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_full_finetuned_ner_pablo_pipeline pipeline RoBertaForTokenClassification from pabRomero +author: John Snow Labs +name: roberta_full_finetuned_ner_pablo_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_full_finetuned_ner_pablo_pipeline` is a English model originally trained by pabRomero. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_full_finetuned_ner_pablo_pipeline_en_5.5.0_3.0_1725706826777.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_full_finetuned_ner_pablo_pipeline_en_5.5.0_3.0_1725706826777.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_full_finetuned_ner_pablo_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_full_finetuned_ner_pablo_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_full_finetuned_ner_pablo_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|441.8 MB| + +## References + +https://huggingface.co/pabRomero/RoBERTa-full-finetuned-ner-pablo + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_ganda_cased_malay_ner_full_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_ganda_cased_malay_ner_full_en.md new file mode 100644 index 00000000000000..e6df7a4812650c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_ganda_cased_malay_ner_full_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_ganda_cased_malay_ner_full RoBertaForTokenClassification from nxaliao +author: John Snow Labs +name: roberta_ganda_cased_malay_ner_full +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_ganda_cased_malay_ner_full` is a English model originally trained by nxaliao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_ganda_cased_malay_ner_full_en_5.5.0_3.0_1725668213838.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_ganda_cased_malay_ner_full_en_5.5.0_3.0_1725668213838.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# input columns must match the upstream output columns: "document" and "token"
+tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_ganda_cased_malay_ner_full","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_ganda_cased_malay_ner_full", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
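+
+For quick, ad-hoc inference on a few strings, the fitted pipeline can also be wrapped in a `LightPipeline`, which skips building a DataFrame. A sketch (the example sentence is arbitrary):
+
+```python
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+result = light.fullAnnotate("Maria works at a clinic in Kuala Lumpur.")
+```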
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_ganda_cased_malay_ner_full| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/nxaliao/roberta-lg-cased-ms-ner-full \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_ganda_cased_malay_ner_v3_test_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_ganda_cased_malay_ner_v3_test_en.md new file mode 100644 index 00000000000000..11168028579f05 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_ganda_cased_malay_ner_v3_test_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_ganda_cased_malay_ner_v3_test RoBertaForTokenClassification from nxaliao +author: John Snow Labs +name: roberta_ganda_cased_malay_ner_v3_test +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_ganda_cased_malay_ner_v3_test` is a English model originally trained by nxaliao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_ganda_cased_malay_ner_v3_test_en_5.5.0_3.0_1725668758870.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_ganda_cased_malay_ner_v3_test_en_5.5.0_3.0_1725668758870.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# input columns must match the upstream output columns: "document" and "token"
+tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_ganda_cased_malay_ner_v3_test","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_ganda_cased_malay_ner_v3_test", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_ganda_cased_malay_ner_v3_test| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/nxaliao/roberta-lg-cased-ms-ner-v3-test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_ganda_cased_malay_ner_v3_test_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_ganda_cased_malay_ner_v3_test_pipeline_en.md new file mode 100644 index 00000000000000..3fd076dbccf1ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_ganda_cased_malay_ner_v3_test_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_ganda_cased_malay_ner_v3_test_pipeline pipeline RoBertaForTokenClassification from nxaliao +author: John Snow Labs +name: roberta_ganda_cased_malay_ner_v3_test_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_ganda_cased_malay_ner_v3_test_pipeline` is a English model originally trained by nxaliao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_ganda_cased_malay_ner_v3_test_pipeline_en_5.5.0_3.0_1725668827937.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_ganda_cased_malay_ner_v3_test_pipeline_en_5.5.0_3.0_1725668827937.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_ganda_cased_malay_ner_v3_test_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_ganda_cased_malay_ner_v3_test_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_ganda_cased_malay_ner_v3_test_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/nxaliao/roberta-lg-cased-ms-ner-v3-test + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_india_ner_trainer_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_india_ner_trainer_pipeline_en.md new file mode 100644 index 00000000000000..4cfe1795ff8bc2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_india_ner_trainer_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_india_ner_trainer_pipeline pipeline RoBertaForTokenClassification from iamfadi +author: John Snow Labs +name: roberta_india_ner_trainer_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_india_ner_trainer_pipeline` is a English model originally trained by iamfadi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_india_ner_trainer_pipeline_en_5.5.0_3.0_1725668605249.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_india_ner_trainer_pipeline_en_5.5.0_3.0_1725668605249.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_india_ner_trainer_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_india_ner_trainer_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_india_ner_trainer_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|444.2 MB| + +## References + +https://huggingface.co/iamfadi/roberta_india_ner_trainer + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_large_biomedical_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_large_biomedical_en.md new file mode 100644 index 00000000000000..92ab69df7dbed9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_large_biomedical_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_large_biomedical RoBertaEmbeddings from avacaondata +author: John Snow Labs +name: roberta_large_biomedical +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_large_biomedical` is a English model originally trained by avacaondata. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_large_biomedical_en_5.5.0_3.0_1725715893861.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_large_biomedical_en_5.5.0_3.0_1725715893861.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("roberta_large_biomedical","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("roberta_large_biomedical","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
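+
+Fitting downloads the embeddings model, so it can be worth persisting the fitted pipeline and reloading it later. A sketch using the standard Spark ML writer (the path is illustrative):
+
+```python
+from pyspark.ml import PipelineModel
+
+# save once, then reload without re-downloading the pretrained weights
+pipelineModel.write().overwrite().save("/tmp/roberta_large_biomedical_pipeline")
+reloaded = PipelineModel.load("/tmp/roberta_large_biomedical_pipeline")
+```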
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_large_biomedical| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/avacaondata/roberta-large-biomedical \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_large_biomedical_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_large_biomedical_pipeline_en.md new file mode 100644 index 00000000000000..8149bdc68b6db7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_large_biomedical_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_large_biomedical_pipeline pipeline RoBertaEmbeddings from avacaondata +author: John Snow Labs +name: roberta_large_biomedical_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_large_biomedical_pipeline` is a English model originally trained by avacaondata. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_large_biomedical_pipeline_en_5.5.0_3.0_1725715960119.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_large_biomedical_pipeline_en_5.5.0_3.0_1725715960119.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_large_biomedical_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_large_biomedical_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_large_biomedical_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/avacaondata/roberta-large-biomedical + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_large_bne_livingner1_es.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_large_bne_livingner1_es.md new file mode 100644 index 00000000000000..19fb94e3119534 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_large_bne_livingner1_es.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Castilian, Spanish roberta_large_bne_livingner1 RoBertaForTokenClassification from IIC +author: John Snow Labs +name: roberta_large_bne_livingner1 +date: 2024-09-07 +tags: [es, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_large_bne_livingner1` is a Castilian, Spanish model originally trained by IIC. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_large_bne_livingner1_es_5.5.0_3.0_1725708678875.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_large_bne_livingner1_es_5.5.0_3.0_1725708678875.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# input columns must match the upstream output columns: "document" and "token"
+tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_large_bne_livingner1","es") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_large_bne_livingner1", "es")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_large_bne_livingner1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|es| +|Size:|1.3 GB| + +## References + +https://huggingface.co/IIC/roberta-large-bne-livingner1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_large_bne_livingner1_pipeline_es.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_large_bne_livingner1_pipeline_es.md new file mode 100644 index 00000000000000..f65a774cabf947 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_large_bne_livingner1_pipeline_es.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Castilian, Spanish roberta_large_bne_livingner1_pipeline pipeline RoBertaForTokenClassification from IIC +author: John Snow Labs +name: roberta_large_bne_livingner1_pipeline +date: 2024-09-07 +tags: [es, open_source, pipeline, onnx] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_large_bne_livingner1_pipeline` is a Castilian, Spanish model originally trained by IIC. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_large_bne_livingner1_pipeline_es_5.5.0_3.0_1725708750663.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_large_bne_livingner1_pipeline_es_5.5.0_3.0_1725708750663.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_large_bne_livingner1_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_large_bne_livingner1_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_large_bne_livingner1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|1.3 GB| + +## References + +https://huggingface.co/IIC/roberta-large-bne-livingner1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_large_catalan_v2_ca.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_large_catalan_v2_ca.md new file mode 100644 index 00000000000000..600205b1c72470 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_large_catalan_v2_ca.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Catalan, Valencian roberta_large_catalan_v2 RoBertaEmbeddings from projecte-aina +author: John Snow Labs +name: roberta_large_catalan_v2 +date: 2024-09-07 +tags: [ca, open_source, onnx, embeddings, roberta] +task: Embeddings +language: ca +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_large_catalan_v2` is a Catalan, Valencian model originally trained by projecte-aina. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_large_catalan_v2_ca_5.5.0_3.0_1725716852874.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_large_catalan_v2_ca_5.5.0_3.0_1725716852874.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("roberta_large_catalan_v2","ca") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("roberta_large_catalan_v2","ca") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_large_catalan_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|ca| +|Size:|844.0 MB| + +## References + +https://huggingface.co/projecte-aina/roberta-large-ca-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_large_catalan_v2_pipeline_ca.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_large_catalan_v2_pipeline_ca.md new file mode 100644 index 00000000000000..0bb9e63c351983 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_large_catalan_v2_pipeline_ca.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Catalan, Valencian roberta_large_catalan_v2_pipeline pipeline RoBertaEmbeddings from projecte-aina +author: John Snow Labs +name: roberta_large_catalan_v2_pipeline +date: 2024-09-07 +tags: [ca, open_source, pipeline, onnx] +task: Embeddings +language: ca +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_large_catalan_v2_pipeline` is a Catalan, Valencian model originally trained by projecte-aina. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_large_catalan_v2_pipeline_ca_5.5.0_3.0_1725717080816.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_large_catalan_v2_pipeline_ca_5.5.0_3.0_1725717080816.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_large_catalan_v2_pipeline", lang = "ca") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_large_catalan_v2_pipeline", lang = "ca") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_large_catalan_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ca| +|Size:|844.1 MB| + +## References + +https://huggingface.co/projecte-aina/roberta-large-ca-v2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_large_genia_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_large_genia_ner_pipeline_en.md new file mode 100644 index 00000000000000..f680e1d2f9a00a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_large_genia_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_large_genia_ner_pipeline pipeline RoBertaForTokenClassification from CheccoCando +author: John Snow Labs +name: roberta_large_genia_ner_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_large_genia_ner_pipeline` is a English model originally trained by CheccoCando. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_large_genia_ner_pipeline_en_5.5.0_3.0_1725723922371.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_large_genia_ner_pipeline_en_5.5.0_3.0_1725723922371.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# sample input; any DataFrame with a "text" column works
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("roberta_large_genia_ner_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+// sample input; any DataFrame with a "text" column works
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipeline = new PretrainedPipeline("roberta_large_genia_ner_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_large_genia_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/CheccoCando/roberta-large_GENIA_NER + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_large_mrpc_two_stage_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_large_mrpc_two_stage_pipeline_en.md new file mode 100644 index 00000000000000..e452b47b17e60d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_large_mrpc_two_stage_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_large_mrpc_two_stage_pipeline pipeline RoBertaEmbeddings from ji-xin +author: John Snow Labs +name: roberta_large_mrpc_two_stage_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_large_mrpc_two_stage_pipeline` is a English model originally trained by ji-xin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_large_mrpc_two_stage_pipeline_en_5.5.0_3.0_1725678143352.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_large_mrpc_two_stage_pipeline_en_5.5.0_3.0_1725678143352.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# sample input; any DataFrame with a "text" column works
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("roberta_large_mrpc_two_stage_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+// sample input; any DataFrame with a "text" column works
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipeline = new PretrainedPipeline("roberta_large_mrpc_two_stage_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_large_mrpc_two_stage_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/ji-xin/roberta_large-MRPC-two_stage + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_large_ner_english_finetuned_ner_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_large_ner_english_finetuned_ner_en.md new file mode 100644 index 00000000000000..1cc7f0eb85c162 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_large_ner_english_finetuned_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_large_ner_english_finetuned_ner RoBertaForTokenClassification from GiladH +author: John Snow Labs +name: roberta_large_ner_english_finetuned_ner +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_large_ner_english_finetuned_ner` is a English model originally trained by GiladH. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_large_ner_english_finetuned_ner_en_5.5.0_3.0_1725708313185.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_large_ner_english_finetuned_ner_en_5.5.0_3.0_1725708313185.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_large_ner_english_finetuned_ner","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_large_ner_english_finetuned_ner", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
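+The `transform` call above only attaches annotation columns; a quick way to inspect the predictions is to project the `result` field of the `token` and `ner` columns. This is a minimal sketch and assumes the `pipelineDF` DataFrame produced in the snippet above.
+
+```python
+# one array of tokens and one array of predicted NER tags per input row
+pipelineDF.select("token.result", "ner.result").show(truncate=False)
+```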
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_large_ner_english_finetuned_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/GiladH/roberta-large-ner-english-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_large_ner_english_finetuned_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_large_ner_english_finetuned_ner_pipeline_en.md new file mode 100644 index 00000000000000..644b0b216b8a31 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_large_ner_english_finetuned_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_large_ner_english_finetuned_ner_pipeline pipeline RoBertaForTokenClassification from GiladH +author: John Snow Labs +name: roberta_large_ner_english_finetuned_ner_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_large_ner_english_finetuned_ner_pipeline` is a English model originally trained by GiladH. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_large_ner_english_finetuned_ner_pipeline_en_5.5.0_3.0_1725708383947.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_large_ner_english_finetuned_ner_pipeline_en_5.5.0_3.0_1725708383947.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# sample input; any DataFrame with a "text" column works
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("roberta_large_ner_english_finetuned_ner_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+// sample input; any DataFrame with a "text" column works
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipeline = new PretrainedPipeline("roberta_large_ner_english_finetuned_ner_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_large_ner_english_finetuned_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/GiladH/roberta-large-ner-english-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_large_schizophrenia_v3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_large_schizophrenia_v3_pipeline_en.md new file mode 100644 index 00000000000000..59d5a58b63fa52 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_large_schizophrenia_v3_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_large_schizophrenia_v3_pipeline pipeline RoBertaEmbeddings from Amalq +author: John Snow Labs +name: roberta_large_schizophrenia_v3_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_large_schizophrenia_v3_pipeline` is a English model originally trained by Amalq. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_large_schizophrenia_v3_pipeline_en_5.5.0_3.0_1725678593011.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_large_schizophrenia_v3_pipeline_en_5.5.0_3.0_1725678593011.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# sample input; any DataFrame with a "text" column works
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("roberta_large_schizophrenia_v3_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+// sample input; any DataFrame with a "text" column works
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipeline = new PretrainedPipeline("roberta_large_schizophrenia_v3_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_large_schizophrenia_v3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/Amalq/roberta-large-schizophrenia-v3 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_large_wechsel_finnish_fi.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_large_wechsel_finnish_fi.md new file mode 100644 index 00000000000000..2045c481c90468 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_large_wechsel_finnish_fi.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Finnish roberta_large_wechsel_finnish RoBertaEmbeddings from Finnish-NLP +author: John Snow Labs +name: roberta_large_wechsel_finnish +date: 2024-09-07 +tags: [fi, open_source, onnx, embeddings, roberta] +task: Embeddings +language: fi +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_large_wechsel_finnish` is a Finnish model originally trained by Finnish-NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_large_wechsel_finnish_fi_5.5.0_3.0_1725698010448.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_large_wechsel_finnish_fi_5.5.0_3.0_1725698010448.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("roberta_large_wechsel_finnish","fi") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("roberta_large_wechsel_finnish","fi") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
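+Each row of `pipelineDF` carries one annotation per token in the `embeddings` column, with the vector stored in the annotation's `embeddings` field. A minimal sketch for inspecting a few token vectors, assuming the `pipelineDF` DataFrame built above:
+
+```python
+# one row per token: the token text and its embedding vector
+pipelineDF.selectExpr("explode(embeddings) as emb") \
+    .selectExpr("emb.result as token", "emb.embeddings as vector") \
+    .show(5, truncate=80)
+```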
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_large_wechsel_finnish| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|fi| +|Size:|1.3 GB| + +## References + +https://huggingface.co/Finnish-NLP/roberta-large-wechsel-finnish \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_large_wechsel_finnish_pipeline_fi.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_large_wechsel_finnish_pipeline_fi.md new file mode 100644 index 00000000000000..3488476507ca1c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_large_wechsel_finnish_pipeline_fi.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Finnish roberta_large_wechsel_finnish_pipeline pipeline RoBertaEmbeddings from Finnish-NLP +author: John Snow Labs +name: roberta_large_wechsel_finnish_pipeline +date: 2024-09-07 +tags: [fi, open_source, pipeline, onnx] +task: Embeddings +language: fi +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_large_wechsel_finnish_pipeline` is a Finnish model originally trained by Finnish-NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_large_wechsel_finnish_pipeline_fi_5.5.0_3.0_1725698069905.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_large_wechsel_finnish_pipeline_fi_5.5.0_3.0_1725698069905.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# sample input; any DataFrame with a "text" column works
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("roberta_large_wechsel_finnish_pipeline", lang = "fi")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+// sample input; any DataFrame with a "text" column works
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipeline = new PretrainedPipeline("roberta_large_wechsel_finnish_pipeline", lang = "fi")
+val annotations = pipeline.transform(df)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_large_wechsel_finnish_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fi| +|Size:|1.3 GB| + +## References + +https://huggingface.co/Finnish-NLP/roberta-large-wechsel-finnish + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_moral_emotion_eng_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_moral_emotion_eng_en.md new file mode 100644 index 00000000000000..09cece73f454c2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_moral_emotion_eng_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_moral_emotion_eng RoBertaForSequenceClassification from Chaeyoon +author: John Snow Labs +name: roberta_moral_emotion_eng +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_moral_emotion_eng` is a English model originally trained by Chaeyoon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_moral_emotion_eng_en_5.5.0_3.0_1725679733037.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_moral_emotion_eng_en_5.5.0_3.0_1725679733037.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("roberta_moral_emotion_eng","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("roberta_moral_emotion_eng", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
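+To read the predicted label for each input row, the `class` output column can be projected in the same way as any other annotation column. A minimal sketch, assuming the `pipelineDF` DataFrame from the example above:
+
+```python
+# each row carries its predicted label(s) in class.result
+pipelineDF.select("text", "class.result").show(truncate=False)
+```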
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_moral_emotion_eng| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|445.7 MB| + +## References + +https://huggingface.co/Chaeyoon/RoBERTa-Moral-Emotion-ENG \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_moral_emotion_eng_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_moral_emotion_eng_pipeline_en.md new file mode 100644 index 00000000000000..27b0c328b3bcb0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_moral_emotion_eng_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_moral_emotion_eng_pipeline pipeline RoBertaForSequenceClassification from Chaeyoon +author: John Snow Labs +name: roberta_moral_emotion_eng_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_moral_emotion_eng_pipeline` is a English model originally trained by Chaeyoon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_moral_emotion_eng_pipeline_en_5.5.0_3.0_1725679762217.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_moral_emotion_eng_pipeline_en_5.5.0_3.0_1725679762217.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# sample input; any DataFrame with a "text" column works
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("roberta_moral_emotion_eng_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+// sample input; any DataFrame with a "text" column works
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipeline = new PretrainedPipeline("roberta_moral_emotion_eng_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_moral_emotion_eng_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|445.7 MB| + +## References + +https://huggingface.co/Chaeyoon/RoBERTa-Moral-Emotion-ENG + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_ncc_des_128_decayfrom200_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_ncc_des_128_decayfrom200_pipeline_en.md new file mode 100644 index 00000000000000..2a477580443212 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_ncc_des_128_decayfrom200_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_ncc_des_128_decayfrom200_pipeline pipeline RoBertaEmbeddings from NbAiLab +author: John Snow Labs +name: roberta_ncc_des_128_decayfrom200_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_ncc_des_128_decayfrom200_pipeline` is a English model originally trained by NbAiLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_ncc_des_128_decayfrom200_pipeline_en_5.5.0_3.0_1725673683754.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_ncc_des_128_decayfrom200_pipeline_en_5.5.0_3.0_1725673683754.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# sample input; any DataFrame with a "text" column works
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("roberta_ncc_des_128_decayfrom200_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+// sample input; any DataFrame with a "text" column works
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipeline = new PretrainedPipeline("roberta_ncc_des_128_decayfrom200_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_ncc_des_128_decayfrom200_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|464.6 MB| + +## References + +https://huggingface.co/NbAiLab/roberta_NCC_des_128_decayfrom200 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_pipeline_en.md new file mode 100644 index 00000000000000..68acedfd816c88 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_pipeline pipeline RoBertaForTokenClassification from autosyrup +author: John Snow Labs +name: roberta_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_pipeline` is a English model originally trained by autosyrup. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_pipeline_en_5.5.0_3.0_1725706565091.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_pipeline_en_5.5.0_3.0_1725706565091.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# sample input; any DataFrame with a "text" column works
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("roberta_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+// sample input; any DataFrame with a "text" column works
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipeline = new PretrainedPipeline("roberta_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/autosyrup/roberta + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_poetry_anger_crpo_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_poetry_anger_crpo_en.md new file mode 100644 index 00000000000000..d48371af13543e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_poetry_anger_crpo_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_poetry_anger_crpo RoBertaEmbeddings from andreipb +author: John Snow Labs +name: roberta_poetry_anger_crpo +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_poetry_anger_crpo` is a English model originally trained by andreipb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_poetry_anger_crpo_en_5.5.0_3.0_1725715875598.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_poetry_anger_crpo_en_5.5.0_3.0_1725715875598.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("roberta_poetry_anger_crpo","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("roberta_poetry_anger_crpo","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
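+If plain Spark vectors are more convenient than annotation structs downstream, an `EmbeddingsFinisher` stage can be appended to the same pipeline. This is a sketch rather than part of the original example; the output column name is illustrative:
+
+```python
+from sparknlp.base import EmbeddingsFinisher
+
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["finished_embeddings"]) \
+    .setOutputAsVector(True)
+
+# reuse the stages defined above and add the finisher at the end
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings, finisher])
+pipelineDF = pipeline.fit(data).transform(data)
+```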
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_poetry_anger_crpo| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|466.2 MB| + +## References + +https://huggingface.co/andreipb/roberta-poetry-anger-crpo \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_poetry_anger_crpo_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_poetry_anger_crpo_pipeline_en.md new file mode 100644 index 00000000000000..54456a2f8cfc50 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_poetry_anger_crpo_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_poetry_anger_crpo_pipeline pipeline RoBertaEmbeddings from andreipb +author: John Snow Labs +name: roberta_poetry_anger_crpo_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_poetry_anger_crpo_pipeline` is a English model originally trained by andreipb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_poetry_anger_crpo_pipeline_en_5.5.0_3.0_1725715896189.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_poetry_anger_crpo_pipeline_en_5.5.0_3.0_1725715896189.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# sample input; any DataFrame with a "text" column works
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("roberta_poetry_anger_crpo_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+// sample input; any DataFrame with a "text" column works
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipeline = new PretrainedPipeline("roberta_poetry_anger_crpo_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_poetry_anger_crpo_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|466.2 MB| + +## References + +https://huggingface.co/andreipb/roberta-poetry-anger-crpo + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_psych_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_psych_en.md new file mode 100644 index 00000000000000..b4773464654c59 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_psych_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_psych RoBertaEmbeddings from mlaricheva +author: John Snow Labs +name: roberta_psych +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_psych` is a English model originally trained by mlaricheva. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_psych_en_5.5.0_3.0_1725677753150.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_psych_en_5.5.0_3.0_1725677753150.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("roberta_psych","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("roberta_psych","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
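+For ad-hoc texts it can be handier to wrap the fitted pipeline in a `LightPipeline`, which runs in memory without a DataFrame. A small sketch, assuming the `pipelineModel` fitted above:
+
+```python
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+annotated = light.fullAnnotate("I love spark-nlp")[0]
+
+# pair each token with its embedding vector
+tokens = [ann.result for ann in annotated["token"]]
+vectors = [ann.embeddings for ann in annotated["embeddings"]]
+print(tokens[0], len(vectors[0]))
+```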
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_psych| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|465.6 MB| + +## References + +https://huggingface.co/mlaricheva/roberta-psych \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_qa_base_spanish_squades_becasincentivos3_es.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_qa_base_spanish_squades_becasincentivos3_es.md new file mode 100644 index 00000000000000..baa774ee84cc3b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_qa_base_spanish_squades_becasincentivos3_es.md @@ -0,0 +1,92 @@ +--- +layout: model +title: Spanish RobertaForQuestionAnswering Base Cased model (from Evelyn18) +author: John Snow Labs +name: roberta_qa_base_spanish_squades_becasincentivos3 +date: 2024-09-07 +tags: [es, open_source, roberta, question_answering, onnx] +task: Question Answering +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RobertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `roberta-base-spanish-squades-becasIncentivos3` is a Spanish model originally trained by `Evelyn18`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_qa_base_spanish_squades_becasincentivos3_es_5.5.0_3.0_1725699456115.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_qa_base_spanish_squades_becasincentivos3_es_5.5.0_3.0_1725699456115.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +Document_Assembler = MultiDocumentAssembler()\ + .setInputCols(["question", "context"])\ + .setOutputCols(["document_question", "document_context"]) + +Question_Answering = RoBertaForQuestionAnswering.pretrained("roberta_qa_base_spanish_squades_becasincentivos3","es")\ + .setInputCols(["document_question", "document_context"])\ + .setOutputCol("answer")\ + .setCaseSensitive(True) + +pipeline = Pipeline(stages=[Document_Assembler, Question_Answering]) + +data = spark.createDataFrame([["What's my name?","My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(data).transform(data) +``` +```scala +val Document_Assembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val Question_Answering = RoBertaForQuestionAnswering.pretrained("roberta_qa_base_spanish_squades_becasincentivos3","es") + .setInputCols(Array("document_question", "document_context")) + .setOutputCol("answer") + .setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(Document_Assembler, Question_Answering)) + +val data = Seq("What's my name?","My name is Clara and I live in Berkeley.").toDS.toDF("question", "context") + +val result = pipeline.fit(data).transform(data) +``` +
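+The predicted answer span ends up in the `answer` column; projecting its `result` field gives the plain answer strings. A minimal check, assuming the `result` DataFrame from the example above:
+
+```python
+result.select("answer.result").show(truncate=False)
+```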
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_qa_base_spanish_squades_becasincentivos3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|es| +|Size:|459.1 MB| + +## References + +References + +- https://huggingface.co/Evelyn18/roberta-base-spanish-squades-becasIncentivos3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_qa_base_spanish_squades_becasincentivos3_pipeline_es.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_qa_base_spanish_squades_becasincentivos3_pipeline_es.md new file mode 100644 index 00000000000000..5057b3baac6197 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_qa_base_spanish_squades_becasincentivos3_pipeline_es.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Castilian, Spanish roberta_qa_base_spanish_squades_becasincentivos3_pipeline pipeline RoBertaForQuestionAnswering from Evelyn18 +author: John Snow Labs +name: roberta_qa_base_spanish_squades_becasincentivos3_pipeline +date: 2024-09-07 +tags: [es, open_source, pipeline, onnx] +task: Question Answering +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_qa_base_spanish_squades_becasincentivos3_pipeline` is a Castilian, Spanish model originally trained by Evelyn18. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_qa_base_spanish_squades_becasincentivos3_pipeline_es_5.5.0_3.0_1725699477410.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_qa_base_spanish_squades_becasincentivos3_pipeline_es_5.5.0_3.0_1725699477410.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# sample input; the pipeline starts with a MultiDocumentAssembler and expects "question" and "context" columns
+df = spark.createDataFrame([["What is my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context")
+
+pipeline = PretrainedPipeline("roberta_qa_base_spanish_squades_becasincentivos3_pipeline", lang = "es")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+// sample input; the pipeline starts with a MultiDocumentAssembler and expects "question" and "context" columns
+val df = Seq(("What is my name?", "My name is Clara and I live in Berkeley.")).toDF("question", "context")
+
+val pipeline = new PretrainedPipeline("roberta_qa_base_spanish_squades_becasincentivos3_pipeline", lang = "es")
+val annotations = pipeline.transform(df)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_qa_base_spanish_squades_becasincentivos3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|459.1 MB| + +## References + +https://huggingface.co/Evelyn18/roberta-base-spanish-squades-becasIncentivos3 + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_self_trained_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_self_trained_en.md new file mode 100644 index 00000000000000..002c7bc9520334 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_self_trained_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_self_trained RoBertaForTokenClassification from ICT2214Team7 +author: John Snow Labs +name: roberta_self_trained +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_self_trained` is a English model originally trained by ICT2214Team7. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_self_trained_en_5.5.0_3.0_1725707580705.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_self_trained_en_5.5.0_3.0_1725707580705.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_self_trained","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_self_trained", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
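+To line tokens up with their predicted tags one pair per row, the two result arrays can be zipped and exploded. A sketch under the assumption that `pipelineDF` comes from the snippet above (the `'0'`/`'1'` field names are how `arrays_zip` labels zipped expressions):
+
+```python
+import pyspark.sql.functions as F
+
+pipelineDF.select(F.explode(F.arrays_zip(F.col("token.result"), F.col("ner.result"))).alias("cols")) \
+    .select(F.expr("cols['0']").alias("token"), F.expr("cols['1']").alias("ner_label")) \
+    .show(truncate=False)
+```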
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_self_trained| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|306.4 MB| + +## References + +https://huggingface.co/ICT2214Team7/RoBERTa_Self_Trained \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_self_trained_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_self_trained_pipeline_en.md new file mode 100644 index 00000000000000..6cd7a3095a51a6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_self_trained_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_self_trained_pipeline pipeline RoBertaForTokenClassification from ICT2214Team7 +author: John Snow Labs +name: roberta_self_trained_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_self_trained_pipeline` is a English model originally trained by ICT2214Team7. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_self_trained_pipeline_en_5.5.0_3.0_1725707595297.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_self_trained_pipeline_en_5.5.0_3.0_1725707595297.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# sample input; any DataFrame with a "text" column works
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("roberta_self_trained_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+// sample input; any DataFrame with a "text" column works
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipeline = new PretrainedPipeline("roberta_self_trained_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_self_trained_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|306.5 MB| + +## References + +https://huggingface.co/ICT2214Team7/RoBERTa_Self_Trained + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_spanish_clinical_trials_misc_ents_ner_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_spanish_clinical_trials_misc_ents_ner_en.md new file mode 100644 index 00000000000000..5822a7e7e17771 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_spanish_clinical_trials_misc_ents_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_spanish_clinical_trials_misc_ents_ner RoBertaForTokenClassification from medspaner +author: John Snow Labs +name: roberta_spanish_clinical_trials_misc_ents_ner +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_spanish_clinical_trials_misc_ents_ner` is a English model originally trained by medspaner. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_spanish_clinical_trials_misc_ents_ner_en_5.5.0_3.0_1725707792641.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_spanish_clinical_trials_misc_ents_ner_en_5.5.0_3.0_1725707792641.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_spanish_clinical_trials_misc_ents_ner","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_spanish_clinical_trials_misc_ents_ner", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
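+If only flat arrays of strings are needed downstream, a `Finisher` stage can be appended to the pipeline above to turn the annotations into plain string columns. This is a sketch; the output column name is illustrative:
+
+```python
+from sparknlp.base import Finisher
+
+finisher = Finisher() \
+    .setInputCols(["ner"]) \
+    .setOutputCols(["finished_ner"]) \
+    .setCleanAnnotations(False)
+
+# reuse the stages defined above and add the finisher at the end
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier, finisher])
+pipelineDF = pipeline.fit(data).transform(data)
+```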
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_spanish_clinical_trials_misc_ents_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|442.5 MB| + +## References + +https://huggingface.co/medspaner/roberta-es-clinical-trials-misc-ents-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_squad_v2_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_squad_v2_en.md new file mode 100644 index 00000000000000..44e1b1482671ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_squad_v2_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English roberta_squad_v2 RoBertaForQuestionAnswering from tareeb23 +author: John Snow Labs +name: roberta_squad_v2 +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_squad_v2` is a English model originally trained by tareeb23. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_squad_v2_en_5.5.0_3.0_1725699175903.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_squad_v2_en_5.5.0_3.0_1725699175903.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_squad_v2","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_squad_v2", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
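+Besides the answer text, the annotation metadata usually carries details such as the span's score, which can help filter low-confidence predictions. A minimal sketch assuming the `pipelineDF` DataFrame built above:
+
+```python
+# answer.result holds the predicted span, answer.metadata its details
+pipelineDF.select("answer.result", "answer.metadata").show(truncate=False)
+```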
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_squad_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|461.9 MB| + +## References + +https://huggingface.co/tareeb23/Roberta_SQUAD_V2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_squad_v2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_squad_v2_pipeline_en.md new file mode 100644 index 00000000000000..45d87442ea3006 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_squad_v2_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English roberta_squad_v2_pipeline pipeline RoBertaForQuestionAnswering from tareeb23 +author: John Snow Labs +name: roberta_squad_v2_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_squad_v2_pipeline` is a English model originally trained by tareeb23. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_squad_v2_pipeline_en_5.5.0_3.0_1725699197796.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_squad_v2_pipeline_en_5.5.0_3.0_1725699197796.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# sample input; the pipeline starts with a MultiDocumentAssembler and expects "question" and "context" columns
+df = spark.createDataFrame([["What is my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context")
+
+pipeline = PretrainedPipeline("roberta_squad_v2_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+// sample input; the pipeline starts with a MultiDocumentAssembler and expects "question" and "context" columns
+val df = Seq(("What is my name?", "My name is Clara and I live in Berkeley.")).toDF("question", "context")
+
+val pipeline = new PretrainedPipeline("roberta_squad_v2_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_squad_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|461.9 MB| + +## References + +https://huggingface.co/tareeb23/Roberta_SQUAD_V2 + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_tagalog_base_ft_udpos213_indonesian_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_tagalog_base_ft_udpos213_indonesian_pipeline_en.md new file mode 100644 index 00000000000000..a5e562c772d0c9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_tagalog_base_ft_udpos213_indonesian_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_tagalog_base_ft_udpos213_indonesian_pipeline pipeline RoBertaForTokenClassification from hellojimson +author: John Snow Labs +name: roberta_tagalog_base_ft_udpos213_indonesian_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_tagalog_base_ft_udpos213_indonesian_pipeline` is a English model originally trained by hellojimson. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_tagalog_base_ft_udpos213_indonesian_pipeline_en_5.5.0_3.0_1725667769027.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_tagalog_base_ft_udpos213_indonesian_pipeline_en_5.5.0_3.0_1725667769027.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# sample input; any DataFrame with a "text" column works
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("roberta_tagalog_base_ft_udpos213_indonesian_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+// sample input; any DataFrame with a "text" column works
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipeline = new PretrainedPipeline("roberta_tagalog_base_ft_udpos213_indonesian_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_tagalog_base_ft_udpos213_indonesian_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/hellojimson/roberta-tagalog-base-ft-udpos213-id + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_token_classification_araeval24_aug800_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_token_classification_araeval24_aug800_en.md new file mode 100644 index 00000000000000..8d6ab36925e5be --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_token_classification_araeval24_aug800_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_token_classification_araeval24_aug800 RoBertaForTokenClassification from MM2157 +author: John Snow Labs +name: roberta_token_classification_araeval24_aug800 +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_token_classification_araeval24_aug800` is a English model originally trained by MM2157. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_token_classification_araeval24_aug800_en_5.5.0_3.0_1725667997308.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_token_classification_araeval24_aug800_en_5.5.0_3.0_1725667997308.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# The classifier reads the document and token columns produced by the stages above
+tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_token_classification_araeval24_aug800","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+// The classifier reads the document and token columns produced by the stages above
+val tokenClassifier = RoBertaForTokenClassification.pretrained("roberta_token_classification_araeval24_aug800", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
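+Continuing from the Python example above, the predicted tags land in the `ner` annotation column; one way to read them back per token:
+
+```python
+# "result" holds the token text / predicted tag inside each annotation
+pipelineDF.selectExpr("token.result as tokens", "ner.result as tags").show(truncate=False)
+```
+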
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_token_classification_araeval24_aug800| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|442.1 MB| + +## References + +https://huggingface.co/MM2157/RoBERTa_token_classification_AraEval24_aug800 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_token_classification_araeval24_aug800_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_token_classification_araeval24_aug800_pipeline_en.md new file mode 100644 index 00000000000000..303cedfda61475 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_token_classification_araeval24_aug800_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_token_classification_araeval24_aug800_pipeline pipeline RoBertaForTokenClassification from MM2157 +author: John Snow Labs +name: roberta_token_classification_araeval24_aug800_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_token_classification_araeval24_aug800_pipeline` is a English model originally trained by MM2157. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_token_classification_araeval24_aug800_pipeline_en_5.5.0_3.0_1725668018927.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_token_classification_araeval24_aug800_pipeline_en_5.5.0_3.0_1725668018927.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_token_classification_araeval24_aug800_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_token_classification_araeval24_aug800_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_token_classification_araeval24_aug800_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|442.1 MB| + +## References + +https://huggingface.co/MM2157/RoBERTa_token_classification_AraEval24_aug800 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_urdu_small_finetuned_urdu_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_urdu_small_finetuned_urdu_en.md new file mode 100644 index 00000000000000..541fb6cb715938 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_urdu_small_finetuned_urdu_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English roberta_urdu_small_finetuned_urdu RoBertaEmbeddings from cxfajar197 +author: John Snow Labs +name: roberta_urdu_small_finetuned_urdu +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_urdu_small_finetuned_urdu` is a English model originally trained by cxfajar197. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_urdu_small_finetuned_urdu_en_5.5.0_3.0_1725697953558.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_urdu_small_finetuned_urdu_en_5.5.0_3.0_1725697953558.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("roberta_urdu_small_finetuned_urdu","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("roberta_urdu_small_finetuned_urdu","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
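+Continuing from the Python example above, each token's vector sits in the `embeddings` annotation column and can be exploded into one row per token:
+
+```python
+# "result" is the token text, "embeddings" the vector produced by the model
+pipelineDF.selectExpr("explode(embeddings) as emb") \
+    .selectExpr("emb.result as token", "emb.embeddings as vector") \
+    .show(truncate=False)
+```
+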
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_urdu_small_finetuned_urdu| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|471.0 MB| + +## References + +https://huggingface.co/cxfajar197/roberta-urdu-small-finetuned-urdu \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-roberta_urdu_small_finetuned_urdu_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-roberta_urdu_small_finetuned_urdu_pipeline_en.md new file mode 100644 index 00000000000000..cc7affdc83f42f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-roberta_urdu_small_finetuned_urdu_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_urdu_small_finetuned_urdu_pipeline pipeline RoBertaEmbeddings from cxfajar197 +author: John Snow Labs +name: roberta_urdu_small_finetuned_urdu_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_urdu_small_finetuned_urdu_pipeline` is a English model originally trained by cxfajar197. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_urdu_small_finetuned_urdu_pipeline_en_5.5.0_3.0_1725697975374.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_urdu_small_finetuned_urdu_pipeline_en_5.5.0_3.0_1725697975374.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_urdu_small_finetuned_urdu_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_urdu_small_finetuned_urdu_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_urdu_small_finetuned_urdu_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|471.0 MB| + +## References + +https://huggingface.co/cxfajar197/roberta-urdu-small-finetuned-urdu + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-robertalexpt_base_pt.md b/docs/_posts/ahmedlone127/2024-09-07-robertalexpt_base_pt.md new file mode 100644 index 00000000000000..7fed5089b152af --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-robertalexpt_base_pt.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Portuguese robertalexpt_base RoBertaEmbeddings from eduagarcia +author: John Snow Labs +name: robertalexpt_base +date: 2024-09-07 +tags: [pt, open_source, onnx, embeddings, roberta] +task: Embeddings +language: pt +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`robertalexpt_base` is a Portuguese model originally trained by eduagarcia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/robertalexpt_base_pt_5.5.0_3.0_1725697916703.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/robertalexpt_base_pt_5.5.0_3.0_1725697916703.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("robertalexpt_base","pt") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("robertalexpt_base","pt") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|robertalexpt_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|pt| +|Size:|296.7 MB| + +## References + +https://huggingface.co/eduagarcia/RoBERTaLexPT-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-robertatwitterbr_en.md b/docs/_posts/ahmedlone127/2024-09-07-robertatwitterbr_en.md new file mode 100644 index 00000000000000..18bbbb45163612 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-robertatwitterbr_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English robertatwitterbr RoBertaEmbeddings from verissimomanoel +author: John Snow Labs +name: robertatwitterbr +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`robertatwitterbr` is a English model originally trained by verissimomanoel. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/robertatwitterbr_en_5.5.0_3.0_1725716496482.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/robertatwitterbr_en_5.5.0_3.0_1725716496482.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("robertatwitterbr","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("robertatwitterbr","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|robertatwitterbr| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|590.6 MB| + +## References + +https://huggingface.co/verissimomanoel/RobertaTwitterBR \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-ros_model_en.md b/docs/_posts/ahmedlone127/2024-09-07-ros_model_en.md new file mode 100644 index 00000000000000..9ffc4e6b387141 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-ros_model_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ros_model MarianTransformer from AnasHXH +author: John Snow Labs +name: ros_model +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ros_model` is a English model originally trained by AnasHXH. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ros_model_en_5.5.0_3.0_1725740829189.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ros_model_en_5.5.0_3.0_1725740829189.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+# Split the incoming document into sentences for the translator
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("ros_model","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+// Split the incoming document into sentences for the translator
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("ros_model","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
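+Continuing from the Python example above, the translated text is written to the `translation` column and can be read back directly:
+
+```python
+# "result" holds the translated text for each detected sentence
+pipelineDF.select("translation.result").show(truncate=False)
+```
+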
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ros_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/AnasHXH/Ros_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-ros_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-ros_model_pipeline_en.md new file mode 100644 index 00000000000000..52abd1cb54879f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-ros_model_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English ros_model_pipeline pipeline MarianTransformer from AnasHXH +author: John Snow Labs +name: ros_model_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ros_model_pipeline` is a English model originally trained by AnasHXH. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ros_model_pipeline_en_5.5.0_3.0_1725740898640.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ros_model_pipeline_en_5.5.0_3.0_1725740898640.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ros_model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ros_model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
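+For a quick check without building a DataFrame, the pretrained pipeline can also be driven through `annotate`; the output keys depend on how the bundled stages are configured, so this sketch prints whatever keys come back rather than assuming their names:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("ros_model_pipeline", lang = "en")
+
+result = pipeline.annotate("I love spark-nlp")
+print(result.keys())  # output columns produced by the bundled stages
+print(result)
+```
+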
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ros_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/AnasHXH/Ros_model + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-rseq2_full_data_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-rseq2_full_data_pipeline_en.md new file mode 100644 index 00000000000000..c8d03341552c98 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-rseq2_full_data_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English rseq2_full_data_pipeline pipeline RoBertaForTokenClassification from C-Stuti +author: John Snow Labs +name: rseq2_full_data_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rseq2_full_data_pipeline` is a English model originally trained by C-Stuti. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rseq2_full_data_pipeline_en_5.5.0_3.0_1725668347822.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rseq2_full_data_pipeline_en_5.5.0_3.0_1725668347822.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("rseq2_full_data_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("rseq2_full_data_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rseq2_full_data_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|449.3 MB| + +## References + +https://huggingface.co/C-Stuti/Rseq2_full_data + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-run1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-run1_pipeline_en.md new file mode 100644 index 00000000000000..2eb34a9df35bb8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-run1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English run1_pipeline pipeline MarianTransformer from mptrigo +author: John Snow Labs +name: run1_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`run1_pipeline` is a English model originally trained by mptrigo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/run1_pipeline_en_5.5.0_3.0_1725741354413.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/run1_pipeline_en_5.5.0_3.0_1725741354413.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("run1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("run1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|run1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|358.7 MB| + +## References + +https://huggingface.co/mptrigo/run1 + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sbert_imdb_en.md b/docs/_posts/ahmedlone127/2024-09-07-sbert_imdb_en.md new file mode 100644 index 00000000000000..64996c43f58e49 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sbert_imdb_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sbert_imdb MPNetForSequenceClassification from Siki-77 +author: John Snow Labs +name: sbert_imdb +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, mpnet] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sbert_imdb` is a English model originally trained by Siki-77. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sbert_imdb_en_5.5.0_3.0_1725733659378.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sbert_imdb_en_5.5.0_3.0_1725733659378.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+# The classifier consumes the document and token columns produced above
+sequenceClassifier = MPNetForSequenceClassification.pretrained("sbert_imdb","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+// The classifier consumes the document and token columns produced above
+val sequenceClassifier = MPNetForSequenceClassification.pretrained("sbert_imdb", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
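+Continuing from the Python example above, the predicted label is written to the `class` column:
+
+```python
+# "class.result" is an array with the predicted label for each input document
+pipelineDF.select("text", "class.result").show(truncate=False)
+```
+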
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sbert_imdb| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Siki-77/sbert_imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-scenario_non_kd_from_scratch_data_hate_speech_filipino_model_xlm_roberta_base_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-scenario_non_kd_from_scratch_data_hate_speech_filipino_model_xlm_roberta_base_pipeline_en.md new file mode 100644 index 00000000000000..c8d6ed0a59a25d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-scenario_non_kd_from_scratch_data_hate_speech_filipino_model_xlm_roberta_base_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English scenario_non_kd_from_scratch_data_hate_speech_filipino_model_xlm_roberta_base_pipeline pipeline XlmRoBertaForSequenceClassification from haryoaw +author: John Snow Labs +name: scenario_non_kd_from_scratch_data_hate_speech_filipino_model_xlm_roberta_base_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`scenario_non_kd_from_scratch_data_hate_speech_filipino_model_xlm_roberta_base_pipeline` is a English model originally trained by haryoaw. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scenario_non_kd_from_scratch_data_hate_speech_filipino_model_xlm_roberta_base_pipeline_en_5.5.0_3.0_1725711997424.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scenario_non_kd_from_scratch_data_hate_speech_filipino_model_xlm_roberta_base_pipeline_en_5.5.0_3.0_1725711997424.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("scenario_non_kd_from_scratch_data_hate_speech_filipino_model_xlm_roberta_base_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("scenario_non_kd_from_scratch_data_hate_speech_filipino_model_xlm_roberta_base_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
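+When a downloaded pipeline's output column names are not documented, the transformed DataFrame's schema shows them; a sketch, assuming a `text` input column as in the other examples:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("scenario_non_kd_from_scratch_data_hate_speech_filipino_model_xlm_roberta_base_pipeline", lang = "en")
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+annotations.printSchema()  # lists the annotation columns added by each bundled stage
+```
+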
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scenario_non_kd_from_scratch_data_hate_speech_filipino_model_xlm_roberta_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|882.9 MB| + +## References + +https://huggingface.co/haryoaw/scenario-non-kd-from-scratch-data-hate_speech_filipino-model-xlm-roberta-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_afro_xlmr_base_finetuned_kintweetsc_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_afro_xlmr_base_finetuned_kintweetsc_pipeline_en.md new file mode 100644 index 00000000000000..43ca74a7ab9910 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_afro_xlmr_base_finetuned_kintweetsc_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_afro_xlmr_base_finetuned_kintweetsc_pipeline pipeline XlmRoBertaSentenceEmbeddings from RogerB +author: John Snow Labs +name: sent_afro_xlmr_base_finetuned_kintweetsc_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_afro_xlmr_base_finetuned_kintweetsc_pipeline` is a English model originally trained by RogerB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_afro_xlmr_base_finetuned_kintweetsc_pipeline_en_5.5.0_3.0_1725714721100.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_afro_xlmr_base_finetuned_kintweetsc_pipeline_en_5.5.0_3.0_1725714721100.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_afro_xlmr_base_finetuned_kintweetsc_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_afro_xlmr_base_finetuned_kintweetsc_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_afro_xlmr_base_finetuned_kintweetsc_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/RogerB/afro-xlmr-base-finetuned-kintweetsC + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_afro_xlmr_base_finetuned_kintweetsd_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_afro_xlmr_base_finetuned_kintweetsd_pipeline_en.md new file mode 100644 index 00000000000000..f42c5b634a9abf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_afro_xlmr_base_finetuned_kintweetsd_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_afro_xlmr_base_finetuned_kintweetsd_pipeline pipeline XlmRoBertaSentenceEmbeddings from RogerB +author: John Snow Labs +name: sent_afro_xlmr_base_finetuned_kintweetsd_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_afro_xlmr_base_finetuned_kintweetsd_pipeline` is a English model originally trained by RogerB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_afro_xlmr_base_finetuned_kintweetsd_pipeline_en_5.5.0_3.0_1725684502511.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_afro_xlmr_base_finetuned_kintweetsd_pipeline_en_5.5.0_3.0_1725684502511.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_afro_xlmr_base_finetuned_kintweetsd_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_afro_xlmr_base_finetuned_kintweetsd_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_afro_xlmr_base_finetuned_kintweetsd_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/RogerB/afro-xlmr-base-finetuned-kintweetsD + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_albert_persian_farsi_zwnj_base_v2_fa.md b/docs/_posts/ahmedlone127/2024-09-07-sent_albert_persian_farsi_zwnj_base_v2_fa.md new file mode 100644 index 00000000000000..94540ccb7619e7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_albert_persian_farsi_zwnj_base_v2_fa.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Persian sent_albert_persian_farsi_zwnj_base_v2 BertSentenceEmbeddings from HooshvareLab +author: John Snow Labs +name: sent_albert_persian_farsi_zwnj_base_v2 +date: 2024-09-07 +tags: [fa, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: fa +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_albert_persian_farsi_zwnj_base_v2` is a Persian model originally trained by HooshvareLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_albert_persian_farsi_zwnj_base_v2_fa_5.5.0_3.0_1725724749663.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_albert_persian_farsi_zwnj_base_v2_fa_5.5.0_3.0_1725724749663.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_albert_persian_farsi_zwnj_base_v2","fa") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_albert_persian_farsi_zwnj_base_v2","fa") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
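+Continuing from the Python example above, the sentence-level vectors can be pulled out of the `embeddings` column, one row per detected sentence:
+
+```python
+# "result" is the sentence text, "embeddings" its vector
+pipelineDF.selectExpr("explode(embeddings) as e") \
+    .selectExpr("e.result as sentence", "size(e.embeddings) as dimensions") \
+    .show(truncate=False)
+```
+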
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_albert_persian_farsi_zwnj_base_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|fa| +|Size:|41.8 MB| + +## References + +https://huggingface.co/HooshvareLab/albert-fa-zwnj-base-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_alephbertgimmel_base_512_pipeline_he.md b/docs/_posts/ahmedlone127/2024-09-07-sent_alephbertgimmel_base_512_pipeline_he.md new file mode 100644 index 00000000000000..27a61b80be09d6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_alephbertgimmel_base_512_pipeline_he.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Hebrew sent_alephbertgimmel_base_512_pipeline pipeline BertSentenceEmbeddings from imvladikon +author: John Snow Labs +name: sent_alephbertgimmel_base_512_pipeline +date: 2024-09-07 +tags: [he, open_source, pipeline, onnx] +task: Embeddings +language: he +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_alephbertgimmel_base_512_pipeline` is a Hebrew model originally trained by imvladikon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_alephbertgimmel_base_512_pipeline_he_5.5.0_3.0_1725701028737.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_alephbertgimmel_base_512_pipeline_he_5.5.0_3.0_1725701028737.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_alephbertgimmel_base_512_pipeline", lang = "he") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_alephbertgimmel_base_512_pipeline", lang = "he") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_alephbertgimmel_base_512_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|he| +|Size:|691.0 MB| + +## References + +https://huggingface.co/imvladikon/alephbertgimmel-base-512 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_bernice_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-07-sent_bernice_pipeline_xx.md new file mode 100644 index 00000000000000..977947b8850c23 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_bernice_pipeline_xx.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Multilingual sent_bernice_pipeline pipeline XlmRoBertaSentenceEmbeddings from jhu-clsp +author: John Snow Labs +name: sent_bernice_pipeline +date: 2024-09-07 +tags: [xx, open_source, pipeline, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bernice_pipeline` is a Multilingual model originally trained by jhu-clsp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bernice_pipeline_xx_5.5.0_3.0_1725738964495.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bernice_pipeline_xx_5.5.0_3.0_1725738964495.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_bernice_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_bernice_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
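+For low-latency scoring of small batches outside a DataFrame, the downloaded PipelineModel can be wrapped in a LightPipeline; a sketch, assuming the underlying model is exposed as `pipeline.model` as in current Spark NLP releases:
+
+```python
+from sparknlp.base import LightPipeline
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("sent_bernice_pipeline", lang = "xx")
+
+# LightPipeline runs the same stages on plain Python strings, without a DataFrame
+light = LightPipeline(pipeline.model)
+results = light.annotate(["I love spark-nlp", "Me encanta spark-nlp"])
+print(results[0].keys())
+```
+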
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bernice_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|655.2 MB| + +## References + +https://huggingface.co/jhu-clsp/bernice + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_bernice_xx.md b/docs/_posts/ahmedlone127/2024-09-07-sent_bernice_xx.md new file mode 100644 index 00000000000000..f2fbe02fe55f11 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_bernice_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual sent_bernice XlmRoBertaSentenceEmbeddings from jhu-clsp +author: John Snow Labs +name: sent_bernice +date: 2024-09-07 +tags: [xx, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bernice` is a Multilingual model originally trained by jhu-clsp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bernice_xx_5.5.0_3.0_1725738782317.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bernice_xx_5.5.0_3.0_1725738782317.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_bernice","xx") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_bernice","xx") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bernice| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|654.7 MB| + +## References + +https://huggingface.co/jhu-clsp/bernice \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_bert_1890_1900_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_bert_1890_1900_en.md new file mode 100644 index 00000000000000..c60e7c12ff26b6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_bert_1890_1900_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_bert_1890_1900 BertSentenceEmbeddings from Livingwithmachines +author: John Snow Labs +name: sent_bert_1890_1900 +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_1890_1900` is a English model originally trained by Livingwithmachines. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_1890_1900_en_5.5.0_3.0_1725701110609.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_1890_1900_en_5.5.0_3.0_1725701110609.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_bert_1890_1900","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_bert_1890_1900","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_1890_1900| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/Livingwithmachines/bert_1890_1900 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_cased_finetuned_mrpc_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_cased_finetuned_mrpc_en.md new file mode 100644 index 00000000000000..cc93ea76e9de2d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_cased_finetuned_mrpc_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_bert_base_cased_finetuned_mrpc BertSentenceEmbeddings from google-bert +author: John Snow Labs +name: sent_bert_base_cased_finetuned_mrpc +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_base_cased_finetuned_mrpc` is a English model originally trained by google-bert. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_base_cased_finetuned_mrpc_en_5.5.0_3.0_1725701119060.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_base_cased_finetuned_mrpc_en_5.5.0_3.0_1725701119060.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_bert_base_cased_finetuned_mrpc","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_bert_base_cased_finetuned_mrpc","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
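+Since this checkpoint was fine-tuned on MRPC (paraphrase detection), a natural smoke test is comparing the sentence vectors of two paraphrases. The cosine computation below is a sketch that continues from the Python example above and assumes each input row holds a single sentence, so row order is preserved after `explode`:
+
+```python
+import numpy as np
+
+data = spark.createDataFrame(
+    [["The company acquired the startup."],
+     ["The startup was bought by the company."]]
+).toDF("text")
+
+rows = pipelineModel.transform(data) \
+    .selectExpr("explode(embeddings) as e") \
+    .select("e.embeddings") \
+    .collect()
+
+v1, v2 = (np.array(r["embeddings"]) for r in rows)
+cosine = float(v1 @ v2 / (np.linalg.norm(v1) * np.linalg.norm(v2)))
+print(f"cosine similarity: {cosine:.3f}")
+```
+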
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_base_cased_finetuned_mrpc| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/google-bert/bert-base-cased-finetuned-mrpc \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_cased_finetuned_mrpc_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_cased_finetuned_mrpc_pipeline_en.md new file mode 100644 index 00000000000000..d014e7fc4b6fb8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_cased_finetuned_mrpc_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_bert_base_cased_finetuned_mrpc_pipeline pipeline BertSentenceEmbeddings from google-bert +author: John Snow Labs +name: sent_bert_base_cased_finetuned_mrpc_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_base_cased_finetuned_mrpc_pipeline` is a English model originally trained by google-bert. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_base_cased_finetuned_mrpc_pipeline_en_5.5.0_3.0_1725701137059.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_base_cased_finetuned_mrpc_pipeline_en_5.5.0_3.0_1725701137059.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_bert_base_cased_finetuned_mrpc_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_bert_base_cased_finetuned_mrpc_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_base_cased_finetuned_mrpc_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|404.2 MB| + +## References + +https://huggingface.co/google-bert/bert-base-cased-finetuned-mrpc + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_german_cased_oldvocab_de.md b/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_german_cased_oldvocab_de.md new file mode 100644 index 00000000000000..fc9f586f0c8e93 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_german_cased_oldvocab_de.md @@ -0,0 +1,94 @@ +--- +layout: model +title: German sent_bert_base_german_cased_oldvocab BertSentenceEmbeddings from deepset +author: John Snow Labs +name: sent_bert_base_german_cased_oldvocab +date: 2024-09-07 +tags: [de, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_base_german_cased_oldvocab` is a German model originally trained by deepset. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_base_german_cased_oldvocab_de_5.5.0_3.0_1725725374269.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_base_german_cased_oldvocab_de_5.5.0_3.0_1725725374269.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_bert_base_german_cased_oldvocab","de") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_bert_base_german_cased_oldvocab","de") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_base_german_cased_oldvocab| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|de| +|Size:|406.9 MB| + +## References + +https://huggingface.co/deepset/bert-base-german-cased-oldvocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_indonesian_522m_id.md b/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_indonesian_522m_id.md new file mode 100644 index 00000000000000..bac7a063e8812b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_indonesian_522m_id.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Indonesian sent_bert_base_indonesian_522m BertSentenceEmbeddings from cahya +author: John Snow Labs +name: sent_bert_base_indonesian_522m +date: 2024-09-07 +tags: [id, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: id +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_base_indonesian_522m` is a Indonesian model originally trained by cahya. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_base_indonesian_522m_id_5.5.0_3.0_1725700303465.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_base_indonesian_522m_id_5.5.0_3.0_1725700303465.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_bert_base_indonesian_522m","id") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_bert_base_indonesian_522m","id") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_base_indonesian_522m| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|id| +|Size:|412.5 MB| + +## References + +https://huggingface.co/cahya/bert-base-indonesian-522M \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_indonesian_522m_pipeline_id.md b/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_indonesian_522m_pipeline_id.md new file mode 100644 index 00000000000000..9fe2905b85ceb2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_indonesian_522m_pipeline_id.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Indonesian sent_bert_base_indonesian_522m_pipeline pipeline BertSentenceEmbeddings from cahya +author: John Snow Labs +name: sent_bert_base_indonesian_522m_pipeline +date: 2024-09-07 +tags: [id, open_source, pipeline, onnx] +task: Embeddings +language: id +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_base_indonesian_522m_pipeline` is a Indonesian model originally trained by cahya. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_base_indonesian_522m_pipeline_id_5.5.0_3.0_1725700321919.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_base_indonesian_522m_pipeline_id_5.5.0_3.0_1725700321919.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_bert_base_indonesian_522m_pipeline", lang = "id") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_bert_base_indonesian_522m_pipeline", lang = "id") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_base_indonesian_522m_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|id| +|Size:|413.1 MB| + +## References + +https://huggingface.co/cahya/bert-base-indonesian-522M + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_irish_cased_v1_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_irish_cased_v1_en.md new file mode 100644 index 00000000000000..d41970629fe751 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_irish_cased_v1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_bert_base_irish_cased_v1 BertSentenceEmbeddings from DCU-NLP +author: John Snow Labs +name: sent_bert_base_irish_cased_v1 +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_base_irish_cased_v1` is a English model originally trained by DCU-NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_base_irish_cased_v1_en_5.5.0_3.0_1725700376092.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_base_irish_cased_v1_en_5.5.0_3.0_1725700376092.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_bert_base_irish_cased_v1","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_bert_base_irish_cased_v1","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_base_irish_cased_v1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/DCU-NLP/bert-base-irish-cased-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_irish_cased_v1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_irish_cased_v1_pipeline_en.md new file mode 100644 index 00000000000000..59a51ce561224c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_irish_cased_v1_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_bert_base_irish_cased_v1_pipeline pipeline BertSentenceEmbeddings from DCU-NLP +author: John Snow Labs +name: sent_bert_base_irish_cased_v1_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_base_irish_cased_v1_pipeline` is a English model originally trained by DCU-NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_base_irish_cased_v1_pipeline_en_5.5.0_3.0_1725700394485.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_base_irish_cased_v1_pipeline_en_5.5.0_3.0_1725700394485.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_bert_base_irish_cased_v1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_bert_base_irish_cased_v1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_base_irish_cased_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.4 MB| + +## References + +https://huggingface.co/DCU-NLP/bert-base-irish-cased-v1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_magicslabnu_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_magicslabnu_pipeline_en.md new file mode 100644 index 00000000000000..b104c2af53192b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_magicslabnu_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_bert_base_magicslabnu_pipeline pipeline BertSentenceEmbeddings from magicslabnu +author: John Snow Labs +name: sent_bert_base_magicslabnu_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_base_magicslabnu_pipeline` is a English model originally trained by magicslabnu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_base_magicslabnu_pipeline_en_5.5.0_3.0_1725700882644.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_base_magicslabnu_pipeline_en_5.5.0_3.0_1725700882644.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_bert_base_magicslabnu_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_bert_base_magicslabnu_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_base_magicslabnu_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|408.9 MB| + +## References + +https://huggingface.co/magicslabnu/BERT_base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_polish_uncased_v1_pl.md b/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_polish_uncased_v1_pl.md new file mode 100644 index 00000000000000..08dbdf85417a2d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_polish_uncased_v1_pl.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Polish sent_bert_base_polish_uncased_v1 BertSentenceEmbeddings from dkleczek +author: John Snow Labs +name: sent_bert_base_polish_uncased_v1 +date: 2024-09-07 +tags: [pl, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: pl +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_base_polish_uncased_v1` is a Polish model originally trained by dkleczek. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_base_polish_uncased_v1_pl_5.5.0_3.0_1725700379675.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_base_polish_uncased_v1_pl_5.5.0_3.0_1725700379675.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_bert_base_polish_uncased_v1","pl") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_bert_base_polish_uncased_v1","pl") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_base_polish_uncased_v1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|pl| +|Size:|493.6 MB| + +## References + +https://huggingface.co/dkleczek/bert-base-polish-uncased-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_uncased_finetuned_char_hangman_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_uncased_finetuned_char_hangman_en.md new file mode 100644 index 00000000000000..20cf30cd1e5a05 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_uncased_finetuned_char_hangman_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_bert_base_uncased_finetuned_char_hangman BertSentenceEmbeddings from bhagasra-saurav +author: John Snow Labs +name: sent_bert_base_uncased_finetuned_char_hangman +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_base_uncased_finetuned_char_hangman` is a English model originally trained by bhagasra-saurav. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_base_uncased_finetuned_char_hangman_en_5.5.0_3.0_1725748658488.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_base_uncased_finetuned_char_hangman_en_5.5.0_3.0_1725748658488.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_bert_base_uncased_finetuned_char_hangman","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_bert_base_uncased_finetuned_char_hangman","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_base_uncased_finetuned_char_hangman| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/bhagasra-saurav/bert-base-uncased-finetuned-char-hangman \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_uncased_finetuned_char_hangman_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_uncased_finetuned_char_hangman_pipeline_en.md new file mode 100644 index 00000000000000..9287a512ec76d2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_uncased_finetuned_char_hangman_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_bert_base_uncased_finetuned_char_hangman_pipeline pipeline BertSentenceEmbeddings from bhagasra-saurav +author: John Snow Labs +name: sent_bert_base_uncased_finetuned_char_hangman_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_base_uncased_finetuned_char_hangman_pipeline` is a English model originally trained by bhagasra-saurav. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_base_uncased_finetuned_char_hangman_pipeline_en_5.5.0_3.0_1725748677340.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_base_uncased_finetuned_char_hangman_pipeline_en_5.5.0_3.0_1725748677340.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_bert_base_uncased_finetuned_char_hangman_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_bert_base_uncased_finetuned_char_hangman_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_base_uncased_finetuned_char_hangman_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.7 MB| + +## References + +https://huggingface.co/bhagasra-saurav/bert-base-uncased-finetuned-char-hangman + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_zhtw_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_zhtw_en.md new file mode 100644 index 00000000000000..28287b09315cb6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_bert_base_zhtw_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_bert_base_zhtw BertSentenceEmbeddings from yentinglin +author: John Snow Labs +name: sent_bert_base_zhtw +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_base_zhtw` is a English model originally trained by yentinglin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_base_zhtw_en_5.5.0_3.0_1725748612635.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_base_zhtw_en_5.5.0_3.0_1725748612635.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_bert_base_zhtw","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_bert_base_zhtw","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_base_zhtw| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/yentinglin/bert-base-zhtw \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_MetaKD_v0_pipeline_pt.md b/docs/_posts/ahmedlone127/2024-09-07-sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_MetaKD_v0_pipeline_pt.md new file mode 100644 index 00000000000000..1d91ef5e50c235 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_MetaKD_v0_pipeline_pt.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Portuguese sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_MetaKD_v0_pipeline pipeline BertSentenceEmbeddings from stjiris +author: John Snow Labs +name: sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_MetaKD_v0_pipeline +date: 2024-09-07 +tags: [pt, open_source, pipeline, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_MetaKD_v0_pipeline` is a Portuguese model originally trained by stjiris. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_MetaKD_v0_pipeline_pt_5.5.0_3.0_1725700667694.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_MetaKD_v0_pipeline_pt_5.5.0_3.0_1725700667694.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_MetaKD_v0_pipeline", lang = "pt") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_MetaKD_v0_pipeline", lang = "pt") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_MetaKD_v0_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|pt| +|Size:|1.2 GB| + +## References + +https://huggingface.co/stjiris/bert-large-portuguese-cased-legal-mlm-gpl-nli-sts-MetaKD-v0 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_MetaKD_v0_pt.md b/docs/_posts/ahmedlone127/2024-09-07-sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_MetaKD_v0_pt.md new file mode 100644 index 00000000000000..6bac86e6a5269d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_MetaKD_v0_pt.md @@ -0,0 +1,80 @@ +--- +layout: model +title: Portuguese Legal BERT Sentence Embedding Large Cased model +author: John Snow Labs +name: sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_MetaKD_v0 +date: 2024-09-07 +tags: [bert, pt, embeddings, sentence, open_source, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Legal BERT Sentence Embedding model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert-large-portuguese-cased-legal-mlm-gpl-nli-sts-MetaKD-v0` is a Portuguese model originally trained by `stjiris`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_MetaKD_v0_pt_5.5.0_3.0_1725700611804.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_MetaKD_v0_pt_5.5.0_3.0_1725700611804.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+sent_embeddings = BertSentenceEmbeddings.pretrained("sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_MetaKD_v0", "pt") \
+    .setInputCols("sentence") \
+    .setOutputCol("bert_sentence")
+
+nlp_pipeline = Pipeline(stages=[document_assembler, sentence_detector, sent_embeddings])
+result = nlp_pipeline.fit(data).transform(data)
+```
+```scala
+val sent_embeddings = BertSentenceEmbeddings.pretrained("sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_MetaKD_v0", "pt")
+    .setInputCols("sentence")
+    .setOutputCol("bert_sentence")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, sentence_detector, sent_embeddings))
+val result = pipeline.fit(data).transform(data)
+```
+
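+
+The snippet above assumes that `document_assembler`, `sentence_detector`, and `data` are already defined. A minimal self-contained sketch following the same pattern as the other sentence-embedding cards in this release (the Portuguese example sentence is purely illustrative):
+
+```python
+import sparknlp
+from pyspark.ml import Pipeline
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, BertSentenceEmbeddings
+
+spark = sparknlp.start()
+
+# Turn raw text into Spark NLP documents
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+# Split documents into sentences with the multilingual sentence detector
+sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+# Sentence-level embeddings from the model described on this card
+sent_embeddings = BertSentenceEmbeddings.pretrained("sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_MetaKD_v0", "pt") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("bert_sentence")
+
+nlp_pipeline = Pipeline(stages=[document_assembler, sentence_detector, sent_embeddings])
+
+data = spark.createDataFrame([["O tribunal decidiu a favor do autor."]]).toDF("text")
+result = nlp_pipeline.fit(data).transform(data)
+result.select("bert_sentence.embeddings").show(truncate=False)
+```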
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bert_bert_large_portuguese_cased_legal_mlm_gpl_nli_sts_MetaKD_v0| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|1.2 GB| + +## References + +References + +- https://huggingface.co/stjiris/bert-large-portuguese-cased-legal-mlm-gpl-nli-sts-MetaKD-v0 +- https://github.com/rufimelo99/metadata-knowledge-distillation +- https://github.com/rufimelo99 +- https://rufimelo99.github.io/SemanticSearchSystemForSTJ/ +- https://www.SBERT.net +- https://www.inesc-id.pt/projects/PR07005/ \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_bertabaporu_large_uncased_pipeline_pt.md b/docs/_posts/ahmedlone127/2024-09-07-sent_bertabaporu_large_uncased_pipeline_pt.md new file mode 100644 index 00000000000000..a0bbcf285f1081 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_bertabaporu_large_uncased_pipeline_pt.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Portuguese sent_bertabaporu_large_uncased_pipeline pipeline BertSentenceEmbeddings from pablocosta +author: John Snow Labs +name: sent_bertabaporu_large_uncased_pipeline +date: 2024-09-07 +tags: [pt, open_source, pipeline, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bertabaporu_large_uncased_pipeline` is a Portuguese model originally trained by pablocosta. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bertabaporu_large_uncased_pipeline_pt_5.5.0_3.0_1725700550791.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bertabaporu_large_uncased_pipeline_pt_5.5.0_3.0_1725700550791.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_bertabaporu_large_uncased_pipeline", lang = "pt") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_bertabaporu_large_uncased_pipeline", lang = "pt") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bertabaporu_large_uncased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|pt| +|Size:|1.4 GB| + +## References + +https://huggingface.co/pablocosta/bertabaporu-large-uncased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_bertabaporu_large_uncased_pt.md b/docs/_posts/ahmedlone127/2024-09-07-sent_bertabaporu_large_uncased_pt.md new file mode 100644 index 00000000000000..8f7476228e49cd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_bertabaporu_large_uncased_pt.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Portuguese sent_bertabaporu_large_uncased BertSentenceEmbeddings from pablocosta +author: John Snow Labs +name: sent_bertabaporu_large_uncased +date: 2024-09-07 +tags: [pt, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: pt +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bertabaporu_large_uncased` is a Portuguese model originally trained by pablocosta. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bertabaporu_large_uncased_pt_5.5.0_3.0_1725700486033.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bertabaporu_large_uncased_pt_5.5.0_3.0_1725700486033.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_bertabaporu_large_uncased","pt") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_bertabaporu_large_uncased","pt") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bertabaporu_large_uncased| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|1.4 GB| + +## References + +https://huggingface.co/pablocosta/bertabaporu-large-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_bertislav_pipeline_cu.md b/docs/_posts/ahmedlone127/2024-09-07-sent_bertislav_pipeline_cu.md new file mode 100644 index 00000000000000..a7ba5099ca09e1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_bertislav_pipeline_cu.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Church Slavic, Church Slavonic, Old Bulgarian, Old Church Slavonic, Old Slavonic sent_bertislav_pipeline pipeline BertSentenceEmbeddings from npedrazzini +author: John Snow Labs +name: sent_bertislav_pipeline +date: 2024-09-07 +tags: [cu, open_source, pipeline, onnx] +task: Embeddings +language: cu +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bertislav_pipeline` is a Church Slavic, Church Slavonic, Old Bulgarian, Old Church Slavonic, Old Slavonic model originally trained by npedrazzini. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bertislav_pipeline_cu_5.5.0_3.0_1725724811280.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bertislav_pipeline_cu_5.5.0_3.0_1725724811280.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_bertislav_pipeline", lang = "cu") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_bertislav_pipeline", lang = "cu") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bertislav_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|cu| +|Size:|667.5 MB| + +## References + +https://huggingface.co/npedrazzini/BERTislav + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_berturk_legal_pipeline_tr.md b/docs/_posts/ahmedlone127/2024-09-07-sent_berturk_legal_pipeline_tr.md new file mode 100644 index 00000000000000..9ca6d243a5606b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_berturk_legal_pipeline_tr.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Turkish sent_berturk_legal_pipeline pipeline BertSentenceEmbeddings from KocLab-Bilkent +author: John Snow Labs +name: sent_berturk_legal_pipeline +date: 2024-09-07 +tags: [tr, open_source, pipeline, onnx] +task: Embeddings +language: tr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_berturk_legal_pipeline` is a Turkish model originally trained by KocLab-Bilkent. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_berturk_legal_pipeline_tr_5.5.0_3.0_1725725310382.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_berturk_legal_pipeline_tr_5.5.0_3.0_1725725310382.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_berturk_legal_pipeline", lang = "tr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_berturk_legal_pipeline", lang = "tr") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_berturk_legal_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|tr| +|Size:|689.9 MB| + +## References + +https://huggingface.co/KocLab-Bilkent/BERTurk-Legal + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_berturk_legal_tr.md b/docs/_posts/ahmedlone127/2024-09-07-sent_berturk_legal_tr.md new file mode 100644 index 00000000000000..f64b4a134cbda0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_berturk_legal_tr.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Turkish sent_berturk_legal BertSentenceEmbeddings from KocLab-Bilkent +author: John Snow Labs +name: sent_berturk_legal +date: 2024-09-07 +tags: [tr, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: tr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_berturk_legal` is a Turkish model originally trained by KocLab-Bilkent. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_berturk_legal_tr_5.5.0_3.0_1725725278309.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_berturk_legal_tr_5.5.0_3.0_1725725278309.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_berturk_legal","tr") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_berturk_legal","tr") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_berturk_legal| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|tr| +|Size:|689.4 MB| + +## References + +https://huggingface.co/KocLab-Bilkent/BERTurk-Legal \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_bio_tinybert_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_bio_tinybert_en.md new file mode 100644 index 00000000000000..38b36ff60011d6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_bio_tinybert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_bio_tinybert BertSentenceEmbeddings from nlpie +author: John Snow Labs +name: sent_bio_tinybert +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bio_tinybert` is a English model originally trained by nlpie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bio_tinybert_en_5.5.0_3.0_1725736719974.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bio_tinybert_en_5.5.0_3.0_1725736719974.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_bio_tinybert","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_bio_tinybert","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bio_tinybert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|53.8 MB| + +## References + +https://huggingface.co/nlpie/bio-tinybert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_bio_tinybert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_bio_tinybert_pipeline_en.md new file mode 100644 index 00000000000000..8a70bc4e9754a3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_bio_tinybert_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_bio_tinybert_pipeline pipeline BertSentenceEmbeddings from nlpie +author: John Snow Labs +name: sent_bio_tinybert_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_bio_tinybert_pipeline` is a English model originally trained by nlpie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_bio_tinybert_pipeline_en_5.5.0_3.0_1725736723036.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_bio_tinybert_pipeline_en_5.5.0_3.0_1725736723036.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_bio_tinybert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_bio_tinybert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_bio_tinybert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|54.3 MB| + +## References + +https://huggingface.co/nlpie/bio-tinybert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_biomednlp_biomedbert_large_uncased_abstract_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_biomednlp_biomedbert_large_uncased_abstract_en.md new file mode 100644 index 00000000000000..47ad3fe554edb7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_biomednlp_biomedbert_large_uncased_abstract_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_biomednlp_biomedbert_large_uncased_abstract BertSentenceEmbeddings from microsoft +author: John Snow Labs +name: sent_biomednlp_biomedbert_large_uncased_abstract +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_biomednlp_biomedbert_large_uncased_abstract` is a English model originally trained by microsoft. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_biomednlp_biomedbert_large_uncased_abstract_en_5.5.0_3.0_1725725073666.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_biomednlp_biomedbert_large_uncased_abstract_en_5.5.0_3.0_1725725073666.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_biomednlp_biomedbert_large_uncased_abstract","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_biomednlp_biomedbert_large_uncased_abstract","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_biomednlp_biomedbert_large_uncased_abstract| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/microsoft/BiomedNLP-BiomedBERT-large-uncased-abstract \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_biomednlp_biomedbert_large_uncased_abstract_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_biomednlp_biomedbert_large_uncased_abstract_pipeline_en.md new file mode 100644 index 00000000000000..91c70dedb546dc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_biomednlp_biomedbert_large_uncased_abstract_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_biomednlp_biomedbert_large_uncased_abstract_pipeline pipeline BertSentenceEmbeddings from microsoft +author: John Snow Labs +name: sent_biomednlp_biomedbert_large_uncased_abstract_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_biomednlp_biomedbert_large_uncased_abstract_pipeline` is a English model originally trained by microsoft. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_biomednlp_biomedbert_large_uncased_abstract_pipeline_en_5.5.0_3.0_1725725128670.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_biomednlp_biomedbert_large_uncased_abstract_pipeline_en_5.5.0_3.0_1725725128670.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# load the pretrained pipeline and run it over a DataFrame with a "text" column
+pipeline = PretrainedPipeline("sent_biomednlp_biomedbert_large_uncased_abstract_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// load the pretrained pipeline and run it over a DataFrame with a "text" column
+val pipeline = new PretrainedPipeline("sent_biomednlp_biomedbert_large_uncased_abstract_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_biomednlp_biomedbert_large_uncased_abstract_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/microsoft/BiomedNLP-BiomedBERT-large-uncased-abstract + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_checkpoint_11600_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_checkpoint_11600_en.md new file mode 100644 index 00000000000000..0d8c234c7b794a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_checkpoint_11600_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_checkpoint_11600 XlmRoBertaSentenceEmbeddings from yemen2016 +author: John Snow Labs +name: sent_checkpoint_11600 +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_checkpoint_11600` is a English model originally trained by yemen2016. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_checkpoint_11600_en_5.5.0_3.0_1725713938811.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_checkpoint_11600_en_5.5.0_3.0_1725713938811.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_checkpoint_11600","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_checkpoint_11600","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
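+
+The example above assumes an active Spark session with Spark NLP available and the referenced classes already imported. A minimal setup sketch (import paths assumed from the standard `spark-nlp` Python package; adjust to your environment) is:
+
+```python
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, XlmRoBertaSentenceEmbeddings
+from pyspark.ml import Pipeline
+
+# start (or attach to) a Spark session with Spark NLP on the classpath
+spark = sparknlp.start()
+```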
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_checkpoint_11600| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/yemen2016/checkpoint-11600 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_checkpoint_11600_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_checkpoint_11600_pipeline_en.md new file mode 100644 index 00000000000000..7e11ef6d781a02 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_checkpoint_11600_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_checkpoint_11600_pipeline pipeline XlmRoBertaSentenceEmbeddings from yemen2016 +author: John Snow Labs +name: sent_checkpoint_11600_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_checkpoint_11600_pipeline` is a English model originally trained by yemen2016. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_checkpoint_11600_pipeline_en_5.5.0_3.0_1725713986902.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_checkpoint_11600_pipeline_en_5.5.0_3.0_1725713986902.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# load the pretrained pipeline and run it over a DataFrame with a "text" column
+pipeline = PretrainedPipeline("sent_checkpoint_11600_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// load the pretrained pipeline and run it over a DataFrame with a "text" column
+val pipeline = new PretrainedPipeline("sent_checkpoint_11600_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_checkpoint_11600_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/yemen2016/checkpoint-11600 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_checkpoint_13600_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_checkpoint_13600_en.md new file mode 100644 index 00000000000000..34b567ab66f6ca --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_checkpoint_13600_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_checkpoint_13600 XlmRoBertaSentenceEmbeddings from yemen2016 +author: John Snow Labs +name: sent_checkpoint_13600 +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_checkpoint_13600` is a English model originally trained by yemen2016. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_checkpoint_13600_en_5.5.0_3.0_1725737749107.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_checkpoint_13600_en_5.5.0_3.0_1725737749107.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_checkpoint_13600","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_checkpoint_13600","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
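+
+The example above assumes an active Spark session with Spark NLP available and the referenced classes already imported. A minimal setup sketch (import paths assumed from the standard `spark-nlp` Python package; adjust to your environment) is:
+
+```python
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, XlmRoBertaSentenceEmbeddings
+from pyspark.ml import Pipeline
+
+# start (or attach to) a Spark session with Spark NLP on the classpath
+spark = sparknlp.start()
+```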
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_checkpoint_13600| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/yemen2016/checkpoint-13600 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_checkpoint_13600_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_checkpoint_13600_pipeline_en.md new file mode 100644 index 00000000000000..e8b99306a852b3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_checkpoint_13600_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_checkpoint_13600_pipeline pipeline XlmRoBertaSentenceEmbeddings from yemen2016 +author: John Snow Labs +name: sent_checkpoint_13600_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_checkpoint_13600_pipeline` is a English model originally trained by yemen2016. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_checkpoint_13600_pipeline_en_5.5.0_3.0_1725737795083.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_checkpoint_13600_pipeline_en_5.5.0_3.0_1725737795083.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# load the pretrained pipeline and run it over a DataFrame with a "text" column
+pipeline = PretrainedPipeline("sent_checkpoint_13600_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// load the pretrained pipeline and run it over a DataFrame with a "text" column
+val pipeline = new PretrainedPipeline("sent_checkpoint_13600_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_checkpoint_13600_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/yemen2016/checkpoint-13600 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_cocodr_base_msmarco_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_cocodr_base_msmarco_en.md new file mode 100644 index 00000000000000..8f3b01a94d191a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_cocodr_base_msmarco_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_cocodr_base_msmarco BertSentenceEmbeddings from OpenMatch +author: John Snow Labs +name: sent_cocodr_base_msmarco +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_cocodr_base_msmarco` is a English model originally trained by OpenMatch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_cocodr_base_msmarco_en_5.5.0_3.0_1725700362459.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_cocodr_base_msmarco_en_5.5.0_3.0_1725700362459.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_cocodr_base_msmarco","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_cocodr_base_msmarco","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
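+
+The example above assumes an active Spark session with Spark NLP available and the referenced classes already imported. A minimal setup sketch (import paths assumed from the standard `spark-nlp` Python package; adjust to your environment) is:
+
+```python
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, BertSentenceEmbeddings
+from pyspark.ml import Pipeline
+
+# start (or attach to) a Spark session with Spark NLP on the classpath
+spark = sparknlp.start()
+```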
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_cocodr_base_msmarco| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.4 MB| + +## References + +https://huggingface.co/OpenMatch/cocodr-base-msmarco \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_convbert_base_turkish_mc4_cased_pipeline_tr.md b/docs/_posts/ahmedlone127/2024-09-07-sent_convbert_base_turkish_mc4_cased_pipeline_tr.md new file mode 100644 index 00000000000000..2d48333679ce3a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_convbert_base_turkish_mc4_cased_pipeline_tr.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Turkish sent_convbert_base_turkish_mc4_cased_pipeline pipeline BertSentenceEmbeddings from dbmdz +author: John Snow Labs +name: sent_convbert_base_turkish_mc4_cased_pipeline +date: 2024-09-07 +tags: [tr, open_source, pipeline, onnx] +task: Embeddings +language: tr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_convbert_base_turkish_mc4_cased_pipeline` is a Turkish model originally trained by dbmdz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_convbert_base_turkish_mc4_cased_pipeline_tr_5.5.0_3.0_1725736653787.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_convbert_base_turkish_mc4_cased_pipeline_tr_5.5.0_3.0_1725736653787.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# load the pretrained pipeline and run it over a DataFrame with a "text" column
+pipeline = PretrainedPipeline("sent_convbert_base_turkish_mc4_cased_pipeline", lang = "tr")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// load the pretrained pipeline and run it over a DataFrame with a "text" column
+val pipeline = new PretrainedPipeline("sent_convbert_base_turkish_mc4_cased_pipeline", lang = "tr")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_convbert_base_turkish_mc4_cased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|tr| +|Size:|400.6 MB| + +## References + +https://huggingface.co/dbmdz/convbert-base-turkish-mc4-cased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_convbert_base_turkish_mc4_cased_tr.md b/docs/_posts/ahmedlone127/2024-09-07-sent_convbert_base_turkish_mc4_cased_tr.md new file mode 100644 index 00000000000000..a7b790e93d8644 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_convbert_base_turkish_mc4_cased_tr.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Turkish sent_convbert_base_turkish_mc4_cased BertSentenceEmbeddings from dbmdz +author: John Snow Labs +name: sent_convbert_base_turkish_mc4_cased +date: 2024-09-07 +tags: [tr, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: tr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_convbert_base_turkish_mc4_cased` is a Turkish model originally trained by dbmdz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_convbert_base_turkish_mc4_cased_tr_5.5.0_3.0_1725736635802.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_convbert_base_turkish_mc4_cased_tr_5.5.0_3.0_1725736635802.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_convbert_base_turkish_mc4_cased","tr") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_convbert_base_turkish_mc4_cased","tr") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
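+
+The example above assumes an active Spark session with Spark NLP available and the referenced classes already imported. A minimal setup sketch (import paths assumed from the standard `spark-nlp` Python package; adjust to your environment) is:
+
+```python
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, BertSentenceEmbeddings
+from pyspark.ml import Pipeline
+
+# start (or attach to) a Spark session with Spark NLP on the classpath
+spark = sparknlp.start()
+```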
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_convbert_base_turkish_mc4_cased| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|tr| +|Size:|400.0 MB| + +## References + +https://huggingface.co/dbmdz/convbert-base-turkish-mc4-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_custominlawbert_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_custominlawbert_en.md new file mode 100644 index 00000000000000..fef52168a712da --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_custominlawbert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_custominlawbert BertSentenceEmbeddings from law-ai +author: John Snow Labs +name: sent_custominlawbert +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_custominlawbert` is a English model originally trained by law-ai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_custominlawbert_en_5.5.0_3.0_1725724698073.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_custominlawbert_en_5.5.0_3.0_1725724698073.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_custominlawbert","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_custominlawbert","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
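+
+The example above assumes an active Spark session with Spark NLP available and the referenced classes already imported. A minimal setup sketch (import paths assumed from the standard `spark-nlp` Python package; adjust to your environment) is:
+
+```python
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, BertSentenceEmbeddings
+from pyspark.ml import Pipeline
+
+# start (or attach to) a Spark session with Spark NLP on the classpath
+spark = sparknlp.start()
+```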
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_custominlawbert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.5 MB| + +## References + +https://huggingface.co/law-ai/CustomInLawBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_distil_clinicalbert_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_distil_clinicalbert_en.md new file mode 100644 index 00000000000000..e0cac7194f3d5c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_distil_clinicalbert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_distil_clinicalbert BertSentenceEmbeddings from nlpie +author: John Snow Labs +name: sent_distil_clinicalbert +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_distil_clinicalbert` is a English model originally trained by nlpie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_distil_clinicalbert_en_5.5.0_3.0_1725700822618.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_distil_clinicalbert_en_5.5.0_3.0_1725700822618.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_distil_clinicalbert","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_distil_clinicalbert","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
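+
+The example above assumes an active Spark session with Spark NLP available and the referenced classes already imported. A minimal setup sketch (import paths assumed from the standard `spark-nlp` Python package; adjust to your environment) is:
+
+```python
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, BertSentenceEmbeddings
+from pyspark.ml import Pipeline
+
+# start (or attach to) a Spark session with Spark NLP on the classpath
+spark = sparknlp.start()
+```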
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_distil_clinicalbert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|244.5 MB| + +## References + +https://huggingface.co/nlpie/distil-clinicalbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_eq_bert_v1_1_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_eq_bert_v1_1_en.md new file mode 100644 index 00000000000000..ccfeebadc31c65 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_eq_bert_v1_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_eq_bert_v1_1 BertSentenceEmbeddings from RyotaroOKabe +author: John Snow Labs +name: sent_eq_bert_v1_1 +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_eq_bert_v1_1` is a English model originally trained by RyotaroOKabe. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_eq_bert_v1_1_en_5.5.0_3.0_1725748610858.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_eq_bert_v1_1_en_5.5.0_3.0_1725748610858.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_eq_bert_v1_1","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_eq_bert_v1_1","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
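+
+The example above assumes an active Spark session with Spark NLP available and the referenced classes already imported. A minimal setup sketch (import paths assumed from the standard `spark-nlp` Python package; adjust to your environment) is:
+
+```python
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, BertSentenceEmbeddings
+from pyspark.ml import Pipeline
+
+# start (or attach to) a Spark session with Spark NLP on the classpath
+spark = sparknlp.start()
+```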
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_eq_bert_v1_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/RyotaroOKabe/eq_bert_v1.1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_financialbert_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_financialbert_en.md new file mode 100644 index 00000000000000..93e21d2c39b43b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_financialbert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_financialbert BertSentenceEmbeddings from ahmedrachid +author: John Snow Labs +name: sent_financialbert +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_financialbert` is a English model originally trained by ahmedrachid. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_financialbert_en_5.5.0_3.0_1725700739650.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_financialbert_en_5.5.0_3.0_1725700739650.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_financialbert","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_financialbert","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
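+
+The example above assumes an active Spark session with Spark NLP available and the referenced classes already imported. A minimal setup sketch (import paths assumed from the standard `spark-nlp` Python package; adjust to your environment) is:
+
+```python
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, BertSentenceEmbeddings
+from pyspark.ml import Pipeline
+
+# start (or attach to) a Spark session with Spark NLP on the classpath
+spark = sparknlp.start()
+```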
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_financialbert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/ahmedrachid/FinancialBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_fp_xlm_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_fp_xlm_en.md new file mode 100644 index 00000000000000..1d2140f10c2312 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_fp_xlm_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_fp_xlm XlmRoBertaSentenceEmbeddings from Sadia2000 +author: John Snow Labs +name: sent_fp_xlm +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_fp_xlm` is a English model originally trained by Sadia2000. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_fp_xlm_en_5.5.0_3.0_1725680926111.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_fp_xlm_en_5.5.0_3.0_1725680926111.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_fp_xlm","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_fp_xlm","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
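+
+The example above assumes an active Spark session with Spark NLP available and the referenced classes already imported. A minimal setup sketch (import paths assumed from the standard `spark-nlp` Python package; adjust to your environment) is:
+
+```python
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, XlmRoBertaSentenceEmbeddings
+from pyspark.ml import Pipeline
+
+# start (or attach to) a Spark session with Spark NLP on the classpath
+spark = sparknlp.start()
+```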
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_fp_xlm| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Sadia2000/fp_xlm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_fp_xlm_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_fp_xlm_pipeline_en.md new file mode 100644 index 00000000000000..4b3dacb84b10ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_fp_xlm_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_fp_xlm_pipeline pipeline XlmRoBertaSentenceEmbeddings from Sadia2000 +author: John Snow Labs +name: sent_fp_xlm_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_fp_xlm_pipeline` is a English model originally trained by Sadia2000. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_fp_xlm_pipeline_en_5.5.0_3.0_1725680977560.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_fp_xlm_pipeline_en_5.5.0_3.0_1725680977560.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# load the pretrained pipeline and run it over a DataFrame with a "text" column
+pipeline = PretrainedPipeline("sent_fp_xlm_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// load the pretrained pipeline and run it over a DataFrame with a "text" column
+val pipeline = new PretrainedPipeline("sent_fp_xlm_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_fp_xlm_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Sadia2000/fp_xlm + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_georgian_corpus_model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_georgian_corpus_model_pipeline_en.md new file mode 100644 index 00000000000000..30f0a4176de3e9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_georgian_corpus_model_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_georgian_corpus_model_pipeline pipeline BertSentenceEmbeddings from RichNachos +author: John Snow Labs +name: sent_georgian_corpus_model_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_georgian_corpus_model_pipeline` is a English model originally trained by RichNachos. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_georgian_corpus_model_pipeline_en_5.5.0_3.0_1725724796708.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_georgian_corpus_model_pipeline_en_5.5.0_3.0_1725724796708.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# load the pretrained pipeline and run it over a DataFrame with a "text" column
+pipeline = PretrainedPipeline("sent_georgian_corpus_model_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// load the pretrained pipeline and run it over a DataFrame with a "text" column
+val pipeline = new PretrainedPipeline("sent_georgian_corpus_model_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_georgian_corpus_model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|107.7 MB| + +## References + +https://huggingface.co/RichNachos/georgian-corpus-model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_glot500_with_transliteration_max_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_glot500_with_transliteration_max_en.md new file mode 100644 index 00000000000000..7244f24e57e11f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_glot500_with_transliteration_max_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_glot500_with_transliteration_max XlmRoBertaSentenceEmbeddings from yihongLiu +author: John Snow Labs +name: sent_glot500_with_transliteration_max +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_glot500_with_transliteration_max` is a English model originally trained by yihongLiu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_glot500_with_transliteration_max_en_5.5.0_3.0_1725682010445.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_glot500_with_transliteration_max_en_5.5.0_3.0_1725682010445.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_glot500_with_transliteration_max","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_glot500_with_transliteration_max","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
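+
+The example above assumes an active Spark session with Spark NLP available and the referenced classes already imported. A minimal setup sketch (import paths assumed from the standard `spark-nlp` Python package; adjust to your environment) is:
+
+```python
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, XlmRoBertaSentenceEmbeddings
+from pyspark.ml import Pipeline
+
+# start (or attach to) a Spark session with Spark NLP on the classpath
+spark = sparknlp.start()
+```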
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_glot500_with_transliteration_max| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.9 GB| + +## References + +https://huggingface.co/yihongLiu/glot500-with-transliteration-max \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_glot500_with_transliteration_minangkabau_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_glot500_with_transliteration_minangkabau_pipeline_en.md new file mode 100644 index 00000000000000..fd7ce975b7e7f6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_glot500_with_transliteration_minangkabau_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_glot500_with_transliteration_minangkabau_pipeline pipeline XlmRoBertaSentenceEmbeddings from yihongLiu +author: John Snow Labs +name: sent_glot500_with_transliteration_minangkabau_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_glot500_with_transliteration_minangkabau_pipeline` is a English model originally trained by yihongLiu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_glot500_with_transliteration_minangkabau_pipeline_en_5.5.0_3.0_1725714714516.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_glot500_with_transliteration_minangkabau_pipeline_en_5.5.0_3.0_1725714714516.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# load the pretrained pipeline and run it over a DataFrame with a "text" column
+pipeline = PretrainedPipeline("sent_glot500_with_transliteration_minangkabau_pipeline", lang = "en")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// load the pretrained pipeline and run it over a DataFrame with a "text" column
+val pipeline = new PretrainedPipeline("sent_glot500_with_transliteration_minangkabau_pipeline", lang = "en")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_glot500_with_transliteration_minangkabau_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.9 GB| + +## References + +https://huggingface.co/yihongLiu/glot500-with-transliteration-min + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_hindi_bert_v2_hi.md b/docs/_posts/ahmedlone127/2024-09-07-sent_hindi_bert_v2_hi.md new file mode 100644 index 00000000000000..352788f329fefc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_hindi_bert_v2_hi.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Hindi sent_hindi_bert_v2 BertSentenceEmbeddings from l3cube-pune +author: John Snow Labs +name: sent_hindi_bert_v2 +date: 2024-09-07 +tags: [hi, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: hi +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_hindi_bert_v2` is a Hindi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_hindi_bert_v2_hi_5.5.0_3.0_1725700712787.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_hindi_bert_v2_hi_5.5.0_3.0_1725700712787.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_hindi_bert_v2","hi") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_hindi_bert_v2","hi") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
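+
+The example above assumes an active Spark session with Spark NLP available and the referenced classes already imported. A minimal setup sketch (import paths assumed from the standard `spark-nlp` Python package; adjust to your environment) is:
+
+```python
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, BertSentenceEmbeddings
+from pyspark.ml import Pipeline
+
+# start (or attach to) a Spark session with Spark NLP on the classpath
+spark = sparknlp.start()
+```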
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_hindi_bert_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|hi| +|Size:|890.7 MB| + +## References + +https://huggingface.co/l3cube-pune/hindi-bert-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_hing_bert_pipeline_hi.md b/docs/_posts/ahmedlone127/2024-09-07-sent_hing_bert_pipeline_hi.md new file mode 100644 index 00000000000000..42358d4589b328 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_hing_bert_pipeline_hi.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Hindi sent_hing_bert_pipeline pipeline BertSentenceEmbeddings from l3cube-pune +author: John Snow Labs +name: sent_hing_bert_pipeline +date: 2024-09-07 +tags: [hi, open_source, pipeline, onnx] +task: Embeddings +language: hi +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_hing_bert_pipeline` is a Hindi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_hing_bert_pipeline_hi_5.5.0_3.0_1725725046761.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_hing_bert_pipeline_hi_5.5.0_3.0_1725725046761.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# load the pretrained pipeline and run it over a DataFrame with a "text" column
+pipeline = PretrainedPipeline("sent_hing_bert_pipeline", lang = "hi")
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// load the pretrained pipeline and run it over a DataFrame with a "text" column
+val pipeline = new PretrainedPipeline("sent_hing_bert_pipeline", lang = "hi")
+val df = Seq("I love spark-nlp").toDF("text")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_hing_bert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|hi| +|Size:|407.7 MB| + +## References + +https://huggingface.co/l3cube-pune/hing-bert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_hinglish_bert_nirantk_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_hinglish_bert_nirantk_en.md new file mode 100644 index 00000000000000..aa7639227dacf0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_hinglish_bert_nirantk_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_hinglish_bert_nirantk BertSentenceEmbeddings from nirantk +author: John Snow Labs +name: sent_hinglish_bert_nirantk +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_hinglish_bert_nirantk` is a English model originally trained by nirantk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_hinglish_bert_nirantk_en_5.5.0_3.0_1725667367483.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_hinglish_bert_nirantk_en_5.5.0_3.0_1725667367483.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_hinglish_bert_nirantk","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_hinglish_bert_nirantk","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
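+
+The example above assumes an active Spark session with Spark NLP available and the referenced classes already imported. A minimal setup sketch (import paths assumed from the standard `spark-nlp` Python package; adjust to your environment) is:
+
+```python
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, BertSentenceEmbeddings
+from pyspark.ml import Pipeline
+
+# start (or attach to) a Spark session with Spark NLP on the classpath
+spark = sparknlp.start()
+```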
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_hinglish_bert_nirantk| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|665.0 MB| + +## References + +https://huggingface.co/nirantk/hinglish-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_hinglish_bert_nirantk_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_hinglish_bert_nirantk_pipeline_en.md new file mode 100644 index 00000000000000..62225e7d055956 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_hinglish_bert_nirantk_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_hinglish_bert_nirantk_pipeline pipeline BertSentenceEmbeddings from nirantk +author: John Snow Labs +name: sent_hinglish_bert_nirantk_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_hinglish_bert_nirantk_pipeline` is a English model originally trained by nirantk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_hinglish_bert_nirantk_pipeline_en_5.5.0_3.0_1725667398131.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_hinglish_bert_nirantk_pipeline_en_5.5.0_3.0_1725667398131.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_hinglish_bert_nirantk_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_hinglish_bert_nirantk_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
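Note that `df` is not defined in the snippet above; it is assumed to be a Spark DataFrame with a `text` column. A minimal sketch of one way to build it, plus the single-string `annotate` entry point for quick checks (the sample sentence is illustrative and assumes an active `spark` session):

```python
from sparknlp.pretrained import PretrainedPipeline

pipeline = PretrainedPipeline("sent_hinglish_bert_nirantk_pipeline", lang="en")

# DataFrame route: the pretrained pipeline reads from a "text" input column.
df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
annotations = pipeline.transform(df)

# Single-string route, returning a plain dict of annotator outputs.
light_result = pipeline.annotate("I love spark-nlp")
```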
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_hinglish_bert_nirantk_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|665.6 MB| + +## References + +https://huggingface.co/nirantk/hinglish-bert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_issuebert_large_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_issuebert_large_en.md new file mode 100644 index 00000000000000..1e31cd97b506f6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_issuebert_large_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_issuebert_large BertSentenceEmbeddings from gbkwon +author: John Snow Labs +name: sent_issuebert_large +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_issuebert_large` is a English model originally trained by gbkwon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_issuebert_large_en_5.5.0_3.0_1725701160873.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_issuebert_large_en_5.5.0_3.0_1725701160873.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_issuebert_large","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_issuebert_large","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_issuebert_large| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/gbkwon/issueBERT-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_issuebert_large_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_issuebert_large_pipeline_en.md new file mode 100644 index 00000000000000..ee40d6cde7b372 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_issuebert_large_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_issuebert_large_pipeline pipeline BertSentenceEmbeddings from gbkwon +author: John Snow Labs +name: sent_issuebert_large_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_issuebert_large_pipeline` is a English model originally trained by gbkwon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_issuebert_large_pipeline_en_5.5.0_3.0_1725701215154.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_issuebert_large_pipeline_en_5.5.0_3.0_1725701215154.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_issuebert_large_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_issuebert_large_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_issuebert_large_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/gbkwon/issueBERT-large + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_legalbert_large_1_7m_2_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_legalbert_large_1_7m_2_en.md new file mode 100644 index 00000000000000..5ad6fe5c5687f4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_legalbert_large_1_7m_2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_legalbert_large_1_7m_2 BertSentenceEmbeddings from pile-of-law +author: John Snow Labs +name: sent_legalbert_large_1_7m_2 +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_legalbert_large_1_7m_2` is a English model originally trained by pile-of-law. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_legalbert_large_1_7m_2_en_5.5.0_3.0_1725725109249.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_legalbert_large_1_7m_2_en_5.5.0_3.0_1725725109249.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_legalbert_large_1_7m_2","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_legalbert_large_1_7m_2","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_legalbert_large_1_7m_2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|645.3 MB| + +## References + +https://huggingface.co/pile-of-law/legalbert-large-1.7M-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_legalbertpt_sardinian_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_legalbertpt_sardinian_en.md new file mode 100644 index 00000000000000..16b4a9b8c71cb4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_legalbertpt_sardinian_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_legalbertpt_sardinian BertSentenceEmbeddings from raquelsilveira +author: John Snow Labs +name: sent_legalbertpt_sardinian +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_legalbertpt_sardinian` is a English model originally trained by raquelsilveira. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_legalbertpt_sardinian_en_5.5.0_3.0_1725724901486.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_legalbertpt_sardinian_en_5.5.0_3.0_1725724901486.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_legalbertpt_sardinian","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_legalbertpt_sardinian","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_legalbertpt_sardinian| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|425.1 MB| + +## References + +https://huggingface.co/raquelsilveira/legalbertpt_sc \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_legalbertpt_sardinian_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_legalbertpt_sardinian_pipeline_en.md new file mode 100644 index 00000000000000..d951bdd600ad16 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_legalbertpt_sardinian_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_legalbertpt_sardinian_pipeline pipeline BertSentenceEmbeddings from raquelsilveira +author: John Snow Labs +name: sent_legalbertpt_sardinian_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_legalbertpt_sardinian_pipeline` is a English model originally trained by raquelsilveira. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_legalbertpt_sardinian_pipeline_en_5.5.0_3.0_1725724922205.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_legalbertpt_sardinian_pipeline_en_5.5.0_3.0_1725724922205.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_legalbertpt_sardinian_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_legalbertpt_sardinian_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_legalbertpt_sardinian_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|425.6 MB| + +## References + +https://huggingface.co/raquelsilveira/legalbertpt_sc + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_legalnlp_bert_pipeline_pt.md b/docs/_posts/ahmedlone127/2024-09-07-sent_legalnlp_bert_pipeline_pt.md new file mode 100644 index 00000000000000..2a1081d81388b3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_legalnlp_bert_pipeline_pt.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Portuguese sent_legalnlp_bert_pipeline pipeline BertSentenceEmbeddings from felipemaiapolo +author: John Snow Labs +name: sent_legalnlp_bert_pipeline +date: 2024-09-07 +tags: [pt, open_source, pipeline, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_legalnlp_bert_pipeline` is a Portuguese model originally trained by felipemaiapolo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_legalnlp_bert_pipeline_pt_5.5.0_3.0_1725725258155.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_legalnlp_bert_pipeline_pt_5.5.0_3.0_1725725258155.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_legalnlp_bert_pipeline", lang = "pt") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_legalnlp_bert_pipeline", lang = "pt") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_legalnlp_bert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|pt| +|Size:|406.4 MB| + +## References + +https://huggingface.co/felipemaiapolo/legalnlp-bert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_legalnlp_bert_pt.md b/docs/_posts/ahmedlone127/2024-09-07-sent_legalnlp_bert_pt.md new file mode 100644 index 00000000000000..178bf2f40363b7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_legalnlp_bert_pt.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Portuguese sent_legalnlp_bert BertSentenceEmbeddings from felipemaiapolo +author: John Snow Labs +name: sent_legalnlp_bert +date: 2024-09-07 +tags: [pt, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: pt +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_legalnlp_bert` is a Portuguese model originally trained by felipemaiapolo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_legalnlp_bert_pt_5.5.0_3.0_1725725238350.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_legalnlp_bert_pt_5.5.0_3.0_1725725238350.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_legalnlp_bert","pt") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_legalnlp_bert","pt") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_legalnlp_bert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|405.8 MB| + +## References + +https://huggingface.co/felipemaiapolo/legalnlp-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_manubert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_manubert_pipeline_en.md new file mode 100644 index 00000000000000..d3ea39b5b81c47 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_manubert_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_manubert_pipeline pipeline BertSentenceEmbeddings from akumar33 +author: John Snow Labs +name: sent_manubert_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_manubert_pipeline` is a English model originally trained by akumar33. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_manubert_pipeline_en_5.5.0_3.0_1725749067710.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_manubert_pipeline_en_5.5.0_3.0_1725749067710.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_manubert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_manubert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_manubert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|410.0 MB| + +## References + +https://huggingface.co/akumar33/ManuBERT + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_marathi_bert_mr.md b/docs/_posts/ahmedlone127/2024-09-07-sent_marathi_bert_mr.md new file mode 100644 index 00000000000000..3b9fd6ab6ae81b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_marathi_bert_mr.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Marathi sent_marathi_bert BertSentenceEmbeddings from l3cube-pune +author: John Snow Labs +name: sent_marathi_bert +date: 2024-09-07 +tags: [mr, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: mr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_marathi_bert` is a Marathi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_marathi_bert_mr_5.5.0_3.0_1725700244473.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_marathi_bert_mr_5.5.0_3.0_1725700244473.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_marathi_bert","mr") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_marathi_bert","mr") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_marathi_bert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|mr| +|Size:|665.1 MB| + +## References + +https://huggingface.co/l3cube-pune/marathi-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_marathi_bert_pipeline_mr.md b/docs/_posts/ahmedlone127/2024-09-07-sent_marathi_bert_pipeline_mr.md new file mode 100644 index 00000000000000..137c2c31abfa23 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_marathi_bert_pipeline_mr.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Marathi sent_marathi_bert_pipeline pipeline BertSentenceEmbeddings from l3cube-pune +author: John Snow Labs +name: sent_marathi_bert_pipeline +date: 2024-09-07 +tags: [mr, open_source, pipeline, onnx] +task: Embeddings +language: mr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_marathi_bert_pipeline` is a Marathi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_marathi_bert_pipeline_mr_5.5.0_3.0_1725700274404.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_marathi_bert_pipeline_mr_5.5.0_3.0_1725700274404.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_marathi_bert_pipeline", lang = "mr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_marathi_bert_pipeline", lang = "mr") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_marathi_bert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|mr| +|Size:|665.6 MB| + +## References + +https://huggingface.co/l3cube-pune/marathi-bert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_marbertv2_ar.md b/docs/_posts/ahmedlone127/2024-09-07-sent_marbertv2_ar.md new file mode 100644 index 00000000000000..b61a785ef7b384 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_marbertv2_ar.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Arabic sent_marbertv2 BertSentenceEmbeddings from UBC-NLP +author: John Snow Labs +name: sent_marbertv2 +date: 2024-09-07 +tags: [ar, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: ar +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_marbertv2` is an Arabic model originally trained by UBC-NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_marbertv2_ar_5.5.0_3.0_1725700941246.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_marbertv2_ar_5.5.0_3.0_1725700941246.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_marbertv2","ar") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_marbertv2","ar") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
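The sample sentence in the snippet above is English, while `sent_marbertv2` is an Arabic model, so an Arabic input is more representative of the intended use. A small, purely illustrative variation (the Arabic sentence is an assumption; the fitted pipeline from above is reused unchanged):

```python
# "I love natural language processing" in Arabic; reuses pipelineModel from the snippet above.
arabic_data = spark.createDataFrame([["أحب معالجة اللغة الطبيعية"]]).toDF("text")
arabic_df = pipelineModel.transform(arabic_data)
```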
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_marbertv2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|606.5 MB| + +## References + +https://huggingface.co/UBC-NLP/MARBERTv2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_marbertv2_pipeline_ar.md b/docs/_posts/ahmedlone127/2024-09-07-sent_marbertv2_pipeline_ar.md new file mode 100644 index 00000000000000..76445b19a2e414 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_marbertv2_pipeline_ar.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Arabic sent_marbertv2_pipeline pipeline BertSentenceEmbeddings from UBC-NLP +author: John Snow Labs +name: sent_marbertv2_pipeline +date: 2024-09-07 +tags: [ar, open_source, pipeline, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_marbertv2_pipeline` is a Arabic model originally trained by UBC-NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_marbertv2_pipeline_ar_5.5.0_3.0_1725700969942.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_marbertv2_pipeline_ar_5.5.0_3.0_1725700969942.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_marbertv2_pipeline", lang = "ar") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_marbertv2_pipeline", lang = "ar") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_marbertv2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ar| +|Size:|607.1 MB| + +## References + +https://huggingface.co/UBC-NLP/MARBERTv2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_mattpuscibert_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_mattpuscibert_en.md new file mode 100644 index 00000000000000..9b6dd4014a0c9c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_mattpuscibert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_mattpuscibert BertSentenceEmbeddings from lfoppiano +author: John Snow Labs +name: sent_mattpuscibert +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_mattpuscibert` is a English model originally trained by lfoppiano. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_mattpuscibert_en_5.5.0_3.0_1725700493464.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_mattpuscibert_en_5.5.0_3.0_1725700493464.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_mattpuscibert","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_mattpuscibert","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_mattpuscibert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/lfoppiano/MatTPUSciBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_mattpuscibert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_mattpuscibert_pipeline_en.md new file mode 100644 index 00000000000000..83d39c480c342c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_mattpuscibert_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_mattpuscibert_pipeline pipeline BertSentenceEmbeddings from lfoppiano +author: John Snow Labs +name: sent_mattpuscibert_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_mattpuscibert_pipeline` is a English model originally trained by lfoppiano. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_mattpuscibert_pipeline_en_5.5.0_3.0_1725700511682.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_mattpuscibert_pipeline_en_5.5.0_3.0_1725700511682.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_mattpuscibert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_mattpuscibert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_mattpuscibert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|410.5 MB| + +## References + +https://huggingface.co/lfoppiano/MatTPUSciBERT + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_me_bert_mixed_v2_mr.md b/docs/_posts/ahmedlone127/2024-09-07-sent_me_bert_mixed_v2_mr.md new file mode 100644 index 00000000000000..b0ef13f173888e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_me_bert_mixed_v2_mr.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Marathi sent_me_bert_mixed_v2 BertSentenceEmbeddings from l3cube-pune +author: John Snow Labs +name: sent_me_bert_mixed_v2 +date: 2024-09-07 +tags: [mr, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: mr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_me_bert_mixed_v2` is a Marathi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_me_bert_mixed_v2_mr_5.5.0_3.0_1725737288449.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_me_bert_mixed_v2_mr_5.5.0_3.0_1725737288449.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_me_bert_mixed_v2","mr") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_me_bert_mixed_v2","mr") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_me_bert_mixed_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|mr| +|Size:|890.6 MB| + +## References + +https://huggingface.co/l3cube-pune/me-bert-mixed-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_me_bert_mixed_v2_pipeline_mr.md b/docs/_posts/ahmedlone127/2024-09-07-sent_me_bert_mixed_v2_pipeline_mr.md new file mode 100644 index 00000000000000..0113748c5b4749 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_me_bert_mixed_v2_pipeline_mr.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Marathi sent_me_bert_mixed_v2_pipeline pipeline BertSentenceEmbeddings from l3cube-pune +author: John Snow Labs +name: sent_me_bert_mixed_v2_pipeline +date: 2024-09-07 +tags: [mr, open_source, pipeline, onnx] +task: Embeddings +language: mr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_me_bert_mixed_v2_pipeline` is a Marathi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_me_bert_mixed_v2_pipeline_mr_5.5.0_3.0_1725737327063.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_me_bert_mixed_v2_pipeline_mr_5.5.0_3.0_1725737327063.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_me_bert_mixed_v2_pipeline", lang = "mr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_me_bert_mixed_v2_pipeline", lang = "mr") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_me_bert_mixed_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|mr| +|Size:|891.2 MB| + +## References + +https://huggingface.co/l3cube-pune/me-bert-mixed-v2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_medbert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_medbert_pipeline_en.md new file mode 100644 index 00000000000000..c97bfa7f93c466 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_medbert_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_medbert_pipeline pipeline BertSentenceEmbeddings from Charangan +author: John Snow Labs +name: sent_medbert_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_medbert_pipeline` is a English model originally trained by Charangan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_medbert_pipeline_en_5.5.0_3.0_1725725564791.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_medbert_pipeline_en_5.5.0_3.0_1725725564791.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_medbert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_medbert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_medbert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/Charangan/MedBERT + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_mika_safeaerobert_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_mika_safeaerobert_en.md new file mode 100644 index 00000000000000..68b32a8136dd98 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_mika_safeaerobert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_mika_safeaerobert BertSentenceEmbeddings from NASA-AIML +author: John Snow Labs +name: sent_mika_safeaerobert +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_mika_safeaerobert` is a English model originally trained by NASA-AIML. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_mika_safeaerobert_en_5.5.0_3.0_1725737163795.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_mika_safeaerobert_en_5.5.0_3.0_1725737163795.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_mika_safeaerobert","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_mika_safeaerobert","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_mika_safeaerobert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/NASA-AIML/MIKA_SafeAeroBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_mizbert_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_mizbert_en.md new file mode 100644 index 00000000000000..3b55612ef14a6c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_mizbert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_mizbert BertSentenceEmbeddings from robzchhangte +author: John Snow Labs +name: sent_mizbert +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_mizbert` is a English model originally trained by robzchhangte. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_mizbert_en_5.5.0_3.0_1725724587354.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_mizbert_en_5.5.0_3.0_1725724587354.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_mizbert","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_mizbert","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_mizbert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/robzchhangte/MizBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_mlm_xlmr_base_vlsp_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_mlm_xlmr_base_vlsp_en.md new file mode 100644 index 00000000000000..e2d490f9c6bbd2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_mlm_xlmr_base_vlsp_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_mlm_xlmr_base_vlsp XlmRoBertaSentenceEmbeddings from vietn +author: John Snow Labs +name: sent_mlm_xlmr_base_vlsp +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_mlm_xlmr_base_vlsp` is a English model originally trained by vietn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_mlm_xlmr_base_vlsp_en_5.5.0_3.0_1725681501221.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_mlm_xlmr_base_vlsp_en_5.5.0_3.0_1725681501221.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_mlm_xlmr_base_vlsp","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_mlm_xlmr_base_vlsp","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
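If the vectors are needed as Spark ML features rather than annotation structs, one possible extension is to append Spark NLP's `EmbeddingsFinisher` to the same pipeline. This is a sketch, not part of the original card; the stage and column names are illustrative:

```python
from sparknlp.base import EmbeddingsFinisher

# Converts the annotation column produced above into plain vector columns.
finisher = EmbeddingsFinisher() \
    .setInputCols(["embeddings"]) \
    .setOutputCols(["finished_embeddings"]) \
    .setOutputAsVector(True)

pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings, finisher])
pipelineDF = pipeline.fit(data).transform(data)
pipelineDF.select("finished_embeddings").show(truncate=60)
```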
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_mlm_xlmr_base_vlsp| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/vietn/mlm-xlmr_base-vlsp \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_mlm_xlmr_base_vlsp_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_mlm_xlmr_base_vlsp_pipeline_en.md new file mode 100644 index 00000000000000..c8129d7cc46a17 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_mlm_xlmr_base_vlsp_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_mlm_xlmr_base_vlsp_pipeline pipeline XlmRoBertaSentenceEmbeddings from vietn +author: John Snow Labs +name: sent_mlm_xlmr_base_vlsp_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_mlm_xlmr_base_vlsp_pipeline` is a English model originally trained by vietn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_mlm_xlmr_base_vlsp_pipeline_en_5.5.0_3.0_1725681553633.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_mlm_xlmr_base_vlsp_pipeline_en_5.5.0_3.0_1725681553633.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_mlm_xlmr_base_vlsp_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_mlm_xlmr_base_vlsp_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_mlm_xlmr_base_vlsp_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/vietn/mlm-xlmr_base-vlsp + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_model_21200_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_model_21200_en.md new file mode 100644 index 00000000000000..2d3b5550de67be --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_model_21200_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_model_21200 XlmRoBertaSentenceEmbeddings from yemen2016 +author: John Snow Labs +name: sent_model_21200 +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_model_21200` is a English model originally trained by yemen2016. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_model_21200_en_5.5.0_3.0_1725738050160.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_model_21200_en_5.5.0_3.0_1725738050160.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_model_21200","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_model_21200","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
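+
+The Python example above uses `DocumentAssembler`, `SentenceDetectorDLModel`, `XlmRoBertaSentenceEmbeddings`, `Pipeline`, and a `spark` session without showing where they come from. A sketch of the setup it presumably relies on (standard Spark NLP and PySpark imports; adjust to your environment):
+
+```python
+import sparknlp
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetectorDLModel, XlmRoBertaSentenceEmbeddings
+from pyspark.ml import Pipeline
+
+# Starts a local Spark session with the Spark NLP library attached.
+spark = sparknlp.start()
+```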
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_model_21200| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/yemen2016/model_21200 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_model_21200_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_model_21200_pipeline_en.md new file mode 100644 index 00000000000000..484dcc3743cf06 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_model_21200_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_model_21200_pipeline pipeline XlmRoBertaSentenceEmbeddings from yemen2016 +author: John Snow Labs +name: sent_model_21200_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_model_21200_pipeline` is a English model originally trained by yemen2016. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_model_21200_pipeline_en_5.5.0_3.0_1725738097566.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_model_21200_pipeline_en_5.5.0_3.0_1725738097566.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_model_21200_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_model_21200_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_model_21200_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/yemen2016/model_21200 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_ofa_multi_100_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_ofa_multi_100_en.md new file mode 100644 index 00000000000000..f3c3ce214c417c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_ofa_multi_100_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_ofa_multi_100 XlmRoBertaSentenceEmbeddings from yihongLiu +author: John Snow Labs +name: sent_ofa_multi_100 +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_ofa_multi_100` is a English model originally trained by yihongLiu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_ofa_multi_100_en_5.5.0_3.0_1725683152267.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_ofa_multi_100_en_5.5.0_3.0_1725683152267.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_ofa_multi_100","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_ofa_multi_100","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
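+
+The sample input above is a single English sentence; in practice any DataFrame with a `"text"` string column can be passed to the fitted pipeline, producing one set of sentence embeddings per row. A small batch sketch (the sentences are illustrative):
+
+```python
+# Reusing `pipelineModel` from the example above.
+batch = spark.createDataFrame(
+    [["First document to embed."], ["Second document to embed."]]
+).toDF("text")
+batch_embeddings = pipelineModel.transform(batch)
+batch_embeddings.select("embeddings").show(truncate=False)
+```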
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_ofa_multi_100| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/yihongLiu/ofa-multi-100 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_ofa_multi_100_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_ofa_multi_100_pipeline_en.md new file mode 100644 index 00000000000000..d06607dd1d2487 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_ofa_multi_100_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_ofa_multi_100_pipeline pipeline XlmRoBertaSentenceEmbeddings from yihongLiu +author: John Snow Labs +name: sent_ofa_multi_100_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_ofa_multi_100_pipeline` is a English model originally trained by yihongLiu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_ofa_multi_100_pipeline_en_5.5.0_3.0_1725683221215.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_ofa_multi_100_pipeline_en_5.5.0_3.0_1725683221215.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_ofa_multi_100_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_ofa_multi_100_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
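+
+The stages listed under "Included Models" below can also be inspected programmatically. This is a sketch; it assumes the `PretrainedPipeline` wrapper exposes its underlying `PipelineModel` as `.model`, which is the usual Spark NLP behaviour:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("sent_ofa_multi_100_pipeline", lang="en")
+for stage in pipeline.model.stages:
+    # Prints the annotator class of each stage, e.g. DocumentAssembler, TokenizerModel, ...
+    print(type(stage).__name__)
+```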
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_ofa_multi_100_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/yihongLiu/ofa-multi-100 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_ofa_multi_768_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_ofa_multi_768_en.md new file mode 100644 index 00000000000000..74822febc46092 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_ofa_multi_768_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_ofa_multi_768 XlmRoBertaSentenceEmbeddings from yihongLiu +author: John Snow Labs +name: sent_ofa_multi_768 +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_ofa_multi_768` is a English model originally trained by yihongLiu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_ofa_multi_768_en_5.5.0_3.0_1725683334716.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_ofa_multi_768_en_5.5.0_3.0_1725683334716.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_ofa_multi_768","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_ofa_multi_768","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_ofa_multi_768| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/yihongLiu/ofa-multi-768 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_ofa_multi_768_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_ofa_multi_768_pipeline_en.md new file mode 100644 index 00000000000000..fc6b1f1cf1d280 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_ofa_multi_768_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_ofa_multi_768_pipeline pipeline XlmRoBertaSentenceEmbeddings from yihongLiu +author: John Snow Labs +name: sent_ofa_multi_768_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_ofa_multi_768_pipeline` is a English model originally trained by yihongLiu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_ofa_multi_768_pipeline_en_5.5.0_3.0_1725683403115.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_ofa_multi_768_pipeline_en_5.5.0_3.0_1725683403115.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_ofa_multi_768_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_ofa_multi_768_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_ofa_multi_768_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.5 GB| + +## References + +https://huggingface.co/yihongLiu/ofa-multi-768 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_patana_chilean_spanish_bert_es.md b/docs/_posts/ahmedlone127/2024-09-07-sent_patana_chilean_spanish_bert_es.md new file mode 100644 index 00000000000000..5a13bd641e98e8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_patana_chilean_spanish_bert_es.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Castilian, Spanish sent_patana_chilean_spanish_bert BertSentenceEmbeddings from dccuchile +author: John Snow Labs +name: sent_patana_chilean_spanish_bert +date: 2024-09-07 +tags: [es, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_patana_chilean_spanish_bert` is a Castilian, Spanish model originally trained by dccuchile. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_patana_chilean_spanish_bert_es_5.5.0_3.0_1725749147303.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_patana_chilean_spanish_bert_es_5.5.0_3.0_1725749147303.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_patana_chilean_spanish_bert","es") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_patana_chilean_spanish_bert","es") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
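+
+The embeddings land in the `embeddings` output column as Spark NLP annotation structs. A sketch of pulling each sentence and its vector out of `pipelineDF` from the example above (field names follow the standard annotation schema):
+
+```python
+pipelineDF.selectExpr("explode(embeddings) as ann") \
+    .selectExpr("ann.result as sentence", "ann.embeddings as vector") \
+    .show(truncate=80)
+```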
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_patana_chilean_spanish_bert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|es| +|Size:|409.4 MB| + +## References + +https://huggingface.co/dccuchile/patana-chilean-spanish-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_pharmbert_cased_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_pharmbert_cased_en.md new file mode 100644 index 00000000000000..4b7f2333e7ee71 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_pharmbert_cased_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_pharmbert_cased BertSentenceEmbeddings from Lianglab +author: John Snow Labs +name: sent_pharmbert_cased +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_pharmbert_cased` is a English model originally trained by Lianglab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_pharmbert_cased_en_5.5.0_3.0_1725736635410.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_pharmbert_cased_en_5.5.0_3.0_1725736635410.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_pharmbert_cased","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_pharmbert_cased","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_pharmbert_cased| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/Lianglab/PharmBERT-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_retromae_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_retromae_en.md new file mode 100644 index 00000000000000..f9ca7c4826fd4f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_retromae_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_retromae BertSentenceEmbeddings from Shitao +author: John Snow Labs +name: sent_retromae +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_retromae` is a English model originally trained by Shitao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_retromae_en_5.5.0_3.0_1725700517308.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_retromae_en_5.5.0_3.0_1725700517308.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_retromae","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_retromae","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
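+
+If you need plain Spark ML vectors rather than annotation structs (for example to feed a downstream classifier), Spark NLP's `EmbeddingsFinisher` can convert the output. A sketch reusing `pipelineDF` from the example; the output column name is arbitrary:
+
+```python
+from sparknlp.base import EmbeddingsFinisher
+
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["sentence_vector"]) \
+    .setOutputAsVector(True)
+
+finisher.transform(pipelineDF).select("sentence_vector").show(truncate=False)
+```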
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_retromae| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.6 MB| + +## References + +https://huggingface.co/Shitao/RetroMAE \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_retromae_msmarco_distill_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_retromae_msmarco_distill_en.md new file mode 100644 index 00000000000000..8dffd715d9da8f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_retromae_msmarco_distill_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_retromae_msmarco_distill BertSentenceEmbeddings from Shitao +author: John Snow Labs +name: sent_retromae_msmarco_distill +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_retromae_msmarco_distill` is a English model originally trained by Shitao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_retromae_msmarco_distill_en_5.5.0_3.0_1725749026335.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_retromae_msmarco_distill_en_5.5.0_3.0_1725749026335.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_retromae_msmarco_distill","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_retromae_msmarco_distill","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_retromae_msmarco_distill| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.7 MB| + +## References + +https://huggingface.co/Shitao/RetroMAE_MSMARCO_distill \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_retromae_msmarco_finetune_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_retromae_msmarco_finetune_en.md new file mode 100644 index 00000000000000..95f58fea40bdf0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_retromae_msmarco_finetune_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_retromae_msmarco_finetune BertSentenceEmbeddings from Shitao +author: John Snow Labs +name: sent_retromae_msmarco_finetune +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_retromae_msmarco_finetune` is a English model originally trained by Shitao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_retromae_msmarco_finetune_en_5.5.0_3.0_1725736752162.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_retromae_msmarco_finetune_en_5.5.0_3.0_1725736752162.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_retromae_msmarco_finetune","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_retromae_msmarco_finetune","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_retromae_msmarco_finetune| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.7 MB| + +## References + +https://huggingface.co/Shitao/RetroMAE_MSMARCO_finetune \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_retromae_msmarco_finetune_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_retromae_msmarco_finetune_pipeline_en.md new file mode 100644 index 00000000000000..2ccfb8a0c22ce3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_retromae_msmarco_finetune_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_retromae_msmarco_finetune_pipeline pipeline BertSentenceEmbeddings from Shitao +author: John Snow Labs +name: sent_retromae_msmarco_finetune_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_retromae_msmarco_finetune_pipeline` is a English model originally trained by Shitao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_retromae_msmarco_finetune_pipeline_en_5.5.0_3.0_1725736772790.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_retromae_msmarco_finetune_pipeline_en_5.5.0_3.0_1725736772790.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_retromae_msmarco_finetune_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_retromae_msmarco_finetune_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
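+
+Since the pipeline produces one dense vector per sentence, a common follow-up is semantic similarity. A sketch using `fullAnnotate` and NumPy; the `embeddings` output key and the sample sentences are assumptions for illustration:
+
+```python
+import numpy as np
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("sent_retromae_msmarco_finetune_pipeline", lang="en")
+
+def embed(text):
+    # fullAnnotate returns one dict per input; take the first sentence's vector.
+    return np.array(pipeline.fullAnnotate(text)[0]["embeddings"][0].embeddings)
+
+a, b = embed("A man is playing a guitar."), embed("Someone plays an instrument.")
+print(float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b))))  # cosine similarity
+```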
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_retromae_msmarco_finetune_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|408.2 MB| + +## References + +https://huggingface.co/Shitao/RetroMAE_MSMARCO_finetune + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_retromae_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_retromae_pipeline_en.md new file mode 100644 index 00000000000000..d6b515c7f8b25a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_retromae_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_retromae_pipeline pipeline BertSentenceEmbeddings from Shitao +author: John Snow Labs +name: sent_retromae_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_retromae_pipeline` is a English model originally trained by Shitao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_retromae_pipeline_en_5.5.0_3.0_1725700536017.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_retromae_pipeline_en_5.5.0_3.0_1725700536017.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_retromae_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_retromae_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_retromae_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/Shitao/RetroMAE + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_rxbert_v1_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_rxbert_v1_en.md new file mode 100644 index 00000000000000..12465838475f54 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_rxbert_v1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_rxbert_v1 BertSentenceEmbeddings from seldas +author: John Snow Labs +name: sent_rxbert_v1 +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_rxbert_v1` is a English model originally trained by seldas. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_rxbert_v1_en_5.5.0_3.0_1725725397288.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_rxbert_v1_en_5.5.0_3.0_1725725397288.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_rxbert_v1","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_rxbert_v1","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_rxbert_v1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.0 MB| + +## References + +https://huggingface.co/seldas/rxbert-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_swiss_german_xlm_roberta_base_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_swiss_german_xlm_roberta_base_en.md new file mode 100644 index 00000000000000..5d910b71075cea --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_swiss_german_xlm_roberta_base_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_swiss_german_xlm_roberta_base XlmRoBertaSentenceEmbeddings from ZurichNLP +author: John Snow Labs +name: sent_swiss_german_xlm_roberta_base +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_swiss_german_xlm_roberta_base` is a English model originally trained by ZurichNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_swiss_german_xlm_roberta_base_en_5.5.0_3.0_1725713871127.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_swiss_german_xlm_roberta_base_en_5.5.0_3.0_1725713871127.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_swiss_german_xlm_roberta_base","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_swiss_german_xlm_roberta_base","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
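+
+For low-latency inference on single documents or small batches outside of a distributed job, the fitted pipeline can be wrapped in a `LightPipeline`. A sketch reusing `pipelineModel` from the example above:
+
+```python
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+result = light.annotate("I love spark-nlp")
+# Keys correspond to the output columns of the pipeline stages,
+# e.g. document, sentence, embeddings.
+print(result.keys())
+```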
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_swiss_german_xlm_roberta_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/ZurichNLP/swiss-german-xlm-roberta-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_swiss_german_xlm_roberta_base_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_swiss_german_xlm_roberta_base_pipeline_en.md new file mode 100644 index 00000000000000..17da318c3ad46c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_swiss_german_xlm_roberta_base_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_swiss_german_xlm_roberta_base_pipeline pipeline XlmRoBertaSentenceEmbeddings from ZurichNLP +author: John Snow Labs +name: sent_swiss_german_xlm_roberta_base_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_swiss_german_xlm_roberta_base_pipeline` is a English model originally trained by ZurichNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_swiss_german_xlm_roberta_base_pipeline_en_5.5.0_3.0_1725713917840.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_swiss_german_xlm_roberta_base_pipeline_en_5.5.0_3.0_1725713917840.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_swiss_german_xlm_roberta_base_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_swiss_german_xlm_roberta_base_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_swiss_german_xlm_roberta_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/ZurichNLP/swiss-german-xlm-roberta-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_tb_xlm_r_fpt_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_tb_xlm_r_fpt_en.md new file mode 100644 index 00000000000000..b2cf1952270625 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_tb_xlm_r_fpt_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_tb_xlm_r_fpt XlmRoBertaSentenceEmbeddings from aplycaebous +author: John Snow Labs +name: sent_tb_xlm_r_fpt +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_tb_xlm_r_fpt` is a English model originally trained by aplycaebous. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_tb_xlm_r_fpt_en_5.5.0_3.0_1725738183980.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_tb_xlm_r_fpt_en_5.5.0_3.0_1725738183980.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_tb_xlm_r_fpt","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_tb_xlm_r_fpt","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_tb_xlm_r_fpt| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/aplycaebous/tb-XLM-R-fpt \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_tech_roberta_pipeline_vi.md b/docs/_posts/ahmedlone127/2024-09-07-sent_tech_roberta_pipeline_vi.md new file mode 100644 index 00000000000000..ac8f224f45b771 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_tech_roberta_pipeline_vi.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Vietnamese sent_tech_roberta_pipeline pipeline XlmRoBertaSentenceEmbeddings from imta-ai +author: John Snow Labs +name: sent_tech_roberta_pipeline +date: 2024-09-07 +tags: [vi, open_source, pipeline, onnx] +task: Embeddings +language: vi +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_tech_roberta_pipeline` is a Vietnamese model originally trained by imta-ai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_tech_roberta_pipeline_vi_5.5.0_3.0_1725681218515.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_tech_roberta_pipeline_vi_5.5.0_3.0_1725681218515.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_tech_roberta_pipeline", lang = "vi") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_tech_roberta_pipeline", lang = "vi") +val annotations = pipeline.transform(df) + +``` +
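+
+Because this pipeline is registered under `lang = "vi"`, Vietnamese input text is the intended use. A quick single-document sketch; the sample sentence and the `embeddings` output key are illustrative assumptions:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("sent_tech_roberta_pipeline", lang="vi")
+result = pipeline.fullAnnotate("Tôi yêu xử lý ngôn ngữ tự nhiên.")[0]
+vector = result["embeddings"][0].embeddings
+print(len(vector), vector[:5])  # embedding dimensionality and first few values
+```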
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_tech_roberta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|vi| +|Size:|943.5 MB| + +## References + +https://huggingface.co/imta-ai/tech-roberta + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_tech_roberta_vi.md b/docs/_posts/ahmedlone127/2024-09-07-sent_tech_roberta_vi.md new file mode 100644 index 00000000000000..648b4e083214b9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_tech_roberta_vi.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Vietnamese sent_tech_roberta XlmRoBertaSentenceEmbeddings from imta-ai +author: John Snow Labs +name: sent_tech_roberta +date: 2024-09-07 +tags: [vi, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: vi +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_tech_roberta` is a Vietnamese model originally trained by imta-ai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_tech_roberta_vi_5.5.0_3.0_1725681131702.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_tech_roberta_vi_5.5.0_3.0_1725681131702.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_tech_roberta","vi") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_tech_roberta","vi") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_tech_roberta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|vi| +|Size:|942.9 MB| + +## References + +https://huggingface.co/imta-ai/tech-roberta \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_telugu_bert_te.md b/docs/_posts/ahmedlone127/2024-09-07-sent_telugu_bert_te.md new file mode 100644 index 00000000000000..bf155b3df65b24 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_telugu_bert_te.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Telugu sent_telugu_bert BertSentenceEmbeddings from l3cube-pune +author: John Snow Labs +name: sent_telugu_bert +date: 2024-09-07 +tags: [te, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: te +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_telugu_bert` is a Telugu model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_telugu_bert_te_5.5.0_3.0_1725724624043.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_telugu_bert_te_5.5.0_3.0_1725724624043.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_telugu_bert","te") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_telugu_bert","te") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_telugu_bert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|te| +|Size:|890.5 MB| + +## References + +https://huggingface.co/l3cube-pune/telugu-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_telugu_bertu_te.md b/docs/_posts/ahmedlone127/2024-09-07-sent_telugu_bertu_te.md new file mode 100644 index 00000000000000..298d9360540e3b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_telugu_bertu_te.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Telugu sent_telugu_bertu BertSentenceEmbeddings from kuppuluri +author: John Snow Labs +name: sent_telugu_bertu +date: 2024-09-07 +tags: [te, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: te +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_telugu_bertu` is a Telugu model originally trained by kuppuluri. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_telugu_bertu_te_5.5.0_3.0_1725700977891.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_telugu_bertu_te_5.5.0_3.0_1725700977891.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_telugu_bertu","te") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_telugu_bertu","te") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_telugu_bertu| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|te| +|Size:|412.5 MB| + +## References + +https://huggingface.co/kuppuluri/telugu_bertu \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_test999_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_test999_en.md new file mode 100644 index 00000000000000..6376dd5d515fdd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_test999_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_test999 XlmRoBertaSentenceEmbeddings from NbAiLabArchive +author: John Snow Labs +name: sent_test999 +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_test999` is a English model originally trained by NbAiLabArchive. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_test999_en_5.5.0_3.0_1725684440158.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_test999_en_5.5.0_3.0_1725684440158.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_test999","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_test999","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_test999| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/NbAiLabArchive/test999 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_test999_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_test999_pipeline_en.md new file mode 100644 index 00000000000000..41648b291a2b16 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_test999_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_test999_pipeline pipeline XlmRoBertaSentenceEmbeddings from NbAiLabArchive +author: John Snow Labs +name: sent_test999_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_test999_pipeline` is a English model originally trained by NbAiLabArchive. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_test999_pipeline_en_5.5.0_3.0_1725684487557.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_test999_pipeline_en_5.5.0_3.0_1725684487557.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +from sparknlp.pretrained import PretrainedPipeline + +# "df" is an existing Spark DataFrame holding the raw text (by convention in a "text" column) +pipeline = PretrainedPipeline("sent_test999_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala +import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline + +// "df" is an existing Spark DataFrame holding the raw text (by convention in a "text" column) +val pipeline = new PretrainedPipeline("sent_test999_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
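The snippet above assumes an already-existing DataFrame `df`. As a minimal, hedged sketch, the input could be built as follows; the example sentence and the `text` column name are assumptions that follow the convention used by the other cards in this series.

```python
# Hypothetical input for the snippet above; the example sentence and the
# "text" column name are assumptions following the convention of the other cards.
import sparknlp
from sparknlp.pretrained import PretrainedPipeline

spark = sparknlp.start()
df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")

pipeline = PretrainedPipeline("sent_test999_pipeline", lang="en")
annotations = pipeline.transform(df)
annotations.printSchema()  # inspect which annotation columns the pipeline adds
```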
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_test999_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/NbAiLabArchive/test999 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_translit_ppa_mediterranean_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-07-sent_translit_ppa_mediterranean_pipeline_xx.md new file mode 100644 index 00000000000000..f8e0bb4e756b30 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_translit_ppa_mediterranean_pipeline_xx.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Multilingual sent_translit_ppa_mediterranean_pipeline pipeline XlmRoBertaSentenceEmbeddings from orxhelili +author: John Snow Labs +name: sent_translit_ppa_mediterranean_pipeline +date: 2024-09-07 +tags: [xx, open_source, pipeline, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_translit_ppa_mediterranean_pipeline` is a Multilingual model originally trained by orxhelili. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_translit_ppa_mediterranean_pipeline_xx_5.5.0_3.0_1725682893426.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_translit_ppa_mediterranean_pipeline_xx_5.5.0_3.0_1725682893426.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +from sparknlp.pretrained import PretrainedPipeline + +# "df" is an existing Spark DataFrame holding the raw text (by convention in a "text" column) +pipeline = PretrainedPipeline("sent_translit_ppa_mediterranean_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala +import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline + +// "df" is an existing Spark DataFrame holding the raw text (by convention in a "text" column) +val pipeline = new PretrainedPipeline("sent_translit_ppa_mediterranean_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_translit_ppa_mediterranean_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|1.5 GB| + +## References + +https://huggingface.co/orxhelili/translit_ppa_mediterranean + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_translit_ppa_mediterranean_xx.md b/docs/_posts/ahmedlone127/2024-09-07-sent_translit_ppa_mediterranean_xx.md new file mode 100644 index 00000000000000..736a3f3161e919 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_translit_ppa_mediterranean_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual sent_translit_ppa_mediterranean XlmRoBertaSentenceEmbeddings from orxhelili +author: John Snow Labs +name: sent_translit_ppa_mediterranean +date: 2024-09-07 +tags: [xx, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_translit_ppa_mediterranean` is a Multilingual model originally trained by orxhelili. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_translit_ppa_mediterranean_xx_5.5.0_3.0_1725682818613.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_translit_ppa_mediterranean_xx_5.5.0_3.0_1725682818613.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_translit_ppa_mediterranean","xx") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_translit_ppa_mediterranean","xx") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_translit_ppa_mediterranean| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|1.5 GB| + +## References + +https://huggingface.co/orxhelili/translit_ppa_mediterranean \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_tulio_chilean_spanish_bert_es.md b/docs/_posts/ahmedlone127/2024-09-07-sent_tulio_chilean_spanish_bert_es.md new file mode 100644 index 00000000000000..85d3aaa76cf24d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_tulio_chilean_spanish_bert_es.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Castilian, Spanish sent_tulio_chilean_spanish_bert BertSentenceEmbeddings from dccuchile +author: John Snow Labs +name: sent_tulio_chilean_spanish_bert +date: 2024-09-07 +tags: [es, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_tulio_chilean_spanish_bert` is a Castilian, Spanish model originally trained by dccuchile. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_tulio_chilean_spanish_bert_es_5.5.0_3.0_1725701011693.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_tulio_chilean_spanish_bert_es_5.5.0_3.0_1725701011693.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_tulio_chilean_spanish_bert","es") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_tulio_chilean_spanish_bert","es") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_tulio_chilean_spanish_bert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|es| +|Size:|409.3 MB| + +## References + +https://huggingface.co/dccuchile/tulio-chilean-spanish-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_tulio_chilean_spanish_bert_pipeline_es.md b/docs/_posts/ahmedlone127/2024-09-07-sent_tulio_chilean_spanish_bert_pipeline_es.md new file mode 100644 index 00000000000000..6fd9b7cbd44bfc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_tulio_chilean_spanish_bert_pipeline_es.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Castilian, Spanish sent_tulio_chilean_spanish_bert_pipeline pipeline BertSentenceEmbeddings from dccuchile +author: John Snow Labs +name: sent_tulio_chilean_spanish_bert_pipeline +date: 2024-09-07 +tags: [es, open_source, pipeline, onnx] +task: Embeddings +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_tulio_chilean_spanish_bert_pipeline` is a Castilian, Spanish model originally trained by dccuchile. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_tulio_chilean_spanish_bert_pipeline_es_5.5.0_3.0_1725701030916.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_tulio_chilean_spanish_bert_pipeline_es_5.5.0_3.0_1725701030916.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +from sparknlp.pretrained import PretrainedPipeline + +# "df" is an existing Spark DataFrame holding the raw text (by convention in a "text" column) +pipeline = PretrainedPipeline("sent_tulio_chilean_spanish_bert_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala +import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline + +// "df" is an existing Spark DataFrame holding the raw text (by convention in a "text" column) +val pipeline = new PretrainedPipeline("sent_tulio_chilean_spanish_bert_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_tulio_chilean_spanish_bert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|409.8 MB| + +## References + +https://huggingface.co/dccuchile/tulio-chilean-spanish-bert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_turkish_tiny_bert_uncased_tr.md b/docs/_posts/ahmedlone127/2024-09-07-sent_turkish_tiny_bert_uncased_tr.md new file mode 100644 index 00000000000000..f0383d9f024499 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_turkish_tiny_bert_uncased_tr.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Turkish sent_turkish_tiny_bert_uncased BertSentenceEmbeddings from ytu-ce-cosmos +author: John Snow Labs +name: sent_turkish_tiny_bert_uncased +date: 2024-09-07 +tags: [tr, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: tr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_turkish_tiny_bert_uncased` is a Turkish model originally trained by ytu-ce-cosmos. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_turkish_tiny_bert_uncased_tr_5.5.0_3.0_1725700833778.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_turkish_tiny_bert_uncased_tr_5.5.0_3.0_1725700833778.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_turkish_tiny_bert_uncased","tr") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_turkish_tiny_bert_uncased","tr") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
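As an optional illustration of what these sentence embeddings can be used for, the sketch below embeds two made-up Turkish sentences with the fitted `pipelineModel` from the example above and compares them with cosine similarity; the sentences are purely illustrative and NumPy is used only for the arithmetic.

```python
# Rough sketch, not from the original card: embed two (made-up) Turkish
# sentences with the fitted `pipelineModel` from above and compare them
# with cosine similarity.
import numpy as np

texts = spark.createDataFrame(
    [["Bugün hava çok güzel"], ["Hava bugün gerçekten güzel"]]
).toDF("text")

rows = pipelineModel.transform(texts).select("embeddings.embeddings").collect()
v1 = np.array(rows[0][0][0])  # first text, first sentence, embedding vector
v2 = np.array(rows[1][0][0])

cosine = float(v1 @ v2 / (np.linalg.norm(v1) * np.linalg.norm(v2)))
print(f"cosine similarity: {cosine:.3f}")
```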
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_turkish_tiny_bert_uncased| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|tr| +|Size:|17.4 MB| + +## References + +https://huggingface.co/ytu-ce-cosmos/turkish-tiny-bert-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_uzbert_base_uncased_pipeline_uz.md b/docs/_posts/ahmedlone127/2024-09-07-sent_uzbert_base_uncased_pipeline_uz.md new file mode 100644 index 00000000000000..2d6e5aea967419 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_uzbert_base_uncased_pipeline_uz.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Uzbek sent_uzbert_base_uncased_pipeline pipeline BertSentenceEmbeddings from coppercitylabs +author: John Snow Labs +name: sent_uzbert_base_uncased_pipeline +date: 2024-09-07 +tags: [uz, open_source, pipeline, onnx] +task: Embeddings +language: uz +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_uzbert_base_uncased_pipeline` is a Uzbek model originally trained by coppercitylabs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_uzbert_base_uncased_pipeline_uz_5.5.0_3.0_1725724861123.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_uzbert_base_uncased_pipeline_uz_5.5.0_3.0_1725724861123.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +from sparknlp.pretrained import PretrainedPipeline + +# "df" is an existing Spark DataFrame holding the raw text (by convention in a "text" column) +pipeline = PretrainedPipeline("sent_uzbert_base_uncased_pipeline", lang = "uz") +annotations = pipeline.transform(df) + +``` +```scala +import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline + +// "df" is an existing Spark DataFrame holding the raw text (by convention in a "text" column) +val pipeline = new PretrainedPipeline("sent_uzbert_base_uncased_pipeline", lang = "uz") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_uzbert_base_uncased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|uz| +|Size:|407.4 MB| + +## References + +https://huggingface.co/coppercitylabs/uzbert-base-uncased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- BertSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_uzbert_base_uncased_uz.md b/docs/_posts/ahmedlone127/2024-09-07-sent_uzbert_base_uncased_uz.md new file mode 100644 index 00000000000000..850365d8237b73 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_uzbert_base_uncased_uz.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Uzbek sent_uzbert_base_uncased BertSentenceEmbeddings from coppercitylabs +author: John Snow Labs +name: sent_uzbert_base_uncased +date: 2024-09-07 +tags: [uz, open_source, onnx, sentence_embeddings, bert] +task: Embeddings +language: uz +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_uzbert_base_uncased` is a Uzbek model originally trained by coppercitylabs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_uzbert_base_uncased_uz_5.5.0_3.0_1725724842119.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_uzbert_base_uncased_uz_5.5.0_3.0_1725724842119.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = BertSentenceEmbeddings.pretrained("sent_uzbert_base_uncased","uz") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = BertSentenceEmbeddings.pretrained("sent_uzbert_base_uncased","uz") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_uzbert_base_uncased| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|uz| +|Size:|406.9 MB| + +## References + +https://huggingface.co/coppercitylabs/uzbert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_vien_resume_roberta_base_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_vien_resume_roberta_base_pipeline_en.md new file mode 100644 index 00000000000000..4318ad3dcffc48 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_vien_resume_roberta_base_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_vien_resume_roberta_base_pipeline pipeline XlmRoBertaSentenceEmbeddings from thaidv96 +author: John Snow Labs +name: sent_vien_resume_roberta_base_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_vien_resume_roberta_base_pipeline` is a English model originally trained by thaidv96. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_vien_resume_roberta_base_pipeline_en_5.5.0_3.0_1725714307322.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_vien_resume_roberta_base_pipeline_en_5.5.0_3.0_1725714307322.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +from sparknlp.pretrained import PretrainedPipeline + +# "df" is an existing Spark DataFrame holding the raw text (by convention in a "text" column) +pipeline = PretrainedPipeline("sent_vien_resume_roberta_base_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala +import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline + +// "df" is an existing Spark DataFrame holding the raw text (by convention in a "text" column) +val pipeline = new PretrainedPipeline("sent_vien_resume_roberta_base_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_vien_resume_roberta_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/thaidv96/vien-resume-roberta-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_xlm_r_with_transliteration_average_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_xlm_r_with_transliteration_average_en.md new file mode 100644 index 00000000000000..372822b380a60b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_xlm_r_with_transliteration_average_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_xlm_r_with_transliteration_average XlmRoBertaSentenceEmbeddings from yihongLiu +author: John Snow Labs +name: sent_xlm_r_with_transliteration_average +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_xlm_r_with_transliteration_average` is a English model originally trained by yihongLiu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_xlm_r_with_transliteration_average_en_5.5.0_3.0_1725714544540.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_xlm_r_with_transliteration_average_en_5.5.0_3.0_1725714544540.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_r_with_transliteration_average","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_r_with_transliteration_average","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
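For ad-hoc, single-string inference it is common in Spark NLP to wrap the fitted model in a `LightPipeline`, which runs the same stages without the DataFrame round trip. A minimal sketch, assuming the `pipelineModel` fitted in the example above:

```python
# Sketch, assuming the fitted `pipelineModel` from the example above.
# LightPipeline runs the same stages on plain strings, which is handy
# for low-latency, single-document use.
from sparknlp.base import LightPipeline

light = LightPipeline(pipelineModel)
result = light.fullAnnotate("I love spark-nlp")[0]
sentence_vectors = [ann.embeddings for ann in result["embeddings"]]
```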
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_xlm_r_with_transliteration_average| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|850.0 MB| + +## References + +https://huggingface.co/yihongLiu/xlm-r-with-transliteration-average \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_xlm_r_with_transliteration_minangkabau_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_xlm_r_with_transliteration_minangkabau_en.md new file mode 100644 index 00000000000000..7f0011e08543e5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_xlm_r_with_transliteration_minangkabau_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_xlm_r_with_transliteration_minangkabau XlmRoBertaSentenceEmbeddings from yihongLiu +author: John Snow Labs +name: sent_xlm_r_with_transliteration_minangkabau +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_xlm_r_with_transliteration_minangkabau` is a English model originally trained by yihongLiu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_xlm_r_with_transliteration_minangkabau_en_5.5.0_3.0_1725681438250.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_xlm_r_with_transliteration_minangkabau_en_5.5.0_3.0_1725681438250.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_r_with_transliteration_minangkabau","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_r_with_transliteration_minangkabau","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_xlm_r_with_transliteration_minangkabau| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|843.2 MB| + +## References + +https://huggingface.co/yihongLiu/xlm-r-with-transliteration-min \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_1024_256_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_1024_256_en.md new file mode 100644 index 00000000000000..59b9ac58f5a821 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_1024_256_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_xlm_roberta_base_1024_256 XlmRoBertaSentenceEmbeddings from kdercksen +author: John Snow Labs +name: sent_xlm_roberta_base_1024_256 +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_xlm_roberta_base_1024_256` is a English model originally trained by kdercksen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_1024_256_en_5.5.0_3.0_1725738225105.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_1024_256_en_5.5.0_3.0_1725738225105.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_roberta_base_1024_256","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_roberta_base_1024_256","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_xlm_roberta_base_1024_256| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|653.9 MB| + +## References + +https://huggingface.co/kdercksen/xlm-roberta-base-1024-256 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_1024_256_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_1024_256_pipeline_en.md new file mode 100644 index 00000000000000..ff665f08489b07 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_1024_256_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_xlm_roberta_base_1024_256_pipeline pipeline XlmRoBertaSentenceEmbeddings from kdercksen +author: John Snow Labs +name: sent_xlm_roberta_base_1024_256_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_xlm_roberta_base_1024_256_pipeline` is a English model originally trained by kdercksen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_1024_256_pipeline_en_5.5.0_3.0_1725738407286.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_1024_256_pipeline_en_5.5.0_3.0_1725738407286.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +from sparknlp.pretrained import PretrainedPipeline + +# "df" is an existing Spark DataFrame holding the raw text (by convention in a "text" column) +pipeline = PretrainedPipeline("sent_xlm_roberta_base_1024_256_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala +import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline + +// "df" is an existing Spark DataFrame holding the raw text (by convention in a "text" column) +val pipeline = new PretrainedPipeline("sent_xlm_roberta_base_1024_256_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_xlm_roberta_base_1024_256_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|654.4 MB| + +## References + +https://huggingface.co/kdercksen/xlm-roberta-base-1024-256 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_arlama_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_arlama_en.md new file mode 100644 index 00000000000000..4451333e56c0de --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_arlama_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_xlm_roberta_base_arlama XlmRoBertaSentenceEmbeddings from AfnanTS +author: John Snow Labs +name: sent_xlm_roberta_base_arlama +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_xlm_roberta_base_arlama` is a English model originally trained by AfnanTS. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_arlama_en_5.5.0_3.0_1725681311317.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_arlama_en_5.5.0_3.0_1725681311317.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_roberta_base_arlama","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_roberta_base_arlama","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
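Since this model is roughly 1 GB, it can be worth persisting the fitted pipeline instead of re-downloading the weights on every run. A sketch using the standard Spark ML persistence API; the path below is a placeholder and `pipelineModel`/`data` come from the example above.

```python
# Sketch using the standard Spark ML persistence API; the path is a placeholder.
from pyspark.ml import PipelineModel

pipelineModel.write().overwrite().save("/tmp/sent_xlm_roberta_base_arlama_pipeline")
restored = PipelineModel.load("/tmp/sent_xlm_roberta_base_arlama_pipeline")
restored.transform(data).select("embeddings").show(1, truncate=80)
```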
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_xlm_roberta_base_arlama| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/AfnanTS/xlm-roberta-base_ArLAMA \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_arlama_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_arlama_pipeline_en.md new file mode 100644 index 00000000000000..0490c6ca826aba --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_arlama_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_xlm_roberta_base_arlama_pipeline pipeline XlmRoBertaSentenceEmbeddings from AfnanTS +author: John Snow Labs +name: sent_xlm_roberta_base_arlama_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_xlm_roberta_base_arlama_pipeline` is a English model originally trained by AfnanTS. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_arlama_pipeline_en_5.5.0_3.0_1725681358935.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_arlama_pipeline_en_5.5.0_3.0_1725681358935.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +from sparknlp.pretrained import PretrainedPipeline + +# "df" is an existing Spark DataFrame holding the raw text (by convention in a "text" column) +pipeline = PretrainedPipeline("sent_xlm_roberta_base_arlama_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala +import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline + +// "df" is an existing Spark DataFrame holding the raw text (by convention in a "text" column) +val pipeline = new PretrainedPipeline("sent_xlm_roberta_base_arlama_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_xlm_roberta_base_arlama_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/AfnanTS/xlm-roberta-base_ArLAMA + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_finetuned_lingala_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_finetuned_lingala_en.md new file mode 100644 index 00000000000000..ccffb0b497b9b9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_finetuned_lingala_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_xlm_roberta_base_finetuned_lingala XlmRoBertaSentenceEmbeddings from Davlan +author: John Snow Labs +name: sent_xlm_roberta_base_finetuned_lingala +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_xlm_roberta_base_finetuned_lingala` is a English model originally trained by Davlan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_lingala_en_5.5.0_3.0_1725738285699.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_lingala_en_5.5.0_3.0_1725738285699.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_roberta_base_finetuned_lingala","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_roberta_base_finetuned_lingala","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_xlm_roberta_base_finetuned_lingala| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Davlan/xlm-roberta-base-finetuned-lingala \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_finetuned_lingala_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_finetuned_lingala_pipeline_en.md new file mode 100644 index 00000000000000..d5bf2168c65120 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_finetuned_lingala_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_xlm_roberta_base_finetuned_lingala_pipeline pipeline XlmRoBertaSentenceEmbeddings from Davlan +author: John Snow Labs +name: sent_xlm_roberta_base_finetuned_lingala_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_xlm_roberta_base_finetuned_lingala_pipeline` is a English model originally trained by Davlan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_lingala_pipeline_en_5.5.0_3.0_1725738332764.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_lingala_pipeline_en_5.5.0_3.0_1725738332764.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +from sparknlp.pretrained import PretrainedPipeline + +# "df" is an existing Spark DataFrame holding the raw text (by convention in a "text" column) +pipeline = PretrainedPipeline("sent_xlm_roberta_base_finetuned_lingala_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala +import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline + +// "df" is an existing Spark DataFrame holding the raw text (by convention in a "text" column) +val pipeline = new PretrainedPipeline("sent_xlm_roberta_base_finetuned_lingala_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
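Besides `transform` on a DataFrame, a `PretrainedPipeline` can also be called directly on plain strings, which is convenient for quick checks. A minimal sketch; the output column names depend on the pipeline, so they are inspected rather than assumed.

```python
# Minimal sketch; column names in the result depend on the pipeline, so we
# just list the keys instead of assuming them.
from sparknlp.pretrained import PretrainedPipeline

pipeline = PretrainedPipeline("sent_xlm_roberta_base_finetuned_lingala_pipeline", lang="en")
result = pipeline.fullAnnotate("I love spark-nlp")[0]
print(list(result.keys()))
```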
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_xlm_roberta_base_finetuned_lingala_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Davlan/xlm-roberta-base-finetuned-lingala + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_finetuned_rugo_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_finetuned_rugo_en.md new file mode 100644 index 00000000000000..a515ff7dcd7718 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_finetuned_rugo_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_xlm_roberta_base_finetuned_rugo XlmRoBertaSentenceEmbeddings from rugo +author: John Snow Labs +name: sent_xlm_roberta_base_finetuned_rugo +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_xlm_roberta_base_finetuned_rugo` is a English model originally trained by rugo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_rugo_en_5.5.0_3.0_1725683005299.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_rugo_en_5.5.0_3.0_1725683005299.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_roberta_base_finetuned_rugo","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_roberta_base_finetuned_rugo","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_xlm_roberta_base_finetuned_rugo| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|987.4 MB| + +## References + +https://huggingface.co/rugo/xlm-roberta-base-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_finetuned_somali_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_finetuned_somali_en.md new file mode 100644 index 00000000000000..eb9cc845b395b1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_finetuned_somali_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_xlm_roberta_base_finetuned_somali XlmRoBertaSentenceEmbeddings from Davlan +author: John Snow Labs +name: sent_xlm_roberta_base_finetuned_somali +date: 2024-09-07 +tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_xlm_roberta_base_finetuned_somali` is a English model originally trained by Davlan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_somali_en_5.5.0_3.0_1725714885509.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_somali_en_5.5.0_3.0_1725714885509.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_roberta_base_finetuned_somali","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_roberta_base_finetuned_somali","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_xlm_roberta_base_finetuned_somali| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Davlan/xlm-roberta-base-finetuned-somali \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_finetuned_somali_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_finetuned_somali_pipeline_en.md new file mode 100644 index 00000000000000..f0b5037d42604c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_xlm_roberta_base_finetuned_somali_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_xlm_roberta_base_finetuned_somali_pipeline pipeline XlmRoBertaSentenceEmbeddings from Davlan +author: John Snow Labs +name: sent_xlm_roberta_base_finetuned_somali_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_xlm_roberta_base_finetuned_somali_pipeline` is a English model originally trained by Davlan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_somali_pipeline_en_5.5.0_3.0_1725714931170.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_somali_pipeline_en_5.5.0_3.0_1725714931170.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_xlm_roberta_base_finetuned_somali_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_xlm_roberta_base_finetuned_somali_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
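+
+The snippet above assumes `df` is an existing Spark DataFrame with a `text` column. A minimal sketch of how such a frame could be prepared and the output inspected (the example sentence is purely illustrative):
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# Hypothetical input frame; any DataFrame with a "text" column will do.
+df = spark.createDataFrame([["Spark NLP is an open-source NLP library."]]).toDF("text")
+
+pipeline = PretrainedPipeline("sent_xlm_roberta_base_finetuned_somali_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+annotations.printSchema()  # inspect the columns produced by the bundled stages
+```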
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_xlm_roberta_base_finetuned_somali_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Davlan/xlm-roberta-base-finetuned-somali + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sent_zabantu_sot_ven_170m_pipeline_ve.md b/docs/_posts/ahmedlone127/2024-09-07-sent_zabantu_sot_ven_170m_pipeline_ve.md new file mode 100644 index 00000000000000..c20b1ed1392391 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sent_zabantu_sot_ven_170m_pipeline_ve.md @@ -0,0 +1,71 @@ +--- +layout: model +title: Venda sent_zabantu_sot_ven_170m_pipeline pipeline XlmRoBertaSentenceEmbeddings from dsfsi +author: John Snow Labs +name: sent_zabantu_sot_ven_170m_pipeline +date: 2024-09-07 +tags: [ve, open_source, pipeline, onnx] +task: Embeddings +language: ve +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_zabantu_sot_ven_170m_pipeline` is a Venda model originally trained by dsfsi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_zabantu_sot_ven_170m_pipeline_ve_5.5.0_3.0_1725714933667.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_zabantu_sot_ven_170m_pipeline_ve_5.5.0_3.0_1725714933667.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_zabantu_sot_ven_170m_pipeline", lang = "ve") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_zabantu_sot_ven_170m_pipeline", lang = "ve") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_zabantu_sot_ven_170m_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ve| +|Size:|647.0 MB| + +## References + +https://huggingface.co/dsfsi/zabantu-sot-ven-170m + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sentagram_pipeline_tr.md b/docs/_posts/ahmedlone127/2024-09-07-sentagram_pipeline_tr.md new file mode 100644 index 00000000000000..b86c89cd19f792 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sentagram_pipeline_tr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Turkish sentagram_pipeline pipeline BertForTokenClassification from synturk +author: John Snow Labs +name: sentagram_pipeline +date: 2024-09-07 +tags: [tr, open_source, pipeline, onnx] +task: Named Entity Recognition +language: tr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sentagram_pipeline` is a Turkish model originally trained by synturk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentagram_pipeline_tr_5.5.0_3.0_1725690942699.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentagram_pipeline_tr_5.5.0_3.0_1725690942699.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sentagram_pipeline", lang = "tr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sentagram_pipeline", lang = "tr") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentagram_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|tr| +|Size:|689.0 MB| + +## References + +https://huggingface.co/synturk/sentagram + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sentagram_tr.md b/docs/_posts/ahmedlone127/2024-09-07-sentagram_tr.md new file mode 100644 index 00000000000000..524e98cef5e807 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sentagram_tr.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Turkish sentagram BertForTokenClassification from synturk +author: John Snow Labs +name: sentagram +date: 2024-09-07 +tags: [tr, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: tr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sentagram` is a Turkish model originally trained by synturk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentagram_tr_5.5.0_3.0_1725690911877.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentagram_tr_5.5.0_3.0_1725690911877.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("sentagram","tr") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("sentagram", "tr")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
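+
+As a follow-up sketch (using the column names configured above), the predicted tags can be read alongside their tokens once the pipeline has run; both arrays are position-aligned:
+
+```python
+from pyspark.sql import functions as F
+
+# "token.result" holds the token strings, "ner.result" the tag predicted for each.
+pipelineDF.select(
+    F.col("token.result").alias("tokens"),
+    F.col("ner.result").alias("tags")
+).show(truncate=False)
+```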
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentagram| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|tr| +|Size:|689.0 MB| + +## References + +https://huggingface.co/synturk/sentagram \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sentence_classifier_en.md b/docs/_posts/ahmedlone127/2024-09-07-sentence_classifier_en.md new file mode 100644 index 00000000000000..f7915809f605e5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sentence_classifier_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English sentence_classifier MPNetEmbeddings from ilhkn +author: John Snow Labs +name: sentence_classifier +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sentence_classifier` is a English model originally trained by ilhkn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentence_classifier_en_5.5.0_3.0_1725703723078.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentence_classifier_en_5.5.0_3.0_1725703723078.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("sentence_classifier","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("sentence_classifier","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentence_classifier| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/ilhkn/sentence_classifier \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sentence_transformers_all_mpnet_base_v2_2epoch_100pair_mar2_contrastiveloss_en.md b/docs/_posts/ahmedlone127/2024-09-07-sentence_transformers_all_mpnet_base_v2_2epoch_100pair_mar2_contrastiveloss_en.md new file mode 100644 index 00000000000000..86dd0227e8133d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sentence_transformers_all_mpnet_base_v2_2epoch_100pair_mar2_contrastiveloss_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English sentence_transformers_all_mpnet_base_v2_2epoch_100pair_mar2_contrastiveloss MPNetEmbeddings from ahessamb +author: John Snow Labs +name: sentence_transformers_all_mpnet_base_v2_2epoch_100pair_mar2_contrastiveloss +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sentence_transformers_all_mpnet_base_v2_2epoch_100pair_mar2_contrastiveloss` is a English model originally trained by ahessamb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentence_transformers_all_mpnet_base_v2_2epoch_100pair_mar2_contrastiveloss_en_5.5.0_3.0_1725703590472.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentence_transformers_all_mpnet_base_v2_2epoch_100pair_mar2_contrastiveloss_en_5.5.0_3.0_1725703590472.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("sentence_transformers_all_mpnet_base_v2_2epoch_100pair_mar2_contrastiveloss","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("sentence_transformers_all_mpnet_base_v2_2epoch_100pair_mar2_contrastiveloss","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentence_transformers_all_mpnet_base_v2_2epoch_100pair_mar2_contrastiveloss| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/ahessamb/sentence-transformers-all-mpnet-base-v2-2epoch-100pair-mar2-contrastiveloss \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sentence_transformers_all_mpnet_base_v2_2epoch_100pair_mar2_contrastiveloss_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sentence_transformers_all_mpnet_base_v2_2epoch_100pair_mar2_contrastiveloss_pipeline_en.md new file mode 100644 index 00000000000000..ebd7dfa8e46c05 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sentence_transformers_all_mpnet_base_v2_2epoch_100pair_mar2_contrastiveloss_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English sentence_transformers_all_mpnet_base_v2_2epoch_100pair_mar2_contrastiveloss_pipeline pipeline MPNetEmbeddings from ahessamb +author: John Snow Labs +name: sentence_transformers_all_mpnet_base_v2_2epoch_100pair_mar2_contrastiveloss_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sentence_transformers_all_mpnet_base_v2_2epoch_100pair_mar2_contrastiveloss_pipeline` is a English model originally trained by ahessamb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentence_transformers_all_mpnet_base_v2_2epoch_100pair_mar2_contrastiveloss_pipeline_en_5.5.0_3.0_1725703614550.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentence_transformers_all_mpnet_base_v2_2epoch_100pair_mar2_contrastiveloss_pipeline_en_5.5.0_3.0_1725703614550.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sentence_transformers_all_mpnet_base_v2_2epoch_100pair_mar2_contrastiveloss_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sentence_transformers_all_mpnet_base_v2_2epoch_100pair_mar2_contrastiveloss_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentence_transformers_all_mpnet_base_v2_2epoch_100pair_mar2_contrastiveloss_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/ahessamb/sentence-transformers-all-mpnet-base-v2-2epoch-100pair-mar2-contrastiveloss + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sentencepiecebpe_nachos_french_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sentencepiecebpe_nachos_french_pipeline_en.md new file mode 100644 index 00000000000000..e9e235846a9d65 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sentencepiecebpe_nachos_french_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English sentencepiecebpe_nachos_french_pipeline pipeline CamemBertEmbeddings from BioMedTok +author: John Snow Labs +name: sentencepiecebpe_nachos_french_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sentencepiecebpe_nachos_french_pipeline` is a English model originally trained by BioMedTok. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentencepiecebpe_nachos_french_pipeline_en_5.5.0_3.0_1725691320325.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentencepiecebpe_nachos_french_pipeline_en_5.5.0_3.0_1725691320325.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sentencepiecebpe_nachos_french_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sentencepiecebpe_nachos_french_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentencepiecebpe_nachos_french_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|412.6 MB| + +## References + +https://huggingface.co/BioMedTok/SentencePieceBPE-NACHOS-FR + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sentencepiecebpe_pubmed_french_morphemes_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sentencepiecebpe_pubmed_french_morphemes_pipeline_en.md new file mode 100644 index 00000000000000..2839f48605cbe4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sentencepiecebpe_pubmed_french_morphemes_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English sentencepiecebpe_pubmed_french_morphemes_pipeline pipeline CamemBertEmbeddings from BioMedTok +author: John Snow Labs +name: sentencepiecebpe_pubmed_french_morphemes_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sentencepiecebpe_pubmed_french_morphemes_pipeline` is a English model originally trained by BioMedTok. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentencepiecebpe_pubmed_french_morphemes_pipeline_en_5.5.0_3.0_1725691990354.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentencepiecebpe_pubmed_french_morphemes_pipeline_en_5.5.0_3.0_1725691990354.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sentencepiecebpe_pubmed_french_morphemes_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sentencepiecebpe_pubmed_french_morphemes_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentencepiecebpe_pubmed_french_morphemes_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|412.7 MB| + +## References + +https://huggingface.co/BioMedTok/SentencePieceBPE-PubMed-FR-Morphemes + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-setfit_model_calgary_epochs2_jul_15_2023_en.md b/docs/_posts/ahmedlone127/2024-09-07-setfit_model_calgary_epochs2_jul_15_2023_en.md new file mode 100644 index 00000000000000..6bfb3e7aba36b4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-setfit_model_calgary_epochs2_jul_15_2023_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English setfit_model_calgary_epochs2_jul_15_2023 MPNetEmbeddings from mitra-mir +author: John Snow Labs +name: setfit_model_calgary_epochs2_jul_15_2023 +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`setfit_model_calgary_epochs2_jul_15_2023` is a English model originally trained by mitra-mir. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/setfit_model_calgary_epochs2_jul_15_2023_en_5.5.0_3.0_1725702872764.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/setfit_model_calgary_epochs2_jul_15_2023_en_5.5.0_3.0_1725702872764.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("setfit_model_calgary_epochs2_jul_15_2023","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("setfit_model_calgary_epochs2_jul_15_2023","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|setfit_model_calgary_epochs2_jul_15_2023| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/mitra-mir/setfit_model_Calgary_epochs2_Jul_15_2023 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-setfit_model_calgary_epochs2_jul_15_2023_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-setfit_model_calgary_epochs2_jul_15_2023_pipeline_en.md new file mode 100644 index 00000000000000..30c8d9829b775b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-setfit_model_calgary_epochs2_jul_15_2023_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English setfit_model_calgary_epochs2_jul_15_2023_pipeline pipeline MPNetEmbeddings from mitra-mir +author: John Snow Labs +name: setfit_model_calgary_epochs2_jul_15_2023_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`setfit_model_calgary_epochs2_jul_15_2023_pipeline` is a English model originally trained by mitra-mir. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/setfit_model_calgary_epochs2_jul_15_2023_pipeline_en_5.5.0_3.0_1725702891480.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/setfit_model_calgary_epochs2_jul_15_2023_pipeline_en_5.5.0_3.0_1725702891480.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("setfit_model_calgary_epochs2_jul_15_2023_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("setfit_model_calgary_epochs2_jul_15_2023_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|setfit_model_calgary_epochs2_jul_15_2023_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/mitra-mir/setfit_model_Calgary_epochs2_Jul_15_2023 + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-setfit_model_independence_labelintl_epochs2_en.md b/docs/_posts/ahmedlone127/2024-09-07-setfit_model_independence_labelintl_epochs2_en.md new file mode 100644 index 00000000000000..c2caffe6fad497 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-setfit_model_independence_labelintl_epochs2_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English setfit_model_independence_labelintl_epochs2 MPNetEmbeddings from mitra-mir +author: John Snow Labs +name: setfit_model_independence_labelintl_epochs2 +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`setfit_model_independence_labelintl_epochs2` is a English model originally trained by mitra-mir. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/setfit_model_independence_labelintl_epochs2_en_5.5.0_3.0_1725703295025.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/setfit_model_independence_labelintl_epochs2_en_5.5.0_3.0_1725703295025.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("setfit_model_independence_labelintl_epochs2","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("setfit_model_independence_labelintl_epochs2","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|setfit_model_independence_labelintl_epochs2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/mitra-mir/setfit_model_Independence_labelintl_epochs2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-setfit_model_independence_labelintl_epochs2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-setfit_model_independence_labelintl_epochs2_pipeline_en.md new file mode 100644 index 00000000000000..dbf812094733cf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-setfit_model_independence_labelintl_epochs2_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English setfit_model_independence_labelintl_epochs2_pipeline pipeline MPNetEmbeddings from mitra-mir +author: John Snow Labs +name: setfit_model_independence_labelintl_epochs2_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`setfit_model_independence_labelintl_epochs2_pipeline` is a English model originally trained by mitra-mir. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/setfit_model_independence_labelintl_epochs2_pipeline_en_5.5.0_3.0_1725703316253.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/setfit_model_independence_labelintl_epochs2_pipeline_en_5.5.0_3.0_1725703316253.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("setfit_model_independence_labelintl_epochs2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("setfit_model_independence_labelintl_epochs2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|setfit_model_independence_labelintl_epochs2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/mitra-mir/setfit_model_Independence_labelintl_epochs2 + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sinhala_roberta_oscar_pipeline_si.md b/docs/_posts/ahmedlone127/2024-09-07-sinhala_roberta_oscar_pipeline_si.md new file mode 100644 index 00000000000000..3c562df4a4808e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sinhala_roberta_oscar_pipeline_si.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Sinhala, Sinhalese sinhala_roberta_oscar_pipeline pipeline RoBertaEmbeddings from keshan +author: John Snow Labs +name: sinhala_roberta_oscar_pipeline +date: 2024-09-07 +tags: [si, open_source, pipeline, onnx] +task: Embeddings +language: si +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sinhala_roberta_oscar_pipeline` is a Sinhala, Sinhalese model originally trained by keshan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sinhala_roberta_oscar_pipeline_si_5.5.0_3.0_1725673001350.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sinhala_roberta_oscar_pipeline_si_5.5.0_3.0_1725673001350.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sinhala_roberta_oscar_pipeline", lang = "si") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sinhala_roberta_oscar_pipeline", lang = "si") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sinhala_roberta_oscar_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|si| +|Size:|465.4 MB| + +## References + +https://huggingface.co/keshan/sinhala-roberta-oscar + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sinhala_roberta_oscar_si.md b/docs/_posts/ahmedlone127/2024-09-07-sinhala_roberta_oscar_si.md new file mode 100644 index 00000000000000..fd913f4ff2361d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sinhala_roberta_oscar_si.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Sinhala, Sinhalese sinhala_roberta_oscar RoBertaEmbeddings from keshan +author: John Snow Labs +name: sinhala_roberta_oscar +date: 2024-09-07 +tags: [si, open_source, onnx, embeddings, roberta] +task: Embeddings +language: si +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sinhala_roberta_oscar` is a Sinhala, Sinhalese model originally trained by keshan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sinhala_roberta_oscar_si_5.5.0_3.0_1725672979441.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sinhala_roberta_oscar_si_5.5.0_3.0_1725672979441.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("sinhala_roberta_oscar","si") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("sinhala_roberta_oscar","si") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
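+
+Each token annotation produced above carries its own vector. A short sketch, assuming the column names configured in the pipeline, for lining tokens up with their embeddings:
+
+```python
+# Token strings and token vectors come back as position-aligned arrays.
+pipelineDF.select(
+    "token.result",          # the token texts
+    "embeddings.embeddings"  # one float vector per token
+).show(truncate=60)
+```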
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sinhala_roberta_oscar| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|si| +|Size:|465.4 MB| + +## References + +https://huggingface.co/keshan/sinhala-roberta-oscar \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sinhala_sentiment_analysis_sinbert_large_en.md b/docs/_posts/ahmedlone127/2024-09-07-sinhala_sentiment_analysis_sinbert_large_en.md new file mode 100644 index 00000000000000..2f50e00cc7f7fb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sinhala_sentiment_analysis_sinbert_large_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sinhala_sentiment_analysis_sinbert_large RoBertaForSequenceClassification from sinhala-nlp +author: John Snow Labs +name: sinhala_sentiment_analysis_sinbert_large +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sinhala_sentiment_analysis_sinbert_large` is a English model originally trained by sinhala-nlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sinhala_sentiment_analysis_sinbert_large_en_5.5.0_3.0_1725718157550.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sinhala_sentiment_analysis_sinbert_large_en_5.5.0_3.0_1725718157550.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("sinhala_sentiment_analysis_sinbert_large","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("sinhala_sentiment_analysis_sinbert_large", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
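+
+After the pipeline has been fitted and applied as above, the prediction for each input row can be read from the `class` column; a minimal sketch (the metadata layout is as typically emitted by Spark NLP sequence classifiers):
+
+```python
+# `result` holds the winning label; `metadata` holds the per-class scores.
+pipelineDF.select("text", "class.result", "class.metadata").show(truncate=False)
+```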
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sinhala_sentiment_analysis_sinbert_large| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|472.7 MB| + +## References + +https://huggingface.co/sinhala-nlp/sinhala-sentiment-analysis-sinbert-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sloberta_20480_not_pretrained_en.md b/docs/_posts/ahmedlone127/2024-09-07-sloberta_20480_not_pretrained_en.md new file mode 100644 index 00000000000000..25842eaa03a2d3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sloberta_20480_not_pretrained_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sloberta_20480_not_pretrained CamemBertEmbeddings from bcolnar +author: John Snow Labs +name: sloberta_20480_not_pretrained +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sloberta_20480_not_pretrained` is a English model originally trained by bcolnar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sloberta_20480_not_pretrained_en_5.5.0_3.0_1725692009446.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sloberta_20480_not_pretrained_en_5.5.0_3.0_1725692009446.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("sloberta_20480_not_pretrained","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("sloberta_20480_not_pretrained","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sloberta_20480_not_pretrained| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|300.5 MB| + +## References + +https://huggingface.co/bcolnar/sloberta-20480-not-pretrained \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sloberta_20480_not_pretrained_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sloberta_20480_not_pretrained_pipeline_en.md new file mode 100644 index 00000000000000..6aa81b42e05e16 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sloberta_20480_not_pretrained_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English sloberta_20480_not_pretrained_pipeline pipeline CamemBertEmbeddings from bcolnar +author: John Snow Labs +name: sloberta_20480_not_pretrained_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sloberta_20480_not_pretrained_pipeline` is a English model originally trained by bcolnar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sloberta_20480_not_pretrained_pipeline_en_5.5.0_3.0_1725692094433.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sloberta_20480_not_pretrained_pipeline_en_5.5.0_3.0_1725692094433.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sloberta_20480_not_pretrained_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sloberta_20480_not_pretrained_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sloberta_20480_not_pretrained_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|300.5 MB| + +## References + +https://huggingface.co/bcolnar/sloberta-20480-not-pretrained + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-smm4h2024_task1_roberta_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-smm4h2024_task1_roberta_pipeline_en.md new file mode 100644 index 00000000000000..9d6cd39c063224 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-smm4h2024_task1_roberta_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English smm4h2024_task1_roberta_pipeline pipeline RoBertaForTokenClassification from yseop +author: John Snow Labs +name: smm4h2024_task1_roberta_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`smm4h2024_task1_roberta_pipeline` is a English model originally trained by yseop. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/smm4h2024_task1_roberta_pipeline_en_5.5.0_3.0_1725667797931.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/smm4h2024_task1_roberta_pipeline_en_5.5.0_3.0_1725667797931.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("smm4h2024_task1_roberta_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("smm4h2024_task1_roberta_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|smm4h2024_task1_roberta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|439.5 MB| + +## References + +https://huggingface.co/yseop/SMM4H2024_Task1_roberta + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-somd_xlm_stage1_v2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-somd_xlm_stage1_v2_pipeline_en.md new file mode 100644 index 00000000000000..b3dab2fbdfd426 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-somd_xlm_stage1_v2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English somd_xlm_stage1_v2_pipeline pipeline XlmRoBertaForTokenClassification from ThuyNT03 +author: John Snow Labs +name: somd_xlm_stage1_v2_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`somd_xlm_stage1_v2_pipeline` is a English model originally trained by ThuyNT03. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/somd_xlm_stage1_v2_pipeline_en_5.5.0_3.0_1725687732591.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/somd_xlm_stage1_v2_pipeline_en_5.5.0_3.0_1725687732591.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("somd_xlm_stage1_v2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("somd_xlm_stage1_v2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|somd_xlm_stage1_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|797.6 MB| + +## References + +https://huggingface.co/ThuyNT03/SOMD-xlm-stage1-v2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sota_3_en.md b/docs/_posts/ahmedlone127/2024-09-07-sota_3_en.md new file mode 100644 index 00000000000000..1d2b1baa14a917 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sota_3_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sota_3 RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: sota_3 +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sota_3` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sota_3_en_5.5.0_3.0_1725680275648.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sota_3_en_5.5.0_3.0_1725680275648.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("sota_3","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("sota_3", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sota_3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/SOTA_3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-sota_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-sota_3_pipeline_en.md new file mode 100644 index 00000000000000..3fe4411889ad63 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-sota_3_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English sota_3_pipeline pipeline RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: sota_3_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sota_3_pipeline` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sota_3_pipeline_en_5.5.0_3.0_1725680297463.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sota_3_pipeline_en_5.5.0_3.0_1725680297463.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sota_3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sota_3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
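+
+The snippet above assumes a DataFrame `df` already exists. A minimal way to build one is sketched below; the `text` input column and the `class` output column are assumptions based on the stages listed under "Included Models", not something this card defines explicitly.
+
+```python
+# Sketch: build the `df` used above and run the pretrained pipeline on it.
+df = spark.createDataFrame([["I love spark-nlp"], ["This sentence gets a label too."]]).toDF("text")
+result = pipeline.transform(df)
+result.select("class.result").show(truncate=False)
+
+# For a single string, annotate() avoids building a DataFrame at all.
+print(pipeline.annotate("I love spark-nlp"))
+```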
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sota_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/SOTA_3 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-southern_sotho_all_mpnet_finetuned_comb_12481_en.md b/docs/_posts/ahmedlone127/2024-09-07-southern_sotho_all_mpnet_finetuned_comb_12481_en.md new file mode 100644 index 00000000000000..c159a0cdafc087 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-southern_sotho_all_mpnet_finetuned_comb_12481_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English southern_sotho_all_mpnet_finetuned_comb_12481 MPNetEmbeddings from danfeg +author: John Snow Labs +name: southern_sotho_all_mpnet_finetuned_comb_12481 +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`southern_sotho_all_mpnet_finetuned_comb_12481` is a English model originally trained by danfeg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/southern_sotho_all_mpnet_finetuned_comb_12481_en_5.5.0_3.0_1725703438483.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/southern_sotho_all_mpnet_finetuned_comb_12481_en_5.5.0_3.0_1725703438483.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("southern_sotho_all_mpnet_finetuned_comb_12481","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("southern_sotho_all_mpnet_finetuned_comb_12481","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|southern_sotho_all_mpnet_finetuned_comb_12481| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/danfeg/ST-ALL-MPNET_Finetuned-COMB-12481 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-southern_sotho_all_mpnet_finetuned_comb_12481_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-southern_sotho_all_mpnet_finetuned_comb_12481_pipeline_en.md new file mode 100644 index 00000000000000..736e8fe8fda4e6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-southern_sotho_all_mpnet_finetuned_comb_12481_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English southern_sotho_all_mpnet_finetuned_comb_12481_pipeline pipeline MPNetEmbeddings from danfeg +author: John Snow Labs +name: southern_sotho_all_mpnet_finetuned_comb_12481_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`southern_sotho_all_mpnet_finetuned_comb_12481_pipeline` is a English model originally trained by danfeg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/southern_sotho_all_mpnet_finetuned_comb_12481_pipeline_en_5.5.0_3.0_1725703457212.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/southern_sotho_all_mpnet_finetuned_comb_12481_pipeline_en_5.5.0_3.0_1725703457212.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("southern_sotho_all_mpnet_finetuned_comb_12481_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("southern_sotho_all_mpnet_finetuned_comb_12481_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|southern_sotho_all_mpnet_finetuned_comb_12481_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/danfeg/ST-ALL-MPNET_Finetuned-COMB-12481 + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-southern_sotho_all_mpnet_finetuned_french_1000_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-southern_sotho_all_mpnet_finetuned_french_1000_pipeline_en.md new file mode 100644 index 00000000000000..b5fe7ff0e3d3a2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-southern_sotho_all_mpnet_finetuned_french_1000_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English southern_sotho_all_mpnet_finetuned_french_1000_pipeline pipeline MPNetEmbeddings from danfeg +author: John Snow Labs +name: southern_sotho_all_mpnet_finetuned_french_1000_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`southern_sotho_all_mpnet_finetuned_french_1000_pipeline` is a English model originally trained by danfeg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/southern_sotho_all_mpnet_finetuned_french_1000_pipeline_en_5.5.0_3.0_1725703747710.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/southern_sotho_all_mpnet_finetuned_french_1000_pipeline_en_5.5.0_3.0_1725703747710.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("southern_sotho_all_mpnet_finetuned_french_1000_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("southern_sotho_all_mpnet_finetuned_french_1000_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|southern_sotho_all_mpnet_finetuned_french_1000_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/danfeg/ST-ALL-MPNET_Finetuned-FR-1000 + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-spanish_capitalization_punctuation_restoration_sanivert_pipeline_es.md b/docs/_posts/ahmedlone127/2024-09-07-spanish_capitalization_punctuation_restoration_sanivert_pipeline_es.md new file mode 100644 index 00000000000000..903147dc236bdc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-spanish_capitalization_punctuation_restoration_sanivert_pipeline_es.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Castilian, Spanish spanish_capitalization_punctuation_restoration_sanivert_pipeline pipeline BertForTokenClassification from VOCALINLP +author: John Snow Labs +name: spanish_capitalization_punctuation_restoration_sanivert_pipeline +date: 2024-09-07 +tags: [es, open_source, pipeline, onnx] +task: Named Entity Recognition +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`spanish_capitalization_punctuation_restoration_sanivert_pipeline` is a Castilian, Spanish model originally trained by VOCALINLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/spanish_capitalization_punctuation_restoration_sanivert_pipeline_es_5.5.0_3.0_1725726630748.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/spanish_capitalization_punctuation_restoration_sanivert_pipeline_es_5.5.0_3.0_1725726630748.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("spanish_capitalization_punctuation_restoration_sanivert_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("spanish_capitalization_punctuation_restoration_sanivert_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
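+
+Since `df` is not defined in the snippet above, the quickest way to try the pipeline on a raw string is `annotate()`. The lowercase, unpunctuated Spanish sample below is purely illustrative, and the exact keys in the returned dictionary depend on the pipeline's stages.
+
+```python
+# Sketch: run the restoration pipeline on a raw, unpunctuated Spanish string.
+sample = "hola como estas hoy es un buen dia"
+print(pipeline.annotate(sample))
+```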
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|spanish_capitalization_punctuation_restoration_sanivert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|409.8 MB| + +## References + +https://huggingface.co/VOCALINLP/spanish_capitalization_punctuation_restoration_sanivert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-spanish_finnish_all_quy_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-spanish_finnish_all_quy_1_pipeline_en.md new file mode 100644 index 00000000000000..ab356298bf0d99 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-spanish_finnish_all_quy_1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English spanish_finnish_all_quy_1_pipeline pipeline MarianTransformer from nouman-10 +author: John Snow Labs +name: spanish_finnish_all_quy_1_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`spanish_finnish_all_quy_1_pipeline` is a English model originally trained by nouman-10. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/spanish_finnish_all_quy_1_pipeline_en_5.5.0_3.0_1725747611277.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/spanish_finnish_all_quy_1_pipeline_en_5.5.0_3.0_1725747611277.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("spanish_finnish_all_quy_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("spanish_finnish_all_quy_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|spanish_finnish_all_quy_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|531.0 MB| + +## References + +https://huggingface.co/nouman-10/es_fi_all_quy_1 + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-spanish_finnish_extra_en.md b/docs/_posts/ahmedlone127/2024-09-07-spanish_finnish_extra_en.md new file mode 100644 index 00000000000000..8370ddb3cd3edc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-spanish_finnish_extra_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English spanish_finnish_extra MarianTransformer from americasnlp-lct-ehu +author: John Snow Labs +name: spanish_finnish_extra +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`spanish_finnish_extra` is a English model originally trained by americasnlp-lct-ehu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/spanish_finnish_extra_en_5.5.0_3.0_1725741124647.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/spanish_finnish_extra_en_5.5.0_3.0_1725741124647.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("spanish_finnish_extra","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("spanish_finnish_extra","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
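+
+Assuming the `pipelineDF` DataFrame from the Python example above, the translated sentences can be read from the `translation` annotation column:
+
+```python
+# Sketch: one translated sentence per output row.
+pipelineDF.selectExpr("explode(translation.result) as translated").show(truncate=False)
+```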
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|spanish_finnish_extra| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|530.7 MB| + +## References + +https://huggingface.co/americasnlp-lct-ehu/es_fi_extra \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-spanish_finnish_extra_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-spanish_finnish_extra_pipeline_en.md new file mode 100644 index 00000000000000..23a1e5cd5ece51 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-spanish_finnish_extra_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English spanish_finnish_extra_pipeline pipeline MarianTransformer from americasnlp-lct-ehu +author: John Snow Labs +name: spanish_finnish_extra_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`spanish_finnish_extra_pipeline` is a English model originally trained by americasnlp-lct-ehu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/spanish_finnish_extra_pipeline_en_5.5.0_3.0_1725741149484.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/spanish_finnish_extra_pipeline_en_5.5.0_3.0_1725741149484.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("spanish_finnish_extra_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("spanish_finnish_extra_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|spanish_finnish_extra_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|531.3 MB| + +## References + +https://huggingface.co/americasnlp-lct-ehu/es_fi_extra + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-spea_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-spea_3_pipeline_en.md new file mode 100644 index 00000000000000..0bf7c8479b9593 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-spea_3_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English spea_3_pipeline pipeline RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: spea_3_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`spea_3_pipeline` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/spea_3_pipeline_en_5.5.0_3.0_1725679780598.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/spea_3_pipeline_en_5.5.0_3.0_1725679780598.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("spea_3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("spea_3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|spea_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/Spea_3 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-ssdlm_en.md b/docs/_posts/ahmedlone127/2024-09-07-ssdlm_en.md new file mode 100644 index 00000000000000..11ce92dc021650 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-ssdlm_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English ssdlm RoBertaEmbeddings from xhan77 +author: John Snow Labs +name: ssdlm +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ssdlm` is a English model originally trained by xhan77. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ssdlm_en_5.5.0_3.0_1725672742671.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ssdlm_en_5.5.0_3.0_1725672742671.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("ssdlm","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("ssdlm","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
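+
+If the token vectors are needed as plain Spark ML vectors (for example, to feed an MLlib stage), an `EmbeddingsFinisher` can be appended. This is a sketch built on the `pipelineDF` from the Python example above, not part of the original card.
+
+```python
+from sparknlp.base import EmbeddingsFinisher
+
+# Sketch: convert the RoBERTa annotation column into plain vectors.
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["finished_embeddings"]) \
+    .setOutputAsVector(True)
+
+finisher.transform(pipelineDF) \
+    .selectExpr("explode(finished_embeddings) as token_vector") \
+    .show(truncate=False)
+```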
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ssdlm| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/xhan77/ssdlm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-ssdlm_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-ssdlm_pipeline_en.md new file mode 100644 index 00000000000000..407ffca9f6314d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-ssdlm_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English ssdlm_pipeline pipeline RoBertaEmbeddings from xhan77 +author: John Snow Labs +name: ssdlm_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ssdlm_pipeline` is a English model originally trained by xhan77. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ssdlm_pipeline_en_5.5.0_3.0_1725672804711.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ssdlm_pipeline_en_5.5.0_3.0_1725672804711.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("ssdlm_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("ssdlm_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ssdlm_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/xhan77/ssdlm + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-stanford_deidentifier_base_finetuned_ner_en.md b/docs/_posts/ahmedlone127/2024-09-07-stanford_deidentifier_base_finetuned_ner_en.md new file mode 100644 index 00000000000000..03afb2368d68e5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-stanford_deidentifier_base_finetuned_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English stanford_deidentifier_base_finetuned_ner BertForTokenClassification from antoineedy +author: John Snow Labs +name: stanford_deidentifier_base_finetuned_ner +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`stanford_deidentifier_base_finetuned_ner` is a English model originally trained by antoineedy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/stanford_deidentifier_base_finetuned_ner_en_5.5.0_3.0_1725735059859.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/stanford_deidentifier_base_finetuned_ner_en_5.5.0_3.0_1725735059859.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("stanford_deidentifier_base_finetuned_ner","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("stanford_deidentifier_base_finetuned_ner", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
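+
+Token-level tags are often easier to consume as whole entities. A `NerConverter` stage can do that grouping; the sketch below assumes the `pipelineDF` DataFrame from the Python example above.
+
+```python
+from sparknlp.annotator import NerConverter
+
+# Sketch: merge B-/I- token tags into full entity chunks.
+converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+converter.transform(pipelineDF) \
+    .selectExpr("explode(ner_chunk.result) as entity") \
+    .show(truncate=False)
+```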
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|stanford_deidentifier_base_finetuned_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|408.2 MB| + +## References + +https://huggingface.co/antoineedy/stanford-deidentifier-base-finetuned-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-stanford_deidentifier_base_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-stanford_deidentifier_base_pipeline_en.md new file mode 100644 index 00000000000000..0c0510a3f0ecaf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-stanford_deidentifier_base_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English stanford_deidentifier_base_pipeline pipeline BertForTokenClassification from StanfordAIMI +author: John Snow Labs +name: stanford_deidentifier_base_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`stanford_deidentifier_base_pipeline` is a English model originally trained by StanfordAIMI. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/stanford_deidentifier_base_pipeline_en_5.5.0_3.0_1725690903456.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/stanford_deidentifier_base_pipeline_en_5.5.0_3.0_1725690903456.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("stanford_deidentifier_base_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("stanford_deidentifier_base_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|stanford_deidentifier_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|408.2 MB| + +## References + +https://huggingface.co/StanfordAIMI/stanford-deidentifier-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-stanford_deidentifier_only_i2b2_en.md b/docs/_posts/ahmedlone127/2024-09-07-stanford_deidentifier_only_i2b2_en.md new file mode 100644 index 00000000000000..d54eb0f1d6c6f0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-stanford_deidentifier_only_i2b2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English stanford_deidentifier_only_i2b2 BertForTokenClassification from StanfordAIMI +author: John Snow Labs +name: stanford_deidentifier_only_i2b2 +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`stanford_deidentifier_only_i2b2` is a English model originally trained by StanfordAIMI. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/stanford_deidentifier_only_i2b2_en_5.5.0_3.0_1725701390025.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/stanford_deidentifier_only_i2b2_en_5.5.0_3.0_1725701390025.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("stanford_deidentifier_only_i2b2","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("stanford_deidentifier_only_i2b2", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|stanford_deidentifier_only_i2b2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|408.2 MB| + +## References + +https://huggingface.co/StanfordAIMI/stanford-deidentifier-only-i2b2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-stanford_deidentifier_only_i2b2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-stanford_deidentifier_only_i2b2_pipeline_en.md new file mode 100644 index 00000000000000..764a7da6fd9df2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-stanford_deidentifier_only_i2b2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English stanford_deidentifier_only_i2b2_pipeline pipeline BertForTokenClassification from StanfordAIMI +author: John Snow Labs +name: stanford_deidentifier_only_i2b2_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`stanford_deidentifier_only_i2b2_pipeline` is a English model originally trained by StanfordAIMI. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/stanford_deidentifier_only_i2b2_pipeline_en_5.5.0_3.0_1725701409136.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/stanford_deidentifier_only_i2b2_pipeline_en_5.5.0_3.0_1725701409136.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("stanford_deidentifier_only_i2b2_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("stanford_deidentifier_only_i2b2_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|stanford_deidentifier_only_i2b2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|408.2 MB| + +## References + +https://huggingface.co/StanfordAIMI/stanford-deidentifier-only-i2b2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-stanford_deidentifier_only_radiology_reports_en.md b/docs/_posts/ahmedlone127/2024-09-07-stanford_deidentifier_only_radiology_reports_en.md new file mode 100644 index 00000000000000..94f3a8bb20e707 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-stanford_deidentifier_only_radiology_reports_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English stanford_deidentifier_only_radiology_reports BertForTokenClassification from StanfordAIMI +author: John Snow Labs +name: stanford_deidentifier_only_radiology_reports +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`stanford_deidentifier_only_radiology_reports` is a English model originally trained by StanfordAIMI. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/stanford_deidentifier_only_radiology_reports_en_5.5.0_3.0_1725726688838.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/stanford_deidentifier_only_radiology_reports_en_5.5.0_3.0_1725726688838.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("stanford_deidentifier_only_radiology_reports","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("stanford_deidentifier_only_radiology_reports", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|stanford_deidentifier_only_radiology_reports| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|408.2 MB| + +## References + +https://huggingface.co/StanfordAIMI/stanford-deidentifier-only-radiology-reports \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-stego_classifier_checkpoint_epoch_30_2024_07_26_16_19_31_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-stego_classifier_checkpoint_epoch_30_2024_07_26_16_19_31_pipeline_en.md new file mode 100644 index 00000000000000..21846f78f7bee1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-stego_classifier_checkpoint_epoch_30_2024_07_26_16_19_31_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English stego_classifier_checkpoint_epoch_30_2024_07_26_16_19_31_pipeline pipeline DistilBertForSequenceClassification from jvelja +author: John Snow Labs +name: stego_classifier_checkpoint_epoch_30_2024_07_26_16_19_31_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`stego_classifier_checkpoint_epoch_30_2024_07_26_16_19_31_pipeline` is a English model originally trained by jvelja. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/stego_classifier_checkpoint_epoch_30_2024_07_26_16_19_31_pipeline_en_5.5.0_3.0_1725675072369.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/stego_classifier_checkpoint_epoch_30_2024_07_26_16_19_31_pipeline_en_5.5.0_3.0_1725675072369.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("stego_classifier_checkpoint_epoch_30_2024_07_26_16_19_31_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("stego_classifier_checkpoint_epoch_30_2024_07_26_16_19_31_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|stego_classifier_checkpoint_epoch_30_2024_07_26_16_19_31_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/jvelja/stego-classifier-checkpoint-epoch-30-2024-07-26_16-19-31 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-swiss_german_xlm_roberta_base_en.md b/docs/_posts/ahmedlone127/2024-09-07-swiss_german_xlm_roberta_base_en.md new file mode 100644 index 00000000000000..a81c7939f088f3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-swiss_german_xlm_roberta_base_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English swiss_german_xlm_roberta_base XlmRoBertaEmbeddings from ZurichNLP +author: John Snow Labs +name: swiss_german_xlm_roberta_base +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`swiss_german_xlm_roberta_base` is a English model originally trained by ZurichNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/swiss_german_xlm_roberta_base_en_5.5.0_3.0_1725676745427.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/swiss_german_xlm_roberta_base_en_5.5.0_3.0_1725676745427.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("swiss_german_xlm_roberta_base","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("swiss_german_xlm_roberta_base","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|swiss_german_xlm_roberta_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[xlm_roberta]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/ZurichNLP/swiss-german-xlm-roberta-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-t_frex_roberta_large_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-t_frex_roberta_large_pipeline_en.md new file mode 100644 index 00000000000000..0913a356d09f4e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-t_frex_roberta_large_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English t_frex_roberta_large_pipeline pipeline RoBertaForTokenClassification from quim-motger +author: John Snow Labs +name: t_frex_roberta_large_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`t_frex_roberta_large_pipeline` is a English model originally trained by quim-motger. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/t_frex_roberta_large_pipeline_en_5.5.0_3.0_1725668167774.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/t_frex_roberta_large_pipeline_en_5.5.0_3.0_1725668167774.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("t_frex_roberta_large_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("t_frex_roberta_large_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|t_frex_roberta_large_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/quim-motger/t-frex-roberta-large + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-taiyi_roberta_124m_d_en.md b/docs/_posts/ahmedlone127/2024-09-07-taiyi_roberta_124m_d_en.md new file mode 100644 index 00000000000000..202ed7a9c8559f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-taiyi_roberta_124m_d_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English taiyi_roberta_124m_d RoBertaEmbeddings from IDEA-CCNL +author: John Snow Labs +name: taiyi_roberta_124m_d +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`taiyi_roberta_124m_d` is a English model originally trained by IDEA-CCNL. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/taiyi_roberta_124m_d_en_5.5.0_3.0_1725672996332.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/taiyi_roberta_124m_d_en_5.5.0_3.0_1725672996332.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("taiyi_roberta_124m_d","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("taiyi_roberta_124m_d","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|taiyi_roberta_124m_d| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|466.0 MB| + +## References + +https://huggingface.co/IDEA-CCNL/Taiyi-Roberta-124M-D \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-tajberto_pipeline_tg.md b/docs/_posts/ahmedlone127/2024-09-07-tajberto_pipeline_tg.md new file mode 100644 index 00000000000000..35c8c1498b6154 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-tajberto_pipeline_tg.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Tajik tajberto_pipeline pipeline RoBertaEmbeddings from muhtasham +author: John Snow Labs +name: tajberto_pipeline +date: 2024-09-07 +tags: [tg, open_source, pipeline, onnx] +task: Embeddings +language: tg +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tajberto_pipeline` is a Tajik model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tajberto_pipeline_tg_5.5.0_3.0_1725673328834.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tajberto_pipeline_tg_5.5.0_3.0_1725673328834.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("tajberto_pipeline", lang = "tg") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("tajberto_pipeline", lang = "tg") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tajberto_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|tg| +|Size:|311.7 MB| + +## References + +https://huggingface.co/muhtasham/TajBERTo + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-tajberto_tg.md b/docs/_posts/ahmedlone127/2024-09-07-tajberto_tg.md new file mode 100644 index 00000000000000..53f5c566b19a77 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-tajberto_tg.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Tajik tajberto RoBertaEmbeddings from muhtasham +author: John Snow Labs +name: tajberto +date: 2024-09-07 +tags: [tg, open_source, onnx, embeddings, roberta] +task: Embeddings +language: tg +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tajberto` is a Tajik model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tajberto_tg_5.5.0_3.0_1725673314432.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tajberto_tg_5.5.0_3.0_1725673314432.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("tajberto","tg") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("tajberto","tg") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tajberto| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|tg| +|Size:|311.7 MB| + +## References + +https://huggingface.co/muhtasham/TajBERTo \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-task_token_classification_distilbert_wnut_17_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-task_token_classification_distilbert_wnut_17_pipeline_en.md new file mode 100644 index 00000000000000..a9c936da961f81 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-task_token_classification_distilbert_wnut_17_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English task_token_classification_distilbert_wnut_17_pipeline pipeline DistilBertForTokenClassification from tunggad +author: John Snow Labs +name: task_token_classification_distilbert_wnut_17_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`task_token_classification_distilbert_wnut_17_pipeline` is a English model originally trained by tunggad. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/task_token_classification_distilbert_wnut_17_pipeline_en_5.5.0_3.0_1725739366989.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/task_token_classification_distilbert_wnut_17_pipeline_en_5.5.0_3.0_1725739366989.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("task_token_classification_distilbert_wnut_17_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("task_token_classification_distilbert_wnut_17_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
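+
+The snippet above assumes a Spark DataFrame `df` with a `text` column. A short sketch of how such an input can be built, plus the `annotate` shortcut for single strings (the sample sentence is only an illustration):
+
+```python
+# Assumed input: one "text" column; the sentence below is a made-up example.
+df = spark.createDataFrame([["My name is Wolfgang and I live in Berlin"]]).toDF("text")
+annotations = pipeline.transform(df)
+
+# For quick experiments, annotate a plain string without building a DataFrame.
+result = pipeline.annotate("My name is Wolfgang and I live in Berlin")
+```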
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|task_token_classification_distilbert_wnut_17_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/tunggad/task-token-classification-DistilBERT-wnut_17 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-tesakantaibert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-tesakantaibert_pipeline_en.md new file mode 100644 index 00000000000000..c45a0cb985b03a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-tesakantaibert_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English tesakantaibert_pipeline pipeline RoBertaEmbeddings from DipanAI +author: John Snow Labs +name: tesakantaibert_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tesakantaibert_pipeline` is a English model originally trained by DipanAI. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tesakantaibert_pipeline_en_5.5.0_3.0_1725673558458.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tesakantaibert_pipeline_en_5.5.0_3.0_1725673558458.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("tesakantaibert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("tesakantaibert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tesakantaibert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|310.5 MB| + +## References + +https://huggingface.co/DipanAI/TesAKantaiBERT + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-test_demo_qa_en.md b/docs/_posts/ahmedlone127/2024-09-07-test_demo_qa_en.md new file mode 100644 index 00000000000000..04a5822a1789c3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-test_demo_qa_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English test_demo_qa DistilBertForQuestionAnswering from houyu0930 +author: John Snow Labs +name: test_demo_qa +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`test_demo_qa` is a English model originally trained by houyu0930. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_demo_qa_en_5.5.0_3.0_1725695732351.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_demo_qa_en_5.5.0_3.0_1725695732351.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("test_demo_qa","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("test_demo_qa", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
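+
+After the pipeline runs, the predicted span sits in the `answer` column. A minimal way to read it back, assuming the `pipelineDF` built above:
+
+```python
+# `result` on the answer annotations holds the extracted span as a string array.
+pipelineDF.selectExpr("document_question.result as question", "answer.result as answer") \
+    .show(truncate=False)
+```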
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test_demo_qa| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/houyu0930/test-demo-qa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-test_esperberto_small_pipeline_eo.md b/docs/_posts/ahmedlone127/2024-09-07-test_esperberto_small_pipeline_eo.md new file mode 100644 index 00000000000000..7444ca9e6f923e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-test_esperberto_small_pipeline_eo.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Esperanto test_esperberto_small_pipeline pipeline RoBertaEmbeddings from pierric +author: John Snow Labs +name: test_esperberto_small_pipeline +date: 2024-09-07 +tags: [eo, open_source, pipeline, onnx] +task: Embeddings +language: eo +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`test_esperberto_small_pipeline` is a Esperanto model originally trained by pierric. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_esperberto_small_pipeline_eo_5.5.0_3.0_1725698755419.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_esperberto_small_pipeline_eo_5.5.0_3.0_1725698755419.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("test_esperberto_small_pipeline", lang = "eo") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("test_esperberto_small_pipeline", lang = "eo") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test_esperberto_small_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|eo| +|Size:|308.6 MB| + +## References + +https://huggingface.co/pierric/test-EsperBERTo-small + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-test_qa_sanjeev_jasper_en.md b/docs/_posts/ahmedlone127/2024-09-07-test_qa_sanjeev_jasper_en.md new file mode 100644 index 00000000000000..ec09904191b2dc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-test_qa_sanjeev_jasper_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English test_qa_sanjeev_jasper DistilBertForQuestionAnswering from sanjeev-jasper +author: John Snow Labs +name: test_qa_sanjeev_jasper +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`test_qa_sanjeev_jasper` is a English model originally trained by sanjeev-jasper. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_qa_sanjeev_jasper_en_5.5.0_3.0_1725722501450.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_qa_sanjeev_jasper_en_5.5.0_3.0_1725722501450.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("test_qa_sanjeev_jasper","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("test_qa_sanjeev_jasper", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test_qa_sanjeev_jasper| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/sanjeev-jasper/test_qa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-test_qa_sanjeev_jasper_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-test_qa_sanjeev_jasper_pipeline_en.md new file mode 100644 index 00000000000000..8d6cf1e24b03eb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-test_qa_sanjeev_jasper_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English test_qa_sanjeev_jasper_pipeline pipeline DistilBertForQuestionAnswering from sanjeev-jasper +author: John Snow Labs +name: test_qa_sanjeev_jasper_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`test_qa_sanjeev_jasper_pipeline` is a English model originally trained by sanjeev-jasper. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_qa_sanjeev_jasper_pipeline_en_5.5.0_3.0_1725722513078.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_qa_sanjeev_jasper_pipeline_en_5.5.0_3.0_1725722513078.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("test_qa_sanjeev_jasper_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("test_qa_sanjeev_jasper_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test_qa_sanjeev_jasper_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/sanjeev-jasper/test_qa + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-test_setfit_model_bhuvana_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-test_setfit_model_bhuvana_pipeline_en.md new file mode 100644 index 00000000000000..668b30691f1fd5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-test_setfit_model_bhuvana_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English test_setfit_model_bhuvana_pipeline pipeline MPNetEmbeddings from Bhuvana +author: John Snow Labs +name: test_setfit_model_bhuvana_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`test_setfit_model_bhuvana_pipeline` is a English model originally trained by Bhuvana. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_setfit_model_bhuvana_pipeline_en_5.5.0_3.0_1725702895120.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_setfit_model_bhuvana_pipeline_en_5.5.0_3.0_1725702895120.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("test_setfit_model_bhuvana_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("test_setfit_model_bhuvana_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test_setfit_model_bhuvana_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/Bhuvana/test-setfit-model + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-test_w5_long_dataset_en.md b/docs/_posts/ahmedlone127/2024-09-07-test_w5_long_dataset_en.md new file mode 100644 index 00000000000000..881ee2463f60ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-test_w5_long_dataset_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English test_w5_long_dataset RoBertaEmbeddings from NbAiLabArchive +author: John Snow Labs +name: test_w5_long_dataset +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`test_w5_long_dataset` is a English model originally trained by NbAiLabArchive. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_w5_long_dataset_en_5.5.0_3.0_1725673033193.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_w5_long_dataset_en_5.5.0_3.0_1725673033193.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("test_w5_long_dataset","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("test_w5_long_dataset","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test_w5_long_dataset| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|466.0 MB| + +## References + +https://huggingface.co/NbAiLabArchive/test_w5_long_dataset \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-testchatbotmodel1_en.md b/docs/_posts/ahmedlone127/2024-09-07-testchatbotmodel1_en.md new file mode 100644 index 00000000000000..44394b23d20508 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-testchatbotmodel1_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English testchatbotmodel1 DistilBertForQuestionAnswering from TheoND +author: John Snow Labs +name: testchatbotmodel1 +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`testchatbotmodel1` is a English model originally trained by TheoND. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/testchatbotmodel1_en_5.5.0_3.0_1725695561331.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/testchatbotmodel1_en_5.5.0_3.0_1725695561331.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("testchatbotmodel1","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("testchatbotmodel1", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|testchatbotmodel1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/TheoND/testchatbotmodel1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-testing_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-testing_pipeline_en.md new file mode 100644 index 00000000000000..51829ccc06821a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-testing_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English testing_pipeline pipeline DistilBertForQuestionAnswering from Sybghat +author: John Snow Labs +name: testing_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`testing_pipeline` is a English model originally trained by Sybghat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/testing_pipeline_en_5.5.0_3.0_1725695536589.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/testing_pipeline_en_5.5.0_3.0_1725695536589.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("testing_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("testing_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|testing_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Sybghat/Testing + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-testmodel_en.md b/docs/_posts/ahmedlone127/2024-09-07-testmodel_en.md new file mode 100644 index 00000000000000..f8ccc91d3dbc59 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-testmodel_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English testmodel DistilBertForQuestionAnswering from petersirka +author: John Snow Labs +name: testmodel +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`testmodel` is a English model originally trained by petersirka. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/testmodel_en_5.5.0_3.0_1725735986892.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/testmodel_en_5.5.0_3.0_1725735986892.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("testmodel","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("testmodel", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|testmodel| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|505.4 MB| + +## References + +https://huggingface.co/petersirka/testmodel \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-testmodel_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-testmodel_pipeline_en.md new file mode 100644 index 00000000000000..7946086ec59dab --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-testmodel_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English testmodel_pipeline pipeline DistilBertForQuestionAnswering from petersirka +author: John Snow Labs +name: testmodel_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`testmodel_pipeline` is a English model originally trained by petersirka. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/testmodel_pipeline_en_5.5.0_3.0_1725736012558.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/testmodel_pipeline_en_5.5.0_3.0_1725736012558.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("testmodel_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("testmodel_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|testmodel_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|505.4 MB| + +## References + +https://huggingface.co/petersirka/testmodel + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-testtesttest_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-testtesttest_pipeline_en.md new file mode 100644 index 00000000000000..06fabe421e6c74 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-testtesttest_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English testtesttest_pipeline pipeline MarianTransformer from wrchen1 +author: John Snow Labs +name: testtesttest_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`testtesttest_pipeline` is a English model originally trained by wrchen1. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/testtesttest_pipeline_en_5.5.0_3.0_1725747809585.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/testtesttest_pipeline_en_5.5.0_3.0_1725747809585.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("testtesttest_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("testtesttest_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|testtesttest_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|509.0 MB| + +## References + +https://huggingface.co/wrchen1/testtesttest + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-textfooler_roberta_base_mrpc_5_en.md b/docs/_posts/ahmedlone127/2024-09-07-textfooler_roberta_base_mrpc_5_en.md new file mode 100644 index 00000000000000..ed75589ea20e03 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-textfooler_roberta_base_mrpc_5_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English textfooler_roberta_base_mrpc_5 RoBertaForSequenceClassification from korca +author: John Snow Labs +name: textfooler_roberta_base_mrpc_5 +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`textfooler_roberta_base_mrpc_5` is a English model originally trained by korca. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/textfooler_roberta_base_mrpc_5_en_5.5.0_3.0_1725680076546.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/textfooler_roberta_base_mrpc_5_en_5.5.0_3.0_1725680076546.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("textfooler_roberta_base_mrpc_5","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("textfooler_roberta_base_mrpc_5", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
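+
+The predicted label lands in the `class` column as an annotation whose `result` field holds the label string. A small sketch for reading it back, assuming the `pipelineDF` above (backticks only because the column is named `class`):
+
+```python
+# One predicted label per input row.
+pipelineDF.selectExpr("text", "`class`.result as label").show(truncate=False)
+```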
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|textfooler_roberta_base_mrpc_5| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|447.7 MB| + +## References + +https://huggingface.co/korca/textfooler-roberta-base-mrpc-5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-textfooler_roberta_base_mrpc_5_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-textfooler_roberta_base_mrpc_5_pipeline_en.md new file mode 100644 index 00000000000000..4e8a9d442badaa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-textfooler_roberta_base_mrpc_5_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English textfooler_roberta_base_mrpc_5_pipeline pipeline RoBertaForSequenceClassification from korca +author: John Snow Labs +name: textfooler_roberta_base_mrpc_5_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`textfooler_roberta_base_mrpc_5_pipeline` is a English model originally trained by korca. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/textfooler_roberta_base_mrpc_5_pipeline_en_5.5.0_3.0_1725680099147.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/textfooler_roberta_base_mrpc_5_pipeline_en_5.5.0_3.0_1725680099147.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("textfooler_roberta_base_mrpc_5_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("textfooler_roberta_base_mrpc_5_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|textfooler_roberta_base_mrpc_5_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|447.7 MB| + +## References + +https://huggingface.co/korca/textfooler-roberta-base-mrpc-5 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-tnana_english_thai_align_finetuned_en.md b/docs/_posts/ahmedlone127/2024-09-07-tnana_english_thai_align_finetuned_en.md new file mode 100644 index 00000000000000..f0d2801d394626 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-tnana_english_thai_align_finetuned_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English tnana_english_thai_align_finetuned MarianTransformer from SAI2-EXP +author: John Snow Labs +name: tnana_english_thai_align_finetuned +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tnana_english_thai_align_finetuned` is a English model originally trained by SAI2-EXP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tnana_english_thai_align_finetuned_en_5.5.0_3.0_1725740709484.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tnana_english_thai_align_finetuned_en_5.5.0_3.0_1725740709484.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("tnana_english_thai_align_finetuned","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("tnana_english_thai_align_finetuned","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
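+
+The translated text is written to the `translation` column, one annotation per detected sentence. A minimal read-back, assuming the `pipelineDF` above:
+
+```python
+# `result` holds the translated sentences as plain strings.
+pipelineDF.select("text", "translation.result").show(truncate=False)
+```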
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tnana_english_thai_align_finetuned| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|530.3 MB| + +## References + +https://huggingface.co/SAI2-EXP/TNANA-en-th-align-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-token_classification_adilhayat173_en.md b/docs/_posts/ahmedlone127/2024-09-07-token_classification_adilhayat173_en.md new file mode 100644 index 00000000000000..b1072fed84ea2d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-token_classification_adilhayat173_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English token_classification_adilhayat173 BertForTokenClassification from AdilHayat173 +author: John Snow Labs +name: token_classification_adilhayat173 +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, bert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`token_classification_adilhayat173` is a English model originally trained by AdilHayat173. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/token_classification_adilhayat173_en_5.5.0_3.0_1725726605995.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/token_classification_adilhayat173_en_5.5.0_3.0_1725726605995.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = BertForTokenClassification.pretrained("token_classification_adilhayat173","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = BertForTokenClassification.pretrained("token_classification_adilhayat173", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
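+
+Token-level predictions come back as parallel arrays: `token.result` and `ner.result` align by position. A quick side-by-side view, assuming the `pipelineDF` above:
+
+```python
+# Tokens and their predicted tags, aligned element by element.
+pipelineDF.select("token.result", "ner.result").show(truncate=False)
+```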
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|token_classification_adilhayat173| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/AdilHayat173/token_classification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-tone_en.md b/docs/_posts/ahmedlone127/2024-09-07-tone_en.md new file mode 100644 index 00000000000000..b5378fd28e74c0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-tone_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English tone RoBertaForSequenceClassification from quesmed +author: John Snow Labs +name: tone +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tone` is a English model originally trained by quesmed. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tone_en_5.5.0_3.0_1725680326292.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tone_en_5.5.0_3.0_1725680326292.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("tone","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("tone", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tone| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|468.3 MB| + +## References + +https://huggingface.co/quesmed/tone \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-tone_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-tone_pipeline_en.md new file mode 100644 index 00000000000000..fa9697be192847 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-tone_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English tone_pipeline pipeline RoBertaForSequenceClassification from quesmed +author: John Snow Labs +name: tone_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tone_pipeline` is a English model originally trained by quesmed. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tone_pipeline_en_5.5.0_3.0_1725680348485.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tone_pipeline_en_5.5.0_3.0_1725680348485.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("tone_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("tone_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tone_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|468.3 MB| + +## References + +https://huggingface.co/quesmed/tone + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-trainer_chapter4_rishabh_sucks_at_code_en.md b/docs/_posts/ahmedlone127/2024-09-07-trainer_chapter4_rishabh_sucks_at_code_en.md new file mode 100644 index 00000000000000..6b0ad4a6ab3bd4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-trainer_chapter4_rishabh_sucks_at_code_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English trainer_chapter4_rishabh_sucks_at_code DistilBertForSequenceClassification from Rishabh-sucks-at-code +author: John Snow Labs +name: trainer_chapter4_rishabh_sucks_at_code +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`trainer_chapter4_rishabh_sucks_at_code` is a English model originally trained by Rishabh-sucks-at-code. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/trainer_chapter4_rishabh_sucks_at_code_en_5.5.0_3.0_1725674758314.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/trainer_chapter4_rishabh_sucks_at_code_en_5.5.0_3.0_1725674758314.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("trainer_chapter4_rishabh_sucks_at_code","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("trainer_chapter4_rishabh_sucks_at_code", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|trainer_chapter4_rishabh_sucks_at_code| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Rishabh-sucks-at-code/trainer-chapter4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-trans_vietnamese_english_v2_en.md b/docs/_posts/ahmedlone127/2024-09-07-trans_vietnamese_english_v2_en.md new file mode 100644 index 00000000000000..5f5be2b18e5674 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-trans_vietnamese_english_v2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English trans_vietnamese_english_v2 MarianTransformer from B2111797 +author: John Snow Labs +name: trans_vietnamese_english_v2 +date: 2024-09-07 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`trans_vietnamese_english_v2` is a English model originally trained by B2111797. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/trans_vietnamese_english_v2_en_5.5.0_3.0_1725740323125.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/trans_vietnamese_english_v2_en_5.5.0_3.0_1725740323125.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("trans_vietnamese_english_v2","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("trans_vietnamese_english_v2","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|trans_vietnamese_english_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|474.5 MB| + +## References + +https://huggingface.co/B2111797/trans-vi-en-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-translatear_english_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-translatear_english_pipeline_en.md new file mode 100644 index 00000000000000..0cabc17c27bdd1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-translatear_english_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English translatear_english_pipeline pipeline MarianTransformer from shahad-alh +author: John Snow Labs +name: translatear_english_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`translatear_english_pipeline` is a English model originally trained by shahad-alh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/translatear_english_pipeline_en_5.5.0_3.0_1725746541809.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/translatear_english_pipeline_en_5.5.0_3.0_1725746541809.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("translatear_english_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("translatear_english_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|translatear_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|528.4 MB| + +## References + +https://huggingface.co/shahad-alh/translateAR_EN + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-translation_vietnamese_english_official_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-translation_vietnamese_english_official_pipeline_en.md new file mode 100644 index 00000000000000..e64e1546cf5554 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-translation_vietnamese_english_official_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English translation_vietnamese_english_official_pipeline pipeline MarianTransformer from NguyenManhAI +author: John Snow Labs +name: translation_vietnamese_english_official_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`translation_vietnamese_english_official_pipeline` is a English model originally trained by NguyenManhAI. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/translation_vietnamese_english_official_pipeline_en_5.5.0_3.0_1725741056843.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/translation_vietnamese_english_official_pipeline_en_5.5.0_3.0_1725741056843.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("translation_vietnamese_english_official_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("translation_vietnamese_english_official_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|translation_vietnamese_english_official_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|475.1 MB| + +## References + +https://huggingface.co/NguyenManhAI/translation-vi-en-official + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-translit_ppa_mediterranean_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-07-translit_ppa_mediterranean_pipeline_xx.md new file mode 100644 index 00000000000000..044eccab90f108 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-translit_ppa_mediterranean_pipeline_xx.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Multilingual translit_ppa_mediterranean_pipeline pipeline XlmRoBertaEmbeddings from orxhelili +author: John Snow Labs +name: translit_ppa_mediterranean_pipeline +date: 2024-09-07 +tags: [xx, open_source, pipeline, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`translit_ppa_mediterranean_pipeline` is a Multilingual model originally trained by orxhelili. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/translit_ppa_mediterranean_pipeline_xx_5.5.0_3.0_1725677104260.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/translit_ppa_mediterranean_pipeline_xx_5.5.0_3.0_1725677104260.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("translit_ppa_mediterranean_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("translit_ppa_mediterranean_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|translit_ppa_mediterranean_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|1.5 GB| + +## References + +https://huggingface.co/orxhelili/translit_ppa_mediterranean + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-translit_ppa_mediterranean_xx.md b/docs/_posts/ahmedlone127/2024-09-07-translit_ppa_mediterranean_xx.md new file mode 100644 index 00000000000000..6cc2861ddcc01f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-translit_ppa_mediterranean_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual translit_ppa_mediterranean XlmRoBertaEmbeddings from orxhelili +author: John Snow Labs +name: translit_ppa_mediterranean +date: 2024-09-07 +tags: [xx, open_source, onnx, embeddings, xlm_roberta] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`translit_ppa_mediterranean` is a Multilingual model originally trained by orxhelili. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/translit_ppa_mediterranean_xx_5.5.0_3.0_1725677025490.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/translit_ppa_mediterranean_xx_5.5.0_3.0_1725677025490.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("translit_ppa_mediterranean","xx") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("translit_ppa_mediterranean","xx") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
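+
+Once `pipelineDF` from the Python snippet above is available, the token vectors live inside the `embeddings` annotation column. A small, illustrative sketch of flattening them for inspection; the column and field names follow the stages defined above:
+
+```python
+from pyspark.sql import functions as F
+
+# Each annotation carries the token text in `result` and its vector in the nested `embeddings` field.
+token_vectors = (
+    pipelineDF
+    .select(F.explode("embeddings").alias("emb"))
+    .select(F.col("emb.result").alias("token"),
+            F.col("emb.embeddings").alias("vector"))
+)
+token_vectors.show(truncate=80)
+```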
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|translit_ppa_mediterranean| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[xlm_roberta]| +|Language:|xx| +|Size:|1.5 GB| + +## References + +https://huggingface.co/orxhelili/translit_ppa_mediterranean \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-trustpilot_balanced_location_roberta_en.md b/docs/_posts/ahmedlone127/2024-09-07-trustpilot_balanced_location_roberta_en.md new file mode 100644 index 00000000000000..144ac25866498f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-trustpilot_balanced_location_roberta_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English trustpilot_balanced_location_roberta RoBertaForSequenceClassification from riken01 +author: John Snow Labs +name: trustpilot_balanced_location_roberta +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`trustpilot_balanced_location_roberta` is a English model originally trained by riken01. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/trustpilot_balanced_location_roberta_en_5.5.0_3.0_1725718032302.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/trustpilot_balanced_location_roberta_en_5.5.0_3.0_1725718032302.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("trustpilot_balanced_location_roberta","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("trustpilot_balanced_location_roberta", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
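+
+Continuing from `pipelineDF` in the Python example above, the predicted label for each input row can be read back from the `class` annotation column. A short, illustrative sketch:
+
+```python
+from pyspark.sql import functions as F
+
+# The classifier emits one annotation per document; `result` holds the predicted label string.
+pipelineDF.select(
+    F.col("text"),
+    F.col("class.result").alias("predicted_label")
+).show(truncate=False)
+```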
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|trustpilot_balanced_location_roberta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|442.4 MB| + +## References + +https://huggingface.co/riken01/TrustPilot-balanced-location-roberta \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-twitter_roberta_base_jun2020_en.md b/docs/_posts/ahmedlone127/2024-09-07-twitter_roberta_base_jun2020_en.md new file mode 100644 index 00000000000000..8f8ae9174691d8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-twitter_roberta_base_jun2020_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English twitter_roberta_base_jun2020 RoBertaEmbeddings from cardiffnlp +author: John Snow Labs +name: twitter_roberta_base_jun2020 +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`twitter_roberta_base_jun2020` is a English model originally trained by cardiffnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_jun2020_en_5.5.0_3.0_1725673204145.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_jun2020_en_5.5.0_3.0_1725673204145.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("twitter_roberta_base_jun2020","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("twitter_roberta_base_jun2020","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|twitter_roberta_base_jun2020| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|466.1 MB| + +## References + +https://huggingface.co/cardiffnlp/twitter-roberta-base-jun2020 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-twitter_roberta_base_jun2020_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-twitter_roberta_base_jun2020_pipeline_en.md new file mode 100644 index 00000000000000..827866dca6ebca --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-twitter_roberta_base_jun2020_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English twitter_roberta_base_jun2020_pipeline pipeline RoBertaEmbeddings from cardiffnlp +author: John Snow Labs +name: twitter_roberta_base_jun2020_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`twitter_roberta_base_jun2020_pipeline` is a English model originally trained by cardiffnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_jun2020_pipeline_en_5.5.0_3.0_1725673225260.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_jun2020_pipeline_en_5.5.0_3.0_1725673225260.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("twitter_roberta_base_jun2020_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("twitter_roberta_base_jun2020_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
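+
+For quick spot checks, Spark NLP's `PretrainedPipeline` also offers a lightweight `annotate()` call that takes a plain string instead of a DataFrame. A sketch using the same pipeline name as above; the exact keys of the returned dict depend on the output columns of the included stages:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("twitter_roberta_base_jun2020_pipeline", lang="en")
+
+# annotate() returns a Python dict keyed by output column name, convenient for single sentences.
+result = pipeline.annotate("I love spark-nlp")
+print(result.keys())
+print(result["token"])
+```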
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|twitter_roberta_base_jun2020_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|466.1 MB| + +## References + +https://huggingface.co/cardiffnlp/twitter-roberta-base-jun2020 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-twitter_roberta_base_mar2020_en.md b/docs/_posts/ahmedlone127/2024-09-07-twitter_roberta_base_mar2020_en.md new file mode 100644 index 00000000000000..1412de59dcca81 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-twitter_roberta_base_mar2020_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English twitter_roberta_base_mar2020 RoBertaEmbeddings from cardiffnlp +author: John Snow Labs +name: twitter_roberta_base_mar2020 +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`twitter_roberta_base_mar2020` is a English model originally trained by cardiffnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_mar2020_en_5.5.0_3.0_1725716119919.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_mar2020_en_5.5.0_3.0_1725716119919.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("twitter_roberta_base_mar2020","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("twitter_roberta_base_mar2020","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|twitter_roberta_base_mar2020| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|466.1 MB| + +## References + +https://huggingface.co/cardiffnlp/twitter-roberta-base-mar2020 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-twitter_roberta_base_mar2020_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-twitter_roberta_base_mar2020_pipeline_en.md new file mode 100644 index 00000000000000..6c49f58b120823 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-twitter_roberta_base_mar2020_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English twitter_roberta_base_mar2020_pipeline pipeline RoBertaEmbeddings from cardiffnlp +author: John Snow Labs +name: twitter_roberta_base_mar2020_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`twitter_roberta_base_mar2020_pipeline` is a English model originally trained by cardiffnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_mar2020_pipeline_en_5.5.0_3.0_1725716141641.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_mar2020_pipeline_en_5.5.0_3.0_1725716141641.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("twitter_roberta_base_mar2020_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("twitter_roberta_base_mar2020_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|twitter_roberta_base_mar2020_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|466.1 MB| + +## References + +https://huggingface.co/cardiffnlp/twitter-roberta-base-mar2020 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-twitter_roberta_base_mar2022_en.md b/docs/_posts/ahmedlone127/2024-09-07-twitter_roberta_base_mar2022_en.md new file mode 100644 index 00000000000000..be5c573e371967 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-twitter_roberta_base_mar2022_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English twitter_roberta_base_mar2022 RoBertaEmbeddings from cardiffnlp +author: John Snow Labs +name: twitter_roberta_base_mar2022 +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`twitter_roberta_base_mar2022` is a English model originally trained by cardiffnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_mar2022_en_5.5.0_3.0_1725716264185.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_mar2022_en_5.5.0_3.0_1725716264185.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("twitter_roberta_base_mar2022","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("twitter_roberta_base_mar2022","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
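+
+Because none of the stages above are trainable, `pipeline.fit(data)` is cheap, and the fitted `pipelineModel` can be persisted with the standard Spark ML writer so the embeddings model does not have to be fetched again on the next run. A sketch with a placeholder path:
+
+```python
+from pyspark.ml import PipelineModel
+
+# "/tmp/..." is only an illustrative location; any path reachable by Spark works.
+pipelineModel.write().overwrite().save("/tmp/twitter_roberta_base_mar2022_model")
+
+restored = PipelineModel.load("/tmp/twitter_roberta_base_mar2022_model")
+restoredDF = restored.transform(data)
+```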
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|twitter_roberta_base_mar2022| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|466.1 MB| + +## References + +https://huggingface.co/cardiffnlp/twitter-roberta-base-mar2022 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-twitter_roberta_base_mar2022_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-twitter_roberta_base_mar2022_pipeline_en.md new file mode 100644 index 00000000000000..cabbde6621f885 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-twitter_roberta_base_mar2022_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English twitter_roberta_base_mar2022_pipeline pipeline RoBertaEmbeddings from cardiffnlp +author: John Snow Labs +name: twitter_roberta_base_mar2022_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`twitter_roberta_base_mar2022_pipeline` is a English model originally trained by cardiffnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_mar2022_pipeline_en_5.5.0_3.0_1725716284908.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_mar2022_pipeline_en_5.5.0_3.0_1725716284908.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("twitter_roberta_base_mar2022_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("twitter_roberta_base_mar2022_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|twitter_roberta_base_mar2022_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|466.1 MB| + +## References + +https://huggingface.co/cardiffnlp/twitter-roberta-base-mar2022 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-twitter_roberta_base_sentiment_ahmetayrnc_en.md b/docs/_posts/ahmedlone127/2024-09-07-twitter_roberta_base_sentiment_ahmetayrnc_en.md new file mode 100644 index 00000000000000..282fa10fb9fa01 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-twitter_roberta_base_sentiment_ahmetayrnc_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English twitter_roberta_base_sentiment_ahmetayrnc RoBertaForSequenceClassification from ahmetayrnc +author: John Snow Labs +name: twitter_roberta_base_sentiment_ahmetayrnc +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`twitter_roberta_base_sentiment_ahmetayrnc` is a English model originally trained by ahmetayrnc. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_sentiment_ahmetayrnc_en_5.5.0_3.0_1725680020574.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_sentiment_ahmetayrnc_en_5.5.0_3.0_1725680020574.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("twitter_roberta_base_sentiment_ahmetayrnc","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("twitter_roberta_base_sentiment_ahmetayrnc", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|twitter_roberta_base_sentiment_ahmetayrnc| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|468.3 MB| + +## References + +https://huggingface.co/ahmetayrnc/twitter-roberta-base-sentiment \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-twitter_roberta_base_sentiment_ahmetayrnc_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-twitter_roberta_base_sentiment_ahmetayrnc_pipeline_en.md new file mode 100644 index 00000000000000..d0e8d7d9f07d48 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-twitter_roberta_base_sentiment_ahmetayrnc_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English twitter_roberta_base_sentiment_ahmetayrnc_pipeline pipeline RoBertaForSequenceClassification from ahmetayrnc +author: John Snow Labs +name: twitter_roberta_base_sentiment_ahmetayrnc_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`twitter_roberta_base_sentiment_ahmetayrnc_pipeline` is a English model originally trained by ahmetayrnc. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_sentiment_ahmetayrnc_pipeline_en_5.5.0_3.0_1725680042767.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_sentiment_ahmetayrnc_pipeline_en_5.5.0_3.0_1725680042767.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("twitter_roberta_base_sentiment_ahmetayrnc_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("twitter_roberta_base_sentiment_ahmetayrnc_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|twitter_roberta_base_sentiment_ahmetayrnc_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|468.3 MB| + +## References + +https://huggingface.co/ahmetayrnc/twitter-roberta-base-sentiment + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-twitter_xlm_roberta_base_sentiment_finetuned_marc_tswana_v1_1_en.md b/docs/_posts/ahmedlone127/2024-09-07-twitter_xlm_roberta_base_sentiment_finetuned_marc_tswana_v1_1_en.md new file mode 100644 index 00000000000000..bac7aceb3e7d9f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-twitter_xlm_roberta_base_sentiment_finetuned_marc_tswana_v1_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English twitter_xlm_roberta_base_sentiment_finetuned_marc_tswana_v1_1 XlmRoBertaForSequenceClassification from betteib +author: John Snow Labs +name: twitter_xlm_roberta_base_sentiment_finetuned_marc_tswana_v1_1 +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`twitter_xlm_roberta_base_sentiment_finetuned_marc_tswana_v1_1` is a English model originally trained by betteib. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/twitter_xlm_roberta_base_sentiment_finetuned_marc_tswana_v1_1_en_5.5.0_3.0_1725669506649.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/twitter_xlm_roberta_base_sentiment_finetuned_marc_tswana_v1_1_en_5.5.0_3.0_1725669506649.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("twitter_xlm_roberta_base_sentiment_finetuned_marc_tswana_v1_1","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("twitter_xlm_roberta_base_sentiment_finetuned_marc_tswana_v1_1", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|twitter_xlm_roberta_base_sentiment_finetuned_marc_tswana_v1_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/betteib/twitter-xlm-roberta-base-sentiment-finetuned-marc-tn-v1.1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-twitter_xlm_roberta_base_sentiment_finetuned_marc_tswana_v1_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-twitter_xlm_roberta_base_sentiment_finetuned_marc_tswana_v1_1_pipeline_en.md new file mode 100644 index 00000000000000..0eef92cc82a7b3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-twitter_xlm_roberta_base_sentiment_finetuned_marc_tswana_v1_1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English twitter_xlm_roberta_base_sentiment_finetuned_marc_tswana_v1_1_pipeline pipeline XlmRoBertaForSequenceClassification from betteib +author: John Snow Labs +name: twitter_xlm_roberta_base_sentiment_finetuned_marc_tswana_v1_1_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`twitter_xlm_roberta_base_sentiment_finetuned_marc_tswana_v1_1_pipeline` is a English model originally trained by betteib. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/twitter_xlm_roberta_base_sentiment_finetuned_marc_tswana_v1_1_pipeline_en_5.5.0_3.0_1725669555517.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/twitter_xlm_roberta_base_sentiment_finetuned_marc_tswana_v1_1_pipeline_en_5.5.0_3.0_1725669555517.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("twitter_xlm_roberta_base_sentiment_finetuned_marc_tswana_v1_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("twitter_xlm_roberta_base_sentiment_finetuned_marc_tswana_v1_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|twitter_xlm_roberta_base_sentiment_finetuned_marc_tswana_v1_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/betteib/twitter-xlm-roberta-base-sentiment-finetuned-marc-tn-v1.1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-twitter_xlm_roberta_base_sentiment_finetunned_xx.md b/docs/_posts/ahmedlone127/2024-09-07-twitter_xlm_roberta_base_sentiment_finetunned_xx.md new file mode 100644 index 00000000000000..9bf07ba3610b5f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-twitter_xlm_roberta_base_sentiment_finetunned_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual twitter_xlm_roberta_base_sentiment_finetunned XlmRoBertaForSequenceClassification from citizenlab +author: John Snow Labs +name: twitter_xlm_roberta_base_sentiment_finetunned +date: 2024-09-07 +tags: [xx, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`twitter_xlm_roberta_base_sentiment_finetunned` is a Multilingual model originally trained by citizenlab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/twitter_xlm_roberta_base_sentiment_finetunned_xx_5.5.0_3.0_1725712161132.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/twitter_xlm_roberta_base_sentiment_finetunned_xx_5.5.0_3.0_1725712161132.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("twitter_xlm_roberta_base_sentiment_finetunned","xx") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("twitter_xlm_roberta_base_sentiment_finetunned", "xx")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|twitter_xlm_roberta_base_sentiment_finetunned| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|xx| +|Size:|1.0 GB| + +## References + +https://huggingface.co/citizenlab/twitter-xlm-roberta-base-sentiment-finetunned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-unibert_distilbert_3_en.md b/docs/_posts/ahmedlone127/2024-09-07-unibert_distilbert_3_en.md new file mode 100644 index 00000000000000..f7e2c073dcdfb6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-unibert_distilbert_3_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English unibert_distilbert_3 DistilBertForTokenClassification from dbala02 +author: John Snow Labs +name: unibert_distilbert_3 +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, distilbert, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`unibert_distilbert_3` is a English model originally trained by dbala02. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/unibert_distilbert_3_en_5.5.0_3.0_1725734327743.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/unibert_distilbert_3_en_5.5.0_3.0_1725734327743.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = DistilBertForTokenClassification.pretrained("unibert_distilbert_3","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = DistilBertForTokenClassification.pretrained("unibert_distilbert_3", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
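+
+With `pipelineDF` from the Python example above, the `token` and `ner` columns hold parallel arrays, so tokens and their predicted tags can be viewed side by side. A minimal sketch:
+
+```python
+from pyspark.sql import functions as F
+
+# token.result and ner.result are aligned one-to-one: one predicted tag per token.
+pipelineDF.select(
+    F.col("token.result").alias("tokens"),
+    F.col("ner.result").alias("tags")
+).show(truncate=False)
+```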
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|unibert_distilbert_3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|247.4 MB| + +## References + +https://huggingface.co/dbala02/uniBERT.distilBERT.3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-urdubert_en.md b/docs/_posts/ahmedlone127/2024-09-07-urdubert_en.md new file mode 100644 index 00000000000000..945e5497c3308a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-urdubert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English urdubert RoBertaEmbeddings from mwz +author: John Snow Labs +name: urdubert +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`urdubert` is a English model originally trained by mwz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/urdubert_en_5.5.0_3.0_1725716626360.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/urdubert_en_5.5.0_3.0_1725716626360.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("urdubert","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("urdubert","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|urdubert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|311.5 MB| + +## References + +https://huggingface.co/mwz/UrduBert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-urdubert_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-urdubert_pipeline_en.md new file mode 100644 index 00000000000000..be7e17f8db0c17 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-urdubert_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English urdubert_pipeline pipeline RoBertaEmbeddings from mwz +author: John Snow Labs +name: urdubert_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`urdubert_pipeline` is a English model originally trained by mwz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/urdubert_pipeline_en_5.5.0_3.0_1725716640805.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/urdubert_pipeline_en_5.5.0_3.0_1725716640805.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("urdubert_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("urdubert_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|urdubert_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|311.5 MB| + +## References + +https://huggingface.co/mwz/UrduBert + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-uzroberta_v2_pipeline_uz.md b/docs/_posts/ahmedlone127/2024-09-07-uzroberta_v2_pipeline_uz.md new file mode 100644 index 00000000000000..c826ef509dca52 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-uzroberta_v2_pipeline_uz.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Uzbek uzroberta_v2_pipeline pipeline RoBertaEmbeddings from sinonimayzer +author: John Snow Labs +name: uzroberta_v2_pipeline +date: 2024-09-07 +tags: [uz, open_source, pipeline, onnx] +task: Embeddings +language: uz +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`uzroberta_v2_pipeline` is a Uzbek model originally trained by sinonimayzer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/uzroberta_v2_pipeline_uz_5.5.0_3.0_1725716493833.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/uzroberta_v2_pipeline_uz_5.5.0_3.0_1725716493833.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("uzroberta_v2_pipeline", lang = "uz") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("uzroberta_v2_pipeline", lang = "uz") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|uzroberta_v2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|uz| +|Size:|471.0 MB| + +## References + +https://huggingface.co/sinonimayzer/UzRoBERTa-v2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-uzroberta_v2_uz.md b/docs/_posts/ahmedlone127/2024-09-07-uzroberta_v2_uz.md new file mode 100644 index 00000000000000..44959913cf3ffa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-uzroberta_v2_uz.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Uzbek uzroberta_v2 RoBertaEmbeddings from sinonimayzer +author: John Snow Labs +name: uzroberta_v2 +date: 2024-09-07 +tags: [uz, open_source, onnx, embeddings, roberta] +task: Embeddings +language: uz +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`uzroberta_v2` is a Uzbek model originally trained by sinonimayzer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/uzroberta_v2_uz_5.5.0_3.0_1725716472846.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/uzroberta_v2_uz_5.5.0_3.0_1725716472846.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("uzroberta_v2","uz") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("uzroberta_v2","uz") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|uzroberta_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|uz| +|Size:|471.0 MB| + +## References + +https://huggingface.co/sinonimayzer/UzRoBERTa-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-v2_mrcl0ud_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-v2_mrcl0ud_pipeline_en.md new file mode 100644 index 00000000000000..6644fdff1ca3d2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-v2_mrcl0ud_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English v2_mrcl0ud_pipeline pipeline DistilBertForQuestionAnswering from MrCl0ud +author: John Snow Labs +name: v2_mrcl0ud_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`v2_mrcl0ud_pipeline` is a English model originally trained by MrCl0ud. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/v2_mrcl0ud_pipeline_en_5.5.0_3.0_1725745612593.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/v2_mrcl0ud_pipeline_en_5.5.0_3.0_1725745612593.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("v2_mrcl0ud_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("v2_mrcl0ud_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|v2_mrcl0ud_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/MrCl0ud/v2 + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-weights_en.md b/docs/_posts/ahmedlone127/2024-09-07-weights_en.md new file mode 100644 index 00000000000000..e971afd5b89d8e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-weights_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English weights RoBertaEmbeddings from vsty +author: John Snow Labs +name: weights +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`weights` is a English model originally trained by vsty. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/weights_en_5.5.0_3.0_1725716837806.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/weights_en_5.5.0_3.0_1725716837806.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("weights","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("weights","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
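+
+After the pipeline runs, the token-level vectors live in the `embeddings` annotation column produced above. A short follow-up sketch for inspecting them:
+
+```python
+# Each element of "embeddings" is an annotation: "result" holds the token text,
+# "embeddings" holds its vector.
+pipelineDF.selectExpr("explode(embeddings) as emb") \
+    .selectExpr("emb.result as token", "emb.embeddings as vector") \
+    .show(truncate=False)
+```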
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|weights| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|275.1 MB| + +## References + +https://huggingface.co/vsty/weights \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-whisper_base_cv17_hungarian_hu.md b/docs/_posts/ahmedlone127/2024-09-07-whisper_base_cv17_hungarian_hu.md new file mode 100644 index 00000000000000..6c22a83d49d12a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-whisper_base_cv17_hungarian_hu.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Hungarian whisper_base_cv17_hungarian WhisperForCTC from Hungarians +author: John Snow Labs +name: whisper_base_cv17_hungarian +date: 2024-09-07 +tags: [hu, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: hu +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_base_cv17_hungarian` is a Hungarian model originally trained by Hungarians. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_base_cv17_hungarian_hu_5.5.0_3.0_1725751327328.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_base_cv17_hungarian_hu_5.5.0_3.0_1725751327328.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_base_cv17_hungarian","hu") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_base_cv17_hungarian", "hu")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
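+
+`data` is not defined in the snippet above; the `AudioAssembler` expects a DataFrame whose `audio_content` column holds the raw waveform as an array of floats (16 kHz mono is the usual Whisper input). A minimal sketch, assuming `librosa` is available and `sample.wav` is a local file:
+
+```python
+import librosa
+
+# Load the waveform as floats at 16 kHz and wrap it in a single-column DataFrame.
+raw_floats, _ = librosa.load("sample.wav", sr=16000)
+data = spark.createDataFrame([[raw_floats.tolist()]]).toDF("audio_content")
+```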
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_base_cv17_hungarian| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|hu| +|Size:|641.3 MB| + +## References + +https://huggingface.co/Hungarians/whisper-base-cv17-hu \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-whisper_base_cv17_hungarian_pipeline_hu.md b/docs/_posts/ahmedlone127/2024-09-07-whisper_base_cv17_hungarian_pipeline_hu.md new file mode 100644 index 00000000000000..373a4e1a6a6fba --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-whisper_base_cv17_hungarian_pipeline_hu.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Hungarian whisper_base_cv17_hungarian_pipeline pipeline WhisperForCTC from Hungarians +author: John Snow Labs +name: whisper_base_cv17_hungarian_pipeline +date: 2024-09-07 +tags: [hu, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: hu +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_base_cv17_hungarian_pipeline` is a Hungarian model originally trained by Hungarians. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_base_cv17_hungarian_pipeline_hu_5.5.0_3.0_1725751361475.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_base_cv17_hungarian_pipeline_hu_5.5.0_3.0_1725751361475.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_base_cv17_hungarian_pipeline", lang = "hu") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_base_cv17_hungarian_pipeline", lang = "hu") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_base_cv17_hungarian_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|hu| +|Size:|641.3 MB| + +## References + +https://huggingface.co/Hungarians/whisper-base-cv17-hu + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-whisper_gujarati_small_pipeline_gu.md b/docs/_posts/ahmedlone127/2024-09-07-whisper_gujarati_small_pipeline_gu.md new file mode 100644 index 00000000000000..c3c9d56c0fa068 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-whisper_gujarati_small_pipeline_gu.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Gujarati whisper_gujarati_small_pipeline pipeline WhisperForCTC from vasista22 +author: John Snow Labs +name: whisper_gujarati_small_pipeline +date: 2024-09-07 +tags: [gu, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: gu +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_gujarati_small_pipeline` is a Gujarati model originally trained by vasista22. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_gujarati_small_pipeline_gu_5.5.0_3.0_1725752781613.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_gujarati_small_pipeline_gu_5.5.0_3.0_1725752781613.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_gujarati_small_pipeline", lang = "gu") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_gujarati_small_pipeline", lang = "gu") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_gujarati_small_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|gu| +|Size:|1.7 GB| + +## References + +https://huggingface.co/vasista22/whisper-gujarati-small + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-whisper_noisy_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-whisper_noisy_pipeline_en.md new file mode 100644 index 00000000000000..1788fa81963438 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-whisper_noisy_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_noisy_pipeline pipeline WhisperForCTC from alxfng +author: John Snow Labs +name: whisper_noisy_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_noisy_pipeline` is a English model originally trained by alxfng. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_noisy_pipeline_en_5.5.0_3.0_1725751487309.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_noisy_pipeline_en_5.5.0_3.0_1725751487309.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_noisy_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_noisy_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_noisy_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|643.5 MB| + +## References + +https://huggingface.co/alxfng/whisper-noisy + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-whisper_small200sep4_spanish_es.md b/docs/_posts/ahmedlone127/2024-09-07-whisper_small200sep4_spanish_es.md new file mode 100644 index 00000000000000..f56ec9adaf6e84 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-whisper_small200sep4_spanish_es.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Castilian, Spanish whisper_small200sep4_spanish WhisperForCTC from jessicadiveai +author: John Snow Labs +name: whisper_small200sep4_spanish +date: 2024-09-07 +tags: [es, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small200sep4_spanish` is a Castilian, Spanish model originally trained by jessicadiveai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small200sep4_spanish_es_5.5.0_3.0_1725752306051.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small200sep4_spanish_es_5.5.0_3.0_1725752306051.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small200sep4_spanish","es") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small200sep4_spanish", "es")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small200sep4_spanish| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|es| +|Size:|1.7 GB| + +## References + +https://huggingface.co/jessicadiveai/whisper-small200sep4-es \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-whisper_small200sep4_spanish_pipeline_es.md b/docs/_posts/ahmedlone127/2024-09-07-whisper_small200sep4_spanish_pipeline_es.md new file mode 100644 index 00000000000000..42b46674d5859d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-whisper_small200sep4_spanish_pipeline_es.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Castilian, Spanish whisper_small200sep4_spanish_pipeline pipeline WhisperForCTC from jessicadiveai +author: John Snow Labs +name: whisper_small200sep4_spanish_pipeline +date: 2024-09-07 +tags: [es, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small200sep4_spanish_pipeline` is a Castilian, Spanish model originally trained by jessicadiveai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small200sep4_spanish_pipeline_es_5.5.0_3.0_1725752392695.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small200sep4_spanish_pipeline_es_5.5.0_3.0_1725752392695.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small200sep4_spanish_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small200sep4_spanish_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small200sep4_spanish_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|1.7 GB| + +## References + +https://huggingface.co/jessicadiveai/whisper-small200sep4-es + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-whisper_small_english_atco2_asr_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-whisper_small_english_atco2_asr_pipeline_en.md new file mode 100644 index 00000000000000..d605dc02764d66 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-whisper_small_english_atco2_asr_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_small_english_atco2_asr_pipeline pipeline WhisperForCTC from jlvdoorn +author: John Snow Labs +name: whisper_small_english_atco2_asr_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_english_atco2_asr_pipeline` is a English model originally trained by jlvdoorn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_english_atco2_asr_pipeline_en_5.5.0_3.0_1725750360870.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_english_atco2_asr_pipeline_en_5.5.0_3.0_1725750360870.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_english_atco2_asr_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_english_atco2_asr_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_english_atco2_asr_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/jlvdoorn/whisper-small.en-atco2-asr + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-whisper_small_finetunedenglish_speechfinal_en.md b/docs/_posts/ahmedlone127/2024-09-07-whisper_small_finetunedenglish_speechfinal_en.md new file mode 100644 index 00000000000000..7926c83d1de192 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-whisper_small_finetunedenglish_speechfinal_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_small_finetunedenglish_speechfinal WhisperForCTC from tonybegemy +author: John Snow Labs +name: whisper_small_finetunedenglish_speechfinal +date: 2024-09-07 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_finetunedenglish_speechfinal` is a English model originally trained by tonybegemy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_finetunedenglish_speechfinal_en_5.5.0_3.0_1725749867651.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_finetunedenglish_speechfinal_en_5.5.0_3.0_1725749867651.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_finetunedenglish_speechfinal","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_finetunedenglish_speechfinal", "en")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_finetunedenglish_speechfinal| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/tonybegemy/whisper_small_finetunedenglish_speechfinal \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-whisper_small_hindi_drinktoomuchsax_en.md b/docs/_posts/ahmedlone127/2024-09-07-whisper_small_hindi_drinktoomuchsax_en.md new file mode 100644 index 00000000000000..fc67800d15416f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-whisper_small_hindi_drinktoomuchsax_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_small_hindi_drinktoomuchsax WhisperForCTC from drinktoomuchsax +author: John Snow Labs +name: whisper_small_hindi_drinktoomuchsax +date: 2024-09-07 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_hindi_drinktoomuchsax` is a English model originally trained by drinktoomuchsax. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_hindi_drinktoomuchsax_en_5.5.0_3.0_1725752613058.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_hindi_drinktoomuchsax_en_5.5.0_3.0_1725752613058.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_hindi_drinktoomuchsax","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_hindi_drinktoomuchsax", "en")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_hindi_drinktoomuchsax| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/drinktoomuchsax/whisper-small-hi \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-whisper_small_hindi_drinktoomuchsax_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-whisper_small_hindi_drinktoomuchsax_pipeline_en.md new file mode 100644 index 00000000000000..626221c6da0565 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-whisper_small_hindi_drinktoomuchsax_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_small_hindi_drinktoomuchsax_pipeline pipeline WhisperForCTC from drinktoomuchsax +author: John Snow Labs +name: whisper_small_hindi_drinktoomuchsax_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_hindi_drinktoomuchsax_pipeline` is a English model originally trained by drinktoomuchsax. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_hindi_drinktoomuchsax_pipeline_en_5.5.0_3.0_1725752693438.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_hindi_drinktoomuchsax_pipeline_en_5.5.0_3.0_1725752693438.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_hindi_drinktoomuchsax_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_hindi_drinktoomuchsax_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_hindi_drinktoomuchsax_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/drinktoomuchsax/whisper-small-hi + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-whisper_small_kurdish_sorani_10_ku.md b/docs/_posts/ahmedlone127/2024-09-07-whisper_small_kurdish_sorani_10_ku.md new file mode 100644 index 00000000000000..add930dbcb3028 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-whisper_small_kurdish_sorani_10_ku.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Kurdish whisper_small_kurdish_sorani_10 WhisperForCTC from roshna-omer +author: John Snow Labs +name: whisper_small_kurdish_sorani_10 +date: 2024-09-07 +tags: [ku, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: ku +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_kurdish_sorani_10` is a Kurdish model originally trained by roshna-omer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_kurdish_sorani_10_ku_5.5.0_3.0_1725752803823.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_kurdish_sorani_10_ku_5.5.0_3.0_1725752803823.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_kurdish_sorani_10","ku") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_kurdish_sorani_10", "ku")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
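+
+After `transform`, the transcription lands in the `text` annotation column produced above; its `result` field holds the decoded string:
+
+```python
+# Show the recognized text for each input row.
+pipelineDF.select("text.result").show(truncate=False)
+```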
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_kurdish_sorani_10| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|ku| +|Size:|1.7 GB| + +## References + +https://huggingface.co/roshna-omer/whisper-small-Kurdish-Sorani-10 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-whisper_small_kurdish_sorani_10_pipeline_ku.md b/docs/_posts/ahmedlone127/2024-09-07-whisper_small_kurdish_sorani_10_pipeline_ku.md new file mode 100644 index 00000000000000..5537db94533114 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-whisper_small_kurdish_sorani_10_pipeline_ku.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Kurdish whisper_small_kurdish_sorani_10_pipeline pipeline WhisperForCTC from roshna-omer +author: John Snow Labs +name: whisper_small_kurdish_sorani_10_pipeline +date: 2024-09-07 +tags: [ku, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: ku +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_kurdish_sorani_10_pipeline` is a Kurdish model originally trained by roshna-omer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_kurdish_sorani_10_pipeline_ku_5.5.0_3.0_1725752882030.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_kurdish_sorani_10_pipeline_ku_5.5.0_3.0_1725752882030.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_kurdish_sorani_10_pipeline", lang = "ku") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_kurdish_sorani_10_pipeline", lang = "ku") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_kurdish_sorani_10_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ku| +|Size:|1.7 GB| + +## References + +https://huggingface.co/roshna-omer/whisper-small-Kurdish-Sorani-10 + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-whisper_small_russian_v2_artyomboyko_ru.md b/docs/_posts/ahmedlone127/2024-09-07-whisper_small_russian_v2_artyomboyko_ru.md new file mode 100644 index 00000000000000..8a43287fb16c85 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-whisper_small_russian_v2_artyomboyko_ru.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Russian whisper_small_russian_v2_artyomboyko WhisperForCTC from artyomboyko +author: John Snow Labs +name: whisper_small_russian_v2_artyomboyko +date: 2024-09-07 +tags: [ru, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: ru +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_russian_v2_artyomboyko` is a Russian model originally trained by artyomboyko. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_russian_v2_artyomboyko_ru_5.5.0_3.0_1725751968285.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_russian_v2_artyomboyko_ru_5.5.0_3.0_1725751968285.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_russian_v2_artyomboyko","ru") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_russian_v2_artyomboyko", "ru")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_russian_v2_artyomboyko| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|ru| +|Size:|1.7 GB| + +## References + +https://huggingface.co/artyomboyko/whisper-small-ru-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-whisper_small_twi_arxiv_pipeline_tw.md b/docs/_posts/ahmedlone127/2024-09-07-whisper_small_twi_arxiv_pipeline_tw.md new file mode 100644 index 00000000000000..b6e62ae731e65f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-whisper_small_twi_arxiv_pipeline_tw.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Twi whisper_small_twi_arxiv_pipeline pipeline WhisperForCTC from jdapaah +author: John Snow Labs +name: whisper_small_twi_arxiv_pipeline +date: 2024-09-07 +tags: [tw, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: tw +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_twi_arxiv_pipeline` is a Twi model originally trained by jdapaah. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_twi_arxiv_pipeline_tw_5.5.0_3.0_1725751432266.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_twi_arxiv_pipeline_tw_5.5.0_3.0_1725751432266.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_twi_arxiv_pipeline", lang = "tw") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_twi_arxiv_pipeline", lang = "tw") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_twi_arxiv_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|tw| +|Size:|1.7 GB| + +## References + +https://huggingface.co/jdapaah/whisper-small-twi-arxiv + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-whisper_tiny_few_audios_en.md b/docs/_posts/ahmedlone127/2024-09-07-whisper_tiny_few_audios_en.md new file mode 100644 index 00000000000000..6860147cfb11e7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-whisper_tiny_few_audios_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_tiny_few_audios WhisperForCTC from breco +author: John Snow Labs +name: whisper_tiny_few_audios +date: 2024-09-07 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_tiny_few_audios` is a English model originally trained by breco. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_tiny_few_audios_en_5.5.0_3.0_1725750246636.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_tiny_few_audios_en_5.5.0_3.0_1725750246636.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_tiny_few_audios","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_tiny_few_audios", "en")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_tiny_few_audios| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|389.9 MB| + +## References + +https://huggingface.co/breco/whisper-tiny-few-audios \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-whisper_tiny_few_audios_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-whisper_tiny_few_audios_pipeline_en.md new file mode 100644 index 00000000000000..ec7d1565f1c9b0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-whisper_tiny_few_audios_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_tiny_few_audios_pipeline pipeline WhisperForCTC from breco +author: John Snow Labs +name: whisper_tiny_few_audios_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_tiny_few_audios_pipeline` is a English model originally trained by breco. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_tiny_few_audios_pipeline_en_5.5.0_3.0_1725750266006.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_tiny_few_audios_pipeline_en_5.5.0_3.0_1725750266006.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_tiny_few_audios_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_tiny_few_audios_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
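+
+`df` is assumed to already exist in the snippet above. Since the first stage of this pipeline is an AudioAssembler, the input DataFrame needs a column of raw audio floats (typically named `audio_content`); a hedged end-to-end sketch, where `raw_floats` is a hypothetical list of waveform samples:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# Hypothetical input: one row per audio clip, waveform as a list of floats.
+df = spark.createDataFrame([[raw_floats]]).toDF("audio_content")
+
+pipeline = PretrainedPipeline("whisper_tiny_few_audios_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+annotations.select("text.result").show(truncate=False)
+```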
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_tiny_few_audios_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|389.9 MB| + +## References + +https://huggingface.co/breco/whisper-tiny-few-audios + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-whisper_tiny_minds14_english_us_markredito_en.md b/docs/_posts/ahmedlone127/2024-09-07-whisper_tiny_minds14_english_us_markredito_en.md new file mode 100644 index 00000000000000..a865c169de20e9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-whisper_tiny_minds14_english_us_markredito_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_tiny_minds14_english_us_markredito WhisperForCTC from markredito +author: John Snow Labs +name: whisper_tiny_minds14_english_us_markredito +date: 2024-09-07 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_tiny_minds14_english_us_markredito` is a English model originally trained by markredito. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_tiny_minds14_english_us_markredito_en_5.5.0_3.0_1725752024025.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_tiny_minds14_english_us_markredito_en_5.5.0_3.0_1725752024025.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_tiny_minds14_english_us_markredito","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_tiny_minds14_english_us_markredito", "en")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_tiny_minds14_english_us_markredito| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|389.9 MB| + +## References + +https://huggingface.co/markredito/whisper-tiny-minds14-en-us \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-whisper_tiny_minds14_english_us_markredito_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-whisper_tiny_minds14_english_us_markredito_pipeline_en.md new file mode 100644 index 00000000000000..3ee7617e371c1c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-whisper_tiny_minds14_english_us_markredito_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English whisper_tiny_minds14_english_us_markredito_pipeline pipeline WhisperForCTC from markredito +author: John Snow Labs +name: whisper_tiny_minds14_english_us_markredito_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_tiny_minds14_english_us_markredito_pipeline` is a English model originally trained by markredito. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_tiny_minds14_english_us_markredito_pipeline_en_5.5.0_3.0_1725752042905.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_tiny_minds14_english_us_markredito_pipeline_en_5.5.0_3.0_1725752042905.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_tiny_minds14_english_us_markredito_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_tiny_minds14_english_us_markredito_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_tiny_minds14_english_us_markredito_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|389.9 MB| + +## References + +https://huggingface.co/markredito/whisper-tiny-minds14-en-us + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-whisper_tiny_portuguese_dominguesm_pt.md b/docs/_posts/ahmedlone127/2024-09-07-whisper_tiny_portuguese_dominguesm_pt.md new file mode 100644 index 00000000000000..51d491e32c870b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-whisper_tiny_portuguese_dominguesm_pt.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Portuguese whisper_tiny_portuguese_dominguesm WhisperForCTC from dominguesm +author: John Snow Labs +name: whisper_tiny_portuguese_dominguesm +date: 2024-09-07 +tags: [pt, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: pt +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_tiny_portuguese_dominguesm` is a Portuguese model originally trained by dominguesm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_tiny_portuguese_dominguesm_pt_5.5.0_3.0_1725752468945.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_tiny_portuguese_dominguesm_pt_5.5.0_3.0_1725752468945.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_tiny_portuguese_dominguesm","pt") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_tiny_portuguese_dominguesm", "pt")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_tiny_portuguese_dominguesm| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|pt| +|Size:|390.8 MB| + +## References + +https://huggingface.co/dominguesm/whisper-tiny-pt \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-wikismall_roberta_en.md b/docs/_posts/ahmedlone127/2024-09-07-wikismall_roberta_en.md new file mode 100644 index 00000000000000..eba73ce1a298ed --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-wikismall_roberta_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English wikismall_roberta RoBertaEmbeddings from acloudfan +author: John Snow Labs +name: wikismall_roberta +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`wikismall_roberta` is a English model originally trained by acloudfan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/wikismall_roberta_en_5.5.0_3.0_1725678345347.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/wikismall_roberta_en_5.5.0_3.0_1725678345347.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = RoBertaEmbeddings.pretrained("wikismall_roberta","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = RoBertaEmbeddings.pretrained("wikismall_roberta","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
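+
+If plain Spark ML vectors are needed downstream, an `EmbeddingsFinisher` can be appended to the same pipeline; a brief sketch building on the stages defined above:
+
+```python
+from sparknlp.base import EmbeddingsFinisher
+
+# Convert Spark NLP embedding annotations into Spark ML vectors.
+finisher = EmbeddingsFinisher() \
+    .setInputCols(["embeddings"]) \
+    .setOutputCols(["finished_embeddings"]) \
+    .setOutputAsVector(True)
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings, finisher])
+```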
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|wikismall_roberta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[roberta]| +|Language:|en| +|Size:|465.4 MB| + +## References + +https://huggingface.co/acloudfan/wikismall-roberta \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-wikismall_roberta_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-wikismall_roberta_pipeline_en.md new file mode 100644 index 00000000000000..2485bbeb767c65 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-wikismall_roberta_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English wikismall_roberta_pipeline pipeline RoBertaEmbeddings from acloudfan +author: John Snow Labs +name: wikismall_roberta_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`wikismall_roberta_pipeline` is a English model originally trained by acloudfan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/wikismall_roberta_pipeline_en_5.5.0_3.0_1725678366737.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/wikismall_roberta_pipeline_en_5.5.0_3.0_1725678366737.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("wikismall_roberta_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("wikismall_roberta_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|wikismall_roberta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|465.5 MB| + +## References + +https://huggingface.co/acloudfan/wikismall-roberta + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-wolfbbsroberta_large_en.md b/docs/_posts/ahmedlone127/2024-09-07-wolfbbsroberta_large_en.md new file mode 100644 index 00000000000000..6779721c9bd4fa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-wolfbbsroberta_large_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English wolfbbsroberta_large CamemBertEmbeddings from itsunoda +author: John Snow Labs +name: wolfbbsroberta_large +date: 2024-09-07 +tags: [en, open_source, onnx, embeddings, camembert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: CamemBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`wolfbbsroberta_large` is a English model originally trained by itsunoda. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/wolfbbsroberta_large_en_5.5.0_3.0_1725692316115.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/wolfbbsroberta_large_en_5.5.0_3.0_1725692316115.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = CamemBertEmbeddings.pretrained("wolfbbsroberta_large","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = CamemBertEmbeddings.pretrained("wolfbbsroberta_large","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
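+
+For quick, driver-side inference on a handful of strings, the fitted pipeline can also be wrapped in a `LightPipeline`; a short sketch reusing `pipelineModel` from above:
+
+```python
+from sparknlp.base import LightPipeline
+
+# Annotate a single string without building a DataFrame.
+light = LightPipeline(pipelineModel)
+result = light.annotate("I love spark-nlp")
+print(result["token"])
+```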
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|wolfbbsroberta_large| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[camembert]| +|Language:|en| +|Size:|411.7 MB| + +## References + +https://huggingface.co/itsunoda/wolfbbsRoBERTa-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-wolfbbsroberta_large_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-wolfbbsroberta_large_pipeline_en.md new file mode 100644 index 00000000000000..4c5a5918facb98 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-wolfbbsroberta_large_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English wolfbbsroberta_large_pipeline pipeline CamemBertEmbeddings from itsunoda +author: John Snow Labs +name: wolfbbsroberta_large_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained CamemBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`wolfbbsroberta_large_pipeline` is a English model originally trained by itsunoda. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/wolfbbsroberta_large_pipeline_en_5.5.0_3.0_1725692334622.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/wolfbbsroberta_large_pipeline_en_5.5.0_3.0_1725692334622.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("wolfbbsroberta_large_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("wolfbbsroberta_large_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|wolfbbsroberta_large_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|411.7 MB| + +## References + +https://huggingface.co/itsunoda/wolfbbsRoBERTa-large + +## Included Models + +- DocumentAssembler +- TokenizerModel +- CamemBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-wolof_description_guru_0_en.md b/docs/_posts/ahmedlone127/2024-09-07-wolof_description_guru_0_en.md new file mode 100644 index 00000000000000..9616cbaa203b74 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-wolof_description_guru_0_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English wolof_description_guru_0 DistilBertForQuestionAnswering from gjonesQ02 +author: John Snow Labs +name: wolof_description_guru_0 +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, distilbert] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`wolof_description_guru_0` is a English model originally trained by gjonesQ02. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/wolof_description_guru_0_en_5.5.0_3.0_1725695321973.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/wolof_description_guru_0_en_5.5.0_3.0_1725695321973.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import MultiDocumentAssembler
+from sparknlp.annotator import DistilBertForQuestionAnswering
+from pyspark.ml import Pipeline
+
+# Assemble the question and context columns into document annotations
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = DistilBertForQuestionAnswering.pretrained("wolof_description_guru_0","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = DistilBertForQuestionAnswering.pretrained("wolof_description_guru_0", "en")
+  .setInputCols(Array("document_question","document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
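+
+To read the predicted span out of `pipelineDF`, the `answer` column configured above can be selected directly. This is a small usage sketch under the same assumptions as the example, not an additional requirement of the model.
+
+```python
+# Each row holds the answer span extracted from its context
+pipelineDF.select("answer.result").show(truncate=False)
+```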
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|wolof_description_guru_0| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/gjonesQ02/WO_Description_Guru_0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-wolof_description_guru_0_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-wolof_description_guru_0_pipeline_en.md new file mode 100644 index 00000000000000..fb9fb298979a35 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-wolof_description_guru_0_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English wolof_description_guru_0_pipeline pipeline DistilBertForQuestionAnswering from gjonesQ02 +author: John Snow Labs +name: wolof_description_guru_0_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`wolof_description_guru_0_pipeline` is a English model originally trained by gjonesQ02. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/wolof_description_guru_0_pipeline_en_5.5.0_3.0_1725695334020.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/wolof_description_guru_0_pipeline_en_5.5.0_3.0_1725695334020.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("wolof_description_guru_0_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("wolof_description_guru_0_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
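+
+The pretrained pipeline expects a DataFrame with one question column and one context column feeding its `MultiDocumentAssembler`. The column names and sample row below are assumptions for illustration; adjust them if the pipeline reports different input columns.
+
+```python
+# Sketch of building the two-column input DataFrame (column names are assumed)
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+df = spark.createDataFrame(
+    [("What framework do I use?", "I use spark-nlp.")],
+    ["question", "context"]
+)
+
+pipeline = PretrainedPipeline("wolof_description_guru_0_pipeline", lang="en")
+pipeline.transform(df).select("answer.result").show(truncate=False)
+```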
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|wolof_description_guru_0_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/gjonesQ02/WO_Description_Guru_0 + +## Included Models + +- MultiDocumentAssembler +- DistilBertForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-wolof_finetuned_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-wolof_finetuned_ner_pipeline_en.md new file mode 100644 index 00000000000000..55e29bdbf59c23 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-wolof_finetuned_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English wolof_finetuned_ner_pipeline pipeline XlmRoBertaForTokenClassification from vonewman +author: John Snow Labs +name: wolof_finetuned_ner_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`wolof_finetuned_ner_pipeline` is a English model originally trained by vonewman. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/wolof_finetuned_ner_pipeline_en_5.5.0_3.0_1725745271915.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/wolof_finetuned_ner_pipeline_en_5.5.0_3.0_1725745271915.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("wolof_finetuned_ner_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("wolof_finetuned_ner_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|wolof_finetuned_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|777.5 MB| + +## References + +https://huggingface.co/vonewman/wolof-finetuned-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_r_finetuned_toxic_political_tweets_spanish_es.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_r_finetuned_toxic_political_tweets_spanish_es.md new file mode 100644 index 00000000000000..b3b8855a136619 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_r_finetuned_toxic_political_tweets_spanish_es.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Castilian, Spanish xlm_r_finetuned_toxic_political_tweets_spanish XlmRoBertaForSequenceClassification from Newtral +author: John Snow Labs +name: xlm_r_finetuned_toxic_political_tweets_spanish +date: 2024-09-07 +tags: [es, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_r_finetuned_toxic_political_tweets_spanish` is a Castilian, Spanish model originally trained by Newtral. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_r_finetuned_toxic_political_tweets_spanish_es_5.5.0_3.0_1725712354032.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_r_finetuned_toxic_political_tweets_spanish_es_5.5.0_3.0_1725712354032.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+# Assemble raw text into document annotations, tokenize, then classify the sequence
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_r_finetuned_toxic_political_tweets_spanish","es") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_r_finetuned_toxic_political_tweets_spanish", "es")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
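+
+The predicted label for each row ends up in the `class` column configured above. A short inspection sketch, reusing the example's `pipelineDF`:
+
+```python
+# Show the input text next to the predicted class label
+pipelineDF.select("text", "class.result").show(truncate=False)
+```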
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_r_finetuned_toxic_political_tweets_spanish| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|es| +|Size:|801.3 MB| + +## References + +https://huggingface.co/Newtral/xlm-r-finetuned-toxic-political-tweets-es \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_r_finetuned_toxic_political_tweets_spanish_pipeline_es.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_r_finetuned_toxic_political_tweets_spanish_pipeline_es.md new file mode 100644 index 00000000000000..ee4c3eaef1a1c6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_r_finetuned_toxic_political_tweets_spanish_pipeline_es.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Castilian, Spanish xlm_r_finetuned_toxic_political_tweets_spanish_pipeline pipeline XlmRoBertaForSequenceClassification from Newtral +author: John Snow Labs +name: xlm_r_finetuned_toxic_political_tweets_spanish_pipeline +date: 2024-09-07 +tags: [es, open_source, pipeline, onnx] +task: Text Classification +language: es +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_r_finetuned_toxic_political_tweets_spanish_pipeline` is a Castilian, Spanish model originally trained by Newtral. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_r_finetuned_toxic_political_tweets_spanish_pipeline_es_5.5.0_3.0_1725712481735.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_r_finetuned_toxic_political_tweets_spanish_pipeline_es_5.5.0_3.0_1725712481735.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_r_finetuned_toxic_political_tweets_spanish_pipeline", lang = "es") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_r_finetuned_toxic_political_tweets_spanish_pipeline", lang = "es") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_r_finetuned_toxic_political_tweets_spanish_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|es| +|Size:|801.4 MB| + +## References + +https://huggingface.co/Newtral/xlm-r-finetuned-toxic-political-tweets-es + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_autext_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_autext_en.md new file mode 100644 index 00000000000000..eeb28d0ae0ee1e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_autext_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_autext XlmRoBertaForSequenceClassification from jorgefg03 +author: John Snow Labs +name: xlm_roberta_base_autext +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_autext` is a English model originally trained by jorgefg03. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_autext_en_5.5.0_3.0_1725711659700.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_autext_en_5.5.0_3.0_1725711659700.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+# Assemble raw text into document annotations, tokenize, then classify the sequence
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_autext","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_autext", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_autext| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|804.3 MB| + +## References + +https://huggingface.co/jorgefg03/xlm-roberta-base-autext \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_augument_visquad2_16_3_2023_1_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_augument_visquad2_16_3_2023_1_en.md new file mode 100644 index 00000000000000..341796fafe4c85 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_augument_visquad2_16_3_2023_1_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_augument_visquad2_16_3_2023_1 XlmRoBertaForQuestionAnswering from jluckyboyj +author: John Snow Labs +name: xlm_roberta_base_finetuned_augument_visquad2_16_3_2023_1 +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_augument_visquad2_16_3_2023_1` is a English model originally trained by jluckyboyj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_augument_visquad2_16_3_2023_1_en_5.5.0_3.0_1725710787728.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_augument_visquad2_16_3_2023_1_en_5.5.0_3.0_1725710787728.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import MultiDocumentAssembler
+from sparknlp.annotator import XlmRoBertaForQuestionAnswering
+from pyspark.ml import Pipeline
+
+# Assemble the question and context columns into document annotations
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_finetuned_augument_visquad2_16_3_2023_1","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_base_finetuned_augument_visquad2_16_3_2023_1", "en")
+  .setInputCols(Array("document_question","document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_augument_visquad2_16_3_2023_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|853.0 MB| + +## References + +https://huggingface.co/jluckyboyj/xlm-roberta-base-finetuned-augument-visquad2-16-3-2023-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_augument_visquad2_16_3_2023_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_augument_visquad2_16_3_2023_1_pipeline_en.md new file mode 100644 index 00000000000000..65d7c4948b6981 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_augument_visquad2_16_3_2023_1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_augument_visquad2_16_3_2023_1_pipeline pipeline XlmRoBertaForQuestionAnswering from jluckyboyj +author: John Snow Labs +name: xlm_roberta_base_finetuned_augument_visquad2_16_3_2023_1_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_augument_visquad2_16_3_2023_1_pipeline` is a English model originally trained by jluckyboyj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_augument_visquad2_16_3_2023_1_pipeline_en_5.5.0_3.0_1725710843480.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_augument_visquad2_16_3_2023_1_pipeline_en_5.5.0_3.0_1725710843480.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_augument_visquad2_16_3_2023_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_augument_visquad2_16_3_2023_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_augument_visquad2_16_3_2023_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|853.0 MB| + +## References + +https://huggingface.co/jluckyboyj/xlm-roberta-base-finetuned-augument-visquad2-16-3-2023-1 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_marc_english_tomo_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_marc_english_tomo_en.md new file mode 100644 index 00000000000000..323ee50d36813b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_marc_english_tomo_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_marc_english_tomo XlmRoBertaForSequenceClassification from TomO +author: John Snow Labs +name: xlm_roberta_base_finetuned_marc_english_tomo +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_marc_english_tomo` is a English model originally trained by TomO. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_marc_english_tomo_en_5.5.0_3.0_1725711622505.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_marc_english_tomo_en_5.5.0_3.0_1725711622505.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+# Assemble raw text into document annotations, tokenize, then classify the sequence
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_finetuned_marc_english_tomo","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_finetuned_marc_english_tomo", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_marc_english_tomo| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|833.5 MB| + +## References + +https://huggingface.co/TomO/xlm-roberta-base-finetuned-marc-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_marc_english_tomo_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_marc_english_tomo_pipeline_en.md new file mode 100644 index 00000000000000..3b48080078c107 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_marc_english_tomo_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_marc_english_tomo_pipeline pipeline XlmRoBertaForSequenceClassification from TomO +author: John Snow Labs +name: xlm_roberta_base_finetuned_marc_english_tomo_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_marc_english_tomo_pipeline` is a English model originally trained by TomO. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_marc_english_tomo_pipeline_en_5.5.0_3.0_1725711708277.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_marc_english_tomo_pipeline_en_5.5.0_3.0_1725711708277.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_marc_english_tomo_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_marc_english_tomo_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_marc_english_tomo_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|833.5 MB| + +## References + +https://huggingface.co/TomO/xlm-roberta-base-finetuned-marc-en + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_marc_yuri_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_marc_yuri_pipeline_en.md new file mode 100644 index 00000000000000..ba9daa80ea5739 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_marc_yuri_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_marc_yuri_pipeline pipeline XlmRoBertaForSequenceClassification from Yuri +author: John Snow Labs +name: xlm_roberta_base_finetuned_marc_yuri_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_marc_yuri_pipeline` is a English model originally trained by Yuri. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_marc_yuri_pipeline_en_5.5.0_3.0_1725712945369.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_marc_yuri_pipeline_en_5.5.0_3.0_1725712945369.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_marc_yuri_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_marc_yuri_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_marc_yuri_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|835.1 MB| + +## References + +https://huggingface.co/Yuri/xlm-roberta-base-finetuned-marc + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_aiekek_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_aiekek_en.md new file mode 100644 index 00000000000000..a313e88cd5151d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_aiekek_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_all_aiekek XlmRoBertaForTokenClassification from AIEKEK +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_all_aiekek +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_all_aiekek` is a English model originally trained by AIEKEK. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_aiekek_en_5.5.0_3.0_1725694339275.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_aiekek_en_5.5.0_3.0_1725694339275.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+# Assemble raw text into document annotations, tokenize, then tag each token
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_all_aiekek","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_all_aiekek", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
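+
+The `ner` column above holds token-level IOB tags. If entity chunks are preferred, a `NerConverter` stage can be appended; this extra stage is a sketch reusing the example's column names, not part of the original pipeline.
+
+```python
+# Optional sketch: group token-level tags into entity chunks
+from sparknlp.annotator import NerConverter
+
+converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("entities")
+
+converter.transform(pipelineDF).select("entities.result").show(truncate=False)
+```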
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_all_aiekek| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|848.0 MB| + +## References + +https://huggingface.co/AIEKEK/xlm-roberta-base-finetuned-panx-all \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_aiekek_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_aiekek_pipeline_en.md new file mode 100644 index 00000000000000..c28db94410bafb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_aiekek_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_all_aiekek_pipeline pipeline XlmRoBertaForTokenClassification from AIEKEK +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_all_aiekek_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_all_aiekek_pipeline` is a English model originally trained by AIEKEK. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_aiekek_pipeline_en_5.5.0_3.0_1725694417134.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_aiekek_pipeline_en_5.5.0_3.0_1725694417134.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_all_aiekek_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_all_aiekek_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_all_aiekek_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|848.0 MB| + +## References + +https://huggingface.co/AIEKEK/xlm-roberta-base-finetuned-panx-all + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_cataluna84_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_cataluna84_en.md new file mode 100644 index 00000000000000..5114708a96d3ff --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_cataluna84_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_all_cataluna84 XlmRoBertaForTokenClassification from cataluna84 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_all_cataluna84 +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_all_cataluna84` is a English model originally trained by cataluna84. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_cataluna84_en_5.5.0_3.0_1725688063443.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_cataluna84_en_5.5.0_3.0_1725688063443.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+# Assemble raw text into document annotations, tokenize, then tag each token
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_all_cataluna84","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_all_cataluna84", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_all_cataluna84| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|861.0 MB| + +## References + +https://huggingface.co/cataluna84/xlm-roberta-base-finetuned-panx-all \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_deepaperi_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_deepaperi_en.md new file mode 100644 index 00000000000000..8bdae898a54eaa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_deepaperi_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_all_deepaperi XlmRoBertaForTokenClassification from DeepaPeri +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_all_deepaperi +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_all_deepaperi` is a English model originally trained by DeepaPeri. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_deepaperi_en_5.5.0_3.0_1725705731608.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_deepaperi_en_5.5.0_3.0_1725705731608.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+# Assemble raw text into document annotations, tokenize, then tag each token
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_all_deepaperi","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_all_deepaperi", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_all_deepaperi| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|829.4 MB| + +## References + +https://huggingface.co/DeepaPeri/xlm-roberta-base-finetuned-panx-all \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_deepaperi_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_deepaperi_pipeline_en.md new file mode 100644 index 00000000000000..980ca8aca26af4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_deepaperi_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_all_deepaperi_pipeline pipeline XlmRoBertaForTokenClassification from DeepaPeri +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_all_deepaperi_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_all_deepaperi_pipeline` is a English model originally trained by DeepaPeri. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_deepaperi_pipeline_en_5.5.0_3.0_1725705811220.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_deepaperi_pipeline_en_5.5.0_3.0_1725705811220.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_all_deepaperi_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_all_deepaperi_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_all_deepaperi_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|829.4 MB| + +## References + +https://huggingface.co/DeepaPeri/xlm-roberta-base-finetuned-panx-all + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_huggingbase_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_huggingbase_en.md new file mode 100644 index 00000000000000..43a29eb1c30b6f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_huggingbase_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_all_huggingbase XlmRoBertaForTokenClassification from huggingbase +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_all_huggingbase +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_all_huggingbase` is a English model originally trained by huggingbase. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_huggingbase_en_5.5.0_3.0_1725692862899.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_huggingbase_en_5.5.0_3.0_1725692862899.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+# Assemble raw text into document annotations, tokenize, then tag each token
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_all_huggingbase","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_all_huggingbase", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_all_huggingbase| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|861.0 MB| + +## References + +https://huggingface.co/huggingbase/xlm-roberta-base-finetuned-panx-all \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_huggingbase_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_huggingbase_pipeline_en.md new file mode 100644 index 00000000000000..f1218b0e79fb03 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_huggingbase_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_all_huggingbase_pipeline pipeline XlmRoBertaForTokenClassification from huggingbase +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_all_huggingbase_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_all_huggingbase_pipeline` is a English model originally trained by huggingbase. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_huggingbase_pipeline_en_5.5.0_3.0_1725692924748.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_huggingbase_pipeline_en_5.5.0_3.0_1725692924748.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_all_huggingbase_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_all_huggingbase_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
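+
+For quick checks on a single sentence, the pipeline's `annotate` helper avoids building a DataFrame. The sample text and the `token`/`ner` result keys below are assumptions for illustration.
+
+```python
+# Lightweight single-sentence sketch
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_all_huggingbase_pipeline", lang="en")
+result = pipeline.annotate("John Snow Labs is based in Delaware.")
+# result keys are assumed to follow the pipeline's output column names
+print(list(zip(result["token"], result["ner"])))
+```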
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_all_huggingbase_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|861.0 MB| + +## References + +https://huggingface.co/huggingbase/xlm-roberta-base-finetuned-panx-all + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_ladoza03_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_ladoza03_pipeline_en.md new file mode 100644 index 00000000000000..114de226078487 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_ladoza03_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_all_ladoza03_pipeline pipeline XlmRoBertaForTokenClassification from ladoza03 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_all_ladoza03_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_all_ladoza03_pipeline` is a English model originally trained by ladoza03. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_ladoza03_pipeline_en_5.5.0_3.0_1725694624808.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_ladoza03_pipeline_en_5.5.0_3.0_1725694624808.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_all_ladoza03_pipeline", lang = "en")
df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
annotations = pipeline.transform(df)

```
```scala

val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_all_ladoza03_pipeline", lang = "en")
val df = Seq("I love spark-nlp").toDS.toDF("text")
val annotations = pipeline.transform(df)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_all_ladoza03_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|872.7 MB| + +## References + +https://huggingface.co/ladoza03/xlm-roberta-base-finetuned-panx-all + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_sbpark_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_sbpark_en.md new file mode 100644 index 00000000000000..8e73a46ee6e375 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_sbpark_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_all_sbpark XlmRoBertaForTokenClassification from sbpark +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_all_sbpark +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_all_sbpark` is a English model originally trained by sbpark. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_sbpark_en_5.5.0_3.0_1725743212564.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_sbpark_en_5.5.0_3.0_1725743212564.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_all_sbpark","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("ner")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols("document")
    .setOutputCol("token")

val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_all_sbpark", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("ner")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
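
To surface the predicted tags from the `ner` annotations produced above, the transformed DataFrame can be queried with ordinary Spark SQL functions. This is a small sketch that assumes the Python snippet in this card has already been run (the card's snippet itself assumes `DocumentAssembler`, `Tokenizer`, and `XlmRoBertaForTokenClassification` are imported from `sparknlp` and `Pipeline` from `pyspark.ml`):

```python
from pyspark.sql import functions as F

# `ner` is an array of annotations; its `result` field carries the predicted IOB tag
# for each token, so exploding it lists the tags row by row
pipelineDF.select(F.explode(F.col("ner.result")).alias("ner_label")).show(truncate=False)
```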
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_all_sbpark| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|859.5 MB| + +## References + +https://huggingface.co/sbpark/xlm-roberta-base-finetuned-panx-all \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_sbpark_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_sbpark_pipeline_en.md new file mode 100644 index 00000000000000..08e1e7e37286ed --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_sbpark_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_all_sbpark_pipeline pipeline XlmRoBertaForTokenClassification from sbpark +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_all_sbpark_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_all_sbpark_pipeline` is a English model originally trained by sbpark. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_sbpark_pipeline_en_5.5.0_3.0_1725743274562.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_sbpark_pipeline_en_5.5.0_3.0_1725743274562.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_all_sbpark_pipeline", lang = "en")
df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
annotations = pipeline.transform(df)

```
```scala

val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_all_sbpark_pipeline", lang = "en")
val df = Seq("I love spark-nlp").toDS.toDF("text")
val annotations = pipeline.transform(df)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_all_sbpark_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|859.5 MB| + +## References + +https://huggingface.co/sbpark/xlm-roberta-base-finetuned-panx-all + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_sreek_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_sreek_en.md new file mode 100644 index 00000000000000..c3f69f0ea6b9f2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_sreek_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_all_sreek XlmRoBertaForTokenClassification from Sreek +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_all_sreek +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_all_sreek` is a English model originally trained by Sreek. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_sreek_en_5.5.0_3.0_1725706082963.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_sreek_en_5.5.0_3.0_1725706082963.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_all_sreek","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("ner")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols("document")
    .setOutputCol("token")

val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_all_sreek", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("ner")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_all_sreek| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|858.2 MB| + +## References + +https://huggingface.co/Sreek/xlm-roberta-base-finetuned-panx-all \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_sreek_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_sreek_pipeline_en.md new file mode 100644 index 00000000000000..a511785a3825a4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_sreek_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_all_sreek_pipeline pipeline XlmRoBertaForTokenClassification from Sreek +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_all_sreek_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_all_sreek_pipeline` is a English model originally trained by Sreek. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_sreek_pipeline_en_5.5.0_3.0_1725706143220.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_sreek_pipeline_en_5.5.0_3.0_1725706143220.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_all_sreek_pipeline", lang = "en")
df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
annotations = pipeline.transform(df)

```
```scala

val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_all_sreek_pipeline", lang = "en")
val df = Seq("I love spark-nlp").toDS.toDF("text")
val annotations = pipeline.transform(df)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_all_sreek_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|858.2 MB| + +## References + +https://huggingface.co/Sreek/xlm-roberta-base-finetuned-panx-all + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_wendao_123_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_wendao_123_en.md new file mode 100644 index 00000000000000..af27582bbc79a9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_all_wendao_123_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_all_wendao_123 XlmRoBertaForTokenClassification from Wendao-123 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_all_wendao_123 +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_all_wendao_123` is a English model originally trained by Wendao-123. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_wendao_123_en_5.5.0_3.0_1725687179715.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_wendao_123_en_5.5.0_3.0_1725687179715.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_all_wendao_123","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("ner")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols("document")
    .setOutputCol("token")

val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_all_wendao_123", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("ner")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
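
Downstream code often wants plain strings rather than annotation structs; Spark NLP's `Finisher` can be appended to the same pipeline for that. The following is a sketch under the assumption that the stages defined in the Python snippet above (`documentAssembler`, `tokenizer`, `tokenClassifier`, `data`) are reused; the output column name is illustrative.

```python
from sparknlp.base import Finisher
from pyspark.ml import Pipeline

# turn the `ner` annotation column into a plain array-of-strings column
finisher = Finisher() \
    .setInputCols(["ner"]) \
    .setOutputCols(["ner_label"])

pipeline_with_finisher = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier, finisher])
finishedDF = pipeline_with_finisher.fit(data).transform(data)
finishedDF.select("ner_label").show(truncate=False)
```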
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_all_wendao_123| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|848.0 MB| + +## References + +https://huggingface.co/Wendao-123/xlm-roberta-base-finetuned-panx-all \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_buruzaemon_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_buruzaemon_en.md new file mode 100644 index 00000000000000..5ba974a3017a79 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_buruzaemon_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_english_buruzaemon XlmRoBertaForTokenClassification from buruzaemon +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_english_buruzaemon +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_english_buruzaemon` is a English model originally trained by buruzaemon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_buruzaemon_en_5.5.0_3.0_1725694690805.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_buruzaemon_en_5.5.0_3.0_1725694690805.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_english_buruzaemon","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("ner")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols("document")
    .setOutputCol("token")

val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_english_buruzaemon", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("ner")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_english_buruzaemon| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|814.3 MB| + +## References + +https://huggingface.co/buruzaemon/xlm-roberta-base-finetuned-panx-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_buruzaemon_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_buruzaemon_pipeline_en.md new file mode 100644 index 00000000000000..63b7dd7eabcc73 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_buruzaemon_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_english_buruzaemon_pipeline pipeline XlmRoBertaForTokenClassification from buruzaemon +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_english_buruzaemon_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_english_buruzaemon_pipeline` is a English model originally trained by buruzaemon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_buruzaemon_pipeline_en_5.5.0_3.0_1725694791698.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_buruzaemon_pipeline_en_5.5.0_3.0_1725694791698.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_english_buruzaemon_pipeline", lang = "en")
df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
annotations = pipeline.transform(df)

```
```scala

val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_english_buruzaemon_pipeline", lang = "en")
val df = Seq("I love spark-nlp").toDS.toDF("text")
val annotations = pipeline.transform(df)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_english_buruzaemon_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|814.3 MB| + +## References + +https://huggingface.co/buruzaemon/xlm-roberta-base-finetuned-panx-en + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_drigb_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_drigb_en.md new file mode 100644 index 00000000000000..9ad40447488eba --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_drigb_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_english_drigb XlmRoBertaForTokenClassification from drigb +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_english_drigb +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_english_drigb` is a English model originally trained by drigb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_drigb_en_5.5.0_3.0_1725704525634.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_drigb_en_5.5.0_3.0_1725704525634.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_english_drigb","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("ner")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols("document")
    .setOutputCol("token")

val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_english_drigb", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("ner")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_english_drigb| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|826.4 MB| + +## References + +https://huggingface.co/drigb/xlm-roberta-base-finetuned-panx-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_likejazz_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_likejazz_en.md new file mode 100644 index 00000000000000..0834cef955120f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_likejazz_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_english_likejazz XlmRoBertaForTokenClassification from likejazz +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_english_likejazz +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_english_likejazz` is a English model originally trained by likejazz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_likejazz_en_5.5.0_3.0_1725705086778.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_likejazz_en_5.5.0_3.0_1725705086778.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_english_likejazz","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("ner")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols("document")
    .setOutputCol("token")

val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_english_likejazz", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("ner")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_english_likejazz| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|822.2 MB| + +## References + +https://huggingface.co/likejazz/xlm-roberta-base-finetuned-panx-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_likejazz_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_likejazz_pipeline_en.md new file mode 100644 index 00000000000000..0c2dd90cb58cbb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_likejazz_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_english_likejazz_pipeline pipeline XlmRoBertaForTokenClassification from likejazz +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_english_likejazz_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_english_likejazz_pipeline` is a English model originally trained by likejazz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_likejazz_pipeline_en_5.5.0_3.0_1725705170505.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_likejazz_pipeline_en_5.5.0_3.0_1725705170505.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_english_likejazz_pipeline", lang = "en")
df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
annotations = pipeline.transform(df)

```
```scala

val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_english_likejazz_pipeline", lang = "en")
val df = Seq("I love spark-nlp").toDS.toDF("text")
val annotations = pipeline.transform(df)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_english_likejazz_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|822.2 MB| + +## References + +https://huggingface.co/likejazz/xlm-roberta-base-finetuned-panx-en + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_nrazavi_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_nrazavi_en.md new file mode 100644 index 00000000000000..933d41bb2c9e22 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_nrazavi_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_english_nrazavi XlmRoBertaForTokenClassification from nrazavi +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_english_nrazavi +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_english_nrazavi` is a English model originally trained by nrazavi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_nrazavi_en_5.5.0_3.0_1725692927961.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_nrazavi_en_5.5.0_3.0_1725692927961.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_english_nrazavi","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("ner")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols("document")
    .setOutputCol("token")

val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_english_nrazavi", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("ner")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_english_nrazavi| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|826.4 MB| + +## References + +https://huggingface.co/nrazavi/xlm-roberta-base-finetuned-panx-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_nrazavi_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_nrazavi_pipeline_en.md new file mode 100644 index 00000000000000..c4cf960f5b9265 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_nrazavi_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_english_nrazavi_pipeline pipeline XlmRoBertaForTokenClassification from nrazavi +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_english_nrazavi_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_english_nrazavi_pipeline` is a English model originally trained by nrazavi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_nrazavi_pipeline_en_5.5.0_3.0_1725693020599.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_nrazavi_pipeline_en_5.5.0_3.0_1725693020599.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_english_nrazavi_pipeline", lang = "en")
df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
annotations = pipeline.transform(df)

```
```scala

val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_english_nrazavi_pipeline", lang = "en")
val df = Seq("I love spark-nlp").toDS.toDF("text")
val annotations = pipeline.transform(df)

```
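
For single documents or small batches, the downloaded pipeline can also be used without building a DataFrame at all. The sketch below is illustrative rather than part of the original card: the sample sentence is made up, and the exact return shape of `fullAnnotate` (a list of dicts of annotation objects in recent releases) may vary between Spark NLP versions.

```python
from sparknlp.pretrained import PretrainedPipeline

pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_english_nrazavi_pipeline", lang="en")

# fullAnnotate keeps the annotation objects, so character offsets and metadata stay available
annotated = pipeline.fullAnnotate("Alice moved from Paris to New York in 2021.")
for annotation in annotated[0]["ner"]:
    print(annotation.begin, annotation.end, annotation.result)
```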
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_english_nrazavi_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|826.4 MB| + +## References + +https://huggingface.co/nrazavi/xlm-roberta-base-finetuned-panx-en + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_ryo_hsgw_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_ryo_hsgw_en.md new file mode 100644 index 00000000000000..01f12fcecbb7a2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_ryo_hsgw_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_english_ryo_hsgw XlmRoBertaForTokenClassification from ryo-hsgw +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_english_ryo_hsgw +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_english_ryo_hsgw` is a English model originally trained by ryo-hsgw. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_ryo_hsgw_en_5.5.0_3.0_1725692817545.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_ryo_hsgw_en_5.5.0_3.0_1725692817545.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_english_ryo_hsgw","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("ner")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols("document")
    .setOutputCol("token")

val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_english_ryo_hsgw", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("ner")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_english_ryo_hsgw| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|826.4 MB| + +## References + +https://huggingface.co/ryo-hsgw/xlm-roberta-base-finetuned-panx-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_ryo_hsgw_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_ryo_hsgw_pipeline_en.md new file mode 100644 index 00000000000000..c51ea99e986131 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_ryo_hsgw_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_english_ryo_hsgw_pipeline pipeline XlmRoBertaForTokenClassification from ryo-hsgw +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_english_ryo_hsgw_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_english_ryo_hsgw_pipeline` is a English model originally trained by ryo-hsgw. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_ryo_hsgw_pipeline_en_5.5.0_3.0_1725692908764.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_ryo_hsgw_pipeline_en_5.5.0_3.0_1725692908764.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_english_ryo_hsgw_pipeline", lang = "en")
df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
annotations = pipeline.transform(df)

```
```scala

val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_english_ryo_hsgw_pipeline", lang = "en")
val df = Seq("I love spark-nlp").toDS.toDF("text")
val annotations = pipeline.transform(df)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_english_ryo_hsgw_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|826.4 MB| + +## References + +https://huggingface.co/ryo-hsgw/xlm-roberta-base-finetuned-panx-en + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_taoyoung_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_taoyoung_en.md new file mode 100644 index 00000000000000..2d6de3f3274a2f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_taoyoung_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_english_taoyoung XlmRoBertaForTokenClassification from taoyoung +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_english_taoyoung +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_english_taoyoung` is a English model originally trained by taoyoung. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_taoyoung_en_5.5.0_3.0_1725705905627.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_taoyoung_en_5.5.0_3.0_1725705905627.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_english_taoyoung","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("ner")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols("document")
    .setOutputCol("token")

val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_english_taoyoung", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("ner")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_english_taoyoung| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|818.7 MB| + +## References + +https://huggingface.co/taoyoung/xlm-roberta-base-finetuned-panx-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_taoyoung_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_taoyoung_pipeline_en.md new file mode 100644 index 00000000000000..0d93829c2ba2c3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_english_taoyoung_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_english_taoyoung_pipeline pipeline XlmRoBertaForTokenClassification from taoyoung +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_english_taoyoung_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_english_taoyoung_pipeline` is a English model originally trained by taoyoung. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_taoyoung_pipeline_en_5.5.0_3.0_1725705995863.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_taoyoung_pipeline_en_5.5.0_3.0_1725705995863.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_english_taoyoung_pipeline", lang = "en")
df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
annotations = pipeline.transform(df)

```
```scala

val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_english_taoyoung_pipeline", lang = "en")
val df = Seq("I love spark-nlp").toDS.toDF("text")
val annotations = pipeline.transform(df)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_english_taoyoung_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|818.7 MB| + +## References + +https://huggingface.co/taoyoung/xlm-roberta-base-finetuned-panx-en + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_french_ferro_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_french_ferro_en.md new file mode 100644 index 00000000000000..3055152dc3317d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_french_ferro_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_french_ferro XlmRoBertaForTokenClassification from Ferro +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_french_ferro +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_french_ferro` is a English model originally trained by Ferro. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_ferro_en_5.5.0_3.0_1725744356219.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_ferro_en_5.5.0_3.0_1725744356219.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

documentAssembler = DocumentAssembler() \
    .setInputCol('text') \
    .setOutputCol('document')

tokenizer = Tokenizer() \
    .setInputCols(['document']) \
    .setOutputCol('token')

tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_french_ferro","en") \
    .setInputCols(["document","token"]) \
    .setOutputCol("ner")

pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
pipelineModel = pipeline.fit(data)
pipelineDF = pipelineModel.transform(data)

```
```scala

val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

val tokenizer = new Tokenizer()
    .setInputCols("document")
    .setOutputCol("token")

val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_french_ferro", "en")
    .setInputCols(Array("document","token"))
    .setOutputCol("ner")

val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
val data = Seq("I love spark-nlp").toDS.toDF("text")
val pipelineModel = pipeline.fit(data)
val pipelineDF = pipelineModel.transform(data)

```
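
Once fitted, the pipeline can be persisted and reloaded with the standard Spark ML writers, which avoids re-downloading the pretrained weights on every run. A sketch assuming the `pipelineModel` and `data` variables from the Python snippet above; the save path is an illustrative placeholder.

```python
from pyspark.ml import PipelineModel

# persist the fitted pipeline (path is a placeholder)
pipelineModel.write().overwrite().save("/tmp/xlm_roberta_panx_french_ferro_pipeline")

# reload it later without fetching the pretrained model again
restored = PipelineModel.load("/tmp/xlm_roberta_panx_french_ferro_pipeline")
restored.transform(data).select("ner.result").show(truncate=False)
```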
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_french_ferro| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|840.9 MB| + +## References + +https://huggingface.co/Ferro/xlm-roberta-base-finetuned-panx-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_french_henryjiang_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_french_henryjiang_pipeline_en.md new file mode 100644 index 00000000000000..9a7cdaac208938 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_french_henryjiang_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_french_henryjiang_pipeline pipeline XlmRoBertaForTokenClassification from henryjiang +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_french_henryjiang_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_french_henryjiang_pipeline` is a English model originally trained by henryjiang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_henryjiang_pipeline_en_5.5.0_3.0_1725744452729.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_henryjiang_pipeline_en_5.5.0_3.0_1725744452729.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python

pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_french_henryjiang_pipeline", lang = "en")
df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
annotations = pipeline.transform(df)

```
```scala

val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_french_henryjiang_pipeline", lang = "en")
val df = Seq("I love spark-nlp").toDS.toDF("text")
val annotations = pipeline.transform(df)

```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_french_henryjiang_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|843.5 MB| + +## References + +https://huggingface.co/henryjiang/xlm-roberta-base-finetuned-panx-fr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_french_inniok_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_french_inniok_en.md new file mode 100644 index 00000000000000..4a34e7662cbad0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_french_inniok_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_french_inniok XlmRoBertaForTokenClassification from inniok +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_french_inniok +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_french_inniok` is a English model originally trained by inniok. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_inniok_en_5.5.0_3.0_1725693769473.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_inniok_en_5.5.0_3.0_1725693769473.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+<div class="tabs-box" aria-label="how to use">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_french_inniok","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// Assumes an active Spark NLP session (SparkSession available as spark)
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_french_inniok", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
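+
+To group the token-level IOB tags produced above into complete entities, a `NerConverter` stage can be appended to the same pipeline. This is a minimal sketch, not part of the original card: the example sentence is an assumption, and the column names simply follow the snippet above.
+
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification, NerConverter
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler().setInputCol("text").setOutputCol("document")
+tokenizer = Tokenizer().setInputCols(["document"]).setOutputCol("token")
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_french_inniok", "en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+# NerConverter merges consecutive B-/I- tags into one chunk per entity
+nerConverter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier, nerConverter])
+data = spark.createDataFrame([["John Snow Labs est basé à Paris."]]).toDF("text")
+result = pipeline.fit(data).transform(data)
+result.selectExpr("explode(ner_chunk.result) as entity").show(truncate=False)
+```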
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_french_inniok| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|827.9 MB| + +## References + +https://huggingface.co/inniok/xlm-roberta-base-finetuned-panx-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_french_jfmatos_isq_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_french_jfmatos_isq_pipeline_en.md new file mode 100644 index 00000000000000..13cb22f99a5391 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_french_jfmatos_isq_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_french_jfmatos_isq_pipeline pipeline XlmRoBertaForTokenClassification from jfmatos-isq +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_french_jfmatos_isq_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_french_jfmatos_isq_pipeline` is a English model originally trained by jfmatos-isq. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_jfmatos_isq_pipeline_en_5.5.0_3.0_1725704569237.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_jfmatos_isq_pipeline_en_5.5.0_3.0_1725704569237.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+<div class="tabs-box" aria-label="how to use">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.pretrained import PretrainedPipeline
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_french_jfmatos_isq_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+// Assumes an active Spark NLP session (SparkSession available as spark)
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_french_jfmatos_isq_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_french_jfmatos_isq_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|840.9 MB| + +## References + +https://huggingface.co/jfmatos-isq/xlm-roberta-base-finetuned-panx-fr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_french_robkayinto_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_french_robkayinto_en.md new file mode 100644 index 00000000000000..948d8a9d65d98c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_french_robkayinto_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_french_robkayinto XlmRoBertaForTokenClassification from robkayinto +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_french_robkayinto +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_french_robkayinto` is a English model originally trained by robkayinto. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_robkayinto_en_5.5.0_3.0_1725694333416.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_robkayinto_en_5.5.0_3.0_1725694333416.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+<div class="tabs-box" aria-label="how to use">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_french_robkayinto","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// Assumes an active Spark NLP session (SparkSession available as spark)
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_french_robkayinto", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
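+
+For quick experiments on a handful of sentences, the fitted `pipelineModel` from the example above can be wrapped in a `LightPipeline`, which runs the annotators on plain strings without creating a DataFrame. A minimal sketch; the input sentence is only an illustration:
+
+```python
+from sparknlp.base import LightPipeline
+
+# pipelineModel is the fitted PipelineModel from the snippet above
+light = LightPipeline(pipelineModel)
+
+# annotate() returns one list of results per output column
+annotations = light.annotate("John Snow Labs est basé à Paris.")
+print(list(zip(annotations["token"], annotations["ner"])))
+```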
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_french_robkayinto| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|840.9 MB| + +## References + +https://huggingface.co/robkayinto/xlm-roberta-base-finetuned-panx-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_french_wooseok0303_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_french_wooseok0303_en.md new file mode 100644 index 00000000000000..45ef58dfc3345a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_french_wooseok0303_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_french_wooseok0303 XlmRoBertaForTokenClassification from wooseok0303 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_french_wooseok0303 +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_french_wooseok0303` is a English model originally trained by wooseok0303. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_wooseok0303_en_5.5.0_3.0_1725705896624.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_wooseok0303_en_5.5.0_3.0_1725705896624.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+<div class="tabs-box" aria-label="how to use">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_french_wooseok0303","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// Assumes an active Spark NLP session (SparkSession available as spark)
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_french_wooseok0303", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_french_wooseok0303| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|827.9 MB| + +## References + +https://huggingface.co/wooseok0303/xlm-roberta-base-finetuned-panx-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_french_wooseok0303_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_french_wooseok0303_pipeline_en.md new file mode 100644 index 00000000000000..d7440c18c9e34b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_french_wooseok0303_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_french_wooseok0303_pipeline pipeline XlmRoBertaForTokenClassification from wooseok0303 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_french_wooseok0303_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_french_wooseok0303_pipeline` is a English model originally trained by wooseok0303. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_wooseok0303_pipeline_en_5.5.0_3.0_1725705982901.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_wooseok0303_pipeline_en_5.5.0_3.0_1725705982901.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+<div class="tabs-box" aria-label="how to use">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.pretrained import PretrainedPipeline
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_french_wooseok0303_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+// Assumes an active Spark NLP session (SparkSession available as spark)
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_french_wooseok0303_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
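+
+The stages listed under "Included Models" at the end of this card can also be checked programmatically: in the Python API a `PretrainedPipeline` exposes the underlying Spark `PipelineModel` through its `model` attribute. A small sketch, assuming the pipeline has been downloaded as shown above:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_french_wooseok0303_pipeline", lang = "en")
+
+# Each stage corresponds to one entry in the "Included Models" section
+for stage in pipeline.model.stages:
+    print(type(stage).__name__)
+```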
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_french_wooseok0303_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|827.9 MB| + +## References + +https://huggingface.co/wooseok0303/xlm-roberta-base-finetuned-panx-fr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_alexisxiaoyu_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_alexisxiaoyu_en.md new file mode 100644 index 00000000000000..ce2470fdb073dd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_alexisxiaoyu_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_alexisxiaoyu XlmRoBertaForTokenClassification from alexisxiaoyu +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_alexisxiaoyu +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_alexisxiaoyu` is a English model originally trained by alexisxiaoyu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_alexisxiaoyu_en_5.5.0_3.0_1725743706004.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_alexisxiaoyu_en_5.5.0_3.0_1725743706004.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+<div class="tabs-box" aria-label="how to use">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_alexisxiaoyu","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// Assumes an active Spark NLP session (SparkSession available as spark)
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_alexisxiaoyu", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_alexisxiaoyu| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|840.8 MB| + +## References + +https://huggingface.co/alexisxiaoyu/xlm-roberta-base-finetuned-panx-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_alexisxiaoyu_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_alexisxiaoyu_pipeline_en.md new file mode 100644 index 00000000000000..df4a3c5b19ec99 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_alexisxiaoyu_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_alexisxiaoyu_pipeline pipeline XlmRoBertaForTokenClassification from alexisxiaoyu +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_alexisxiaoyu_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_alexisxiaoyu_pipeline` is a English model originally trained by alexisxiaoyu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_alexisxiaoyu_pipeline_en_5.5.0_3.0_1725743792251.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_alexisxiaoyu_pipeline_en_5.5.0_3.0_1725743792251.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+<div class="tabs-box" aria-label="how to use">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.pretrained import PretrainedPipeline
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_alexisxiaoyu_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+// Assumes an active Spark NLP session (SparkSession available as spark)
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_alexisxiaoyu_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_alexisxiaoyu_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|840.8 MB| + +## References + +https://huggingface.co/alexisxiaoyu/xlm-roberta-base-finetuned-panx-de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_cicimen_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_cicimen_pipeline_en.md new file mode 100644 index 00000000000000..824d5480d0adc3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_cicimen_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_cicimen_pipeline pipeline XlmRoBertaForTokenClassification from cicimen +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_cicimen_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_cicimen_pipeline` is a English model originally trained by cicimen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_cicimen_pipeline_en_5.5.0_3.0_1725687851936.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_cicimen_pipeline_en_5.5.0_3.0_1725687851936.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+<div class="tabs-box" aria-label="how to use">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.pretrained import PretrainedPipeline
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_cicimen_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+// Assumes an active Spark NLP session (SparkSession available as spark)
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_cicimen_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_cicimen_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|840.8 MB| + +## References + +https://huggingface.co/cicimen/xlm-roberta-base-finetuned-panx-de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_eikoenchine_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_eikoenchine_en.md new file mode 100644 index 00000000000000..f702d9dec7fcad --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_eikoenchine_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_eikoenchine XlmRoBertaForTokenClassification from eikoenchine +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_eikoenchine +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_eikoenchine` is a English model originally trained by eikoenchine. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_eikoenchine_en_5.5.0_3.0_1725688673500.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_eikoenchine_en_5.5.0_3.0_1725688673500.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+<div class="tabs-box" aria-label="how to use">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_eikoenchine","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// Assumes an active Spark NLP session (SparkSession available as spark)
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_eikoenchine", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_eikoenchine| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/eikoenchine/xlm-roberta-base-finetuned-panx-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_eikoenchine_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_eikoenchine_pipeline_en.md new file mode 100644 index 00000000000000..fe5411109c1bed --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_eikoenchine_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_eikoenchine_pipeline pipeline XlmRoBertaForTokenClassification from eikoenchine +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_eikoenchine_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_eikoenchine_pipeline` is a English model originally trained by eikoenchine. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_eikoenchine_pipeline_en_5.5.0_3.0_1725688738242.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_eikoenchine_pipeline_en_5.5.0_3.0_1725688738242.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+<div class="tabs-box" aria-label="how to use">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.pretrained import PretrainedPipeline
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_eikoenchine_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+// Assumes an active Spark NLP session (SparkSession available as spark)
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_eikoenchine_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_eikoenchine_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/eikoenchine/xlm-roberta-base-finetuned-panx-de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_emmanuelalo52_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_emmanuelalo52_pipeline_en.md new file mode 100644 index 00000000000000..b13643de5aa213 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_emmanuelalo52_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_emmanuelalo52_pipeline pipeline XlmRoBertaForTokenClassification from Emmanuelalo52 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_emmanuelalo52_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_emmanuelalo52_pipeline` is a English model originally trained by Emmanuelalo52. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_emmanuelalo52_pipeline_en_5.5.0_3.0_1725693556614.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_emmanuelalo52_pipeline_en_5.5.0_3.0_1725693556614.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+<div class="tabs-box" aria-label="how to use">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.pretrained import PretrainedPipeline
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_emmanuelalo52_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+// Assumes an active Spark NLP session (SparkSession available as spark)
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_emmanuelalo52_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_emmanuelalo52_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|840.7 MB| + +## References + +https://huggingface.co/Emmanuelalo52/xlm-roberta-base-finetuned-panx-de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_esperesa_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_esperesa_en.md new file mode 100644 index 00000000000000..4610066c329b89 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_esperesa_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_esperesa XlmRoBertaForTokenClassification from esperesa +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_esperesa +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_esperesa` is a English model originally trained by esperesa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_esperesa_en_5.5.0_3.0_1725744731645.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_esperesa_en_5.5.0_3.0_1725744731645.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+<div class="tabs-box" aria-label="how to use">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_esperesa","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// Assumes an active Spark NLP session (SparkSession available as spark)
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_esperesa", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_esperesa| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/esperesa/xlm-roberta-base-finetuned-panx-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_andreaschandra_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_andreaschandra_en.md new file mode 100644 index 00000000000000..16ff68ab020da8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_andreaschandra_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_andreaschandra XlmRoBertaForTokenClassification from andreaschandra +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_andreaschandra +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_andreaschandra` is a English model originally trained by andreaschandra. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_andreaschandra_en_5.5.0_3.0_1725688305297.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_andreaschandra_en_5.5.0_3.0_1725688305297.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+<div class="tabs-box" aria-label="how to use">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_andreaschandra","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// Assumes an active Spark NLP session (SparkSession available as spark)
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_andreaschandra", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_andreaschandra| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|858.2 MB| + +## References + +https://huggingface.co/andreaschandra/xlm-roberta-base-finetuned-panx-de-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_andreaschandra_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_andreaschandra_pipeline_en.md new file mode 100644 index 00000000000000..4ba35ea423765e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_andreaschandra_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_andreaschandra_pipeline pipeline XlmRoBertaForTokenClassification from andreaschandra +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_andreaschandra_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_andreaschandra_pipeline` is a English model originally trained by andreaschandra. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_andreaschandra_pipeline_en_5.5.0_3.0_1725688368203.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_andreaschandra_pipeline_en_5.5.0_3.0_1725688368203.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+<div class="tabs-box" aria-label="how to use">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.pretrained import PretrainedPipeline
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_french_andreaschandra_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+// Assumes an active Spark NLP session (SparkSession available as spark)
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_french_andreaschandra_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_andreaschandra_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|858.2 MB| + +## References + +https://huggingface.co/andreaschandra/xlm-roberta-base-finetuned-panx-de-fr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_benjiccee_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_benjiccee_en.md new file mode 100644 index 00000000000000..c4f16df977f556 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_benjiccee_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_benjiccee XlmRoBertaForTokenClassification from Benjiccee +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_benjiccee +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_benjiccee` is a English model originally trained by Benjiccee. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_benjiccee_en_5.5.0_3.0_1725694550209.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_benjiccee_en_5.5.0_3.0_1725694550209.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+<div class="tabs-box" aria-label="how to use">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_benjiccee","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// Assumes an active Spark NLP session (SparkSession available as spark)
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_benjiccee", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_benjiccee| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|858.2 MB| + +## References + +https://huggingface.co/Benjiccee/xlm-roberta-base-finetuned-panx-de-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_cataluna84_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_cataluna84_en.md new file mode 100644 index 00000000000000..e980d3194ac4c2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_cataluna84_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_cataluna84 XlmRoBertaForTokenClassification from cataluna84 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_cataluna84 +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_cataluna84` is a English model originally trained by cataluna84. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_cataluna84_en_5.5.0_3.0_1725744962115.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_cataluna84_en_5.5.0_3.0_1725744962115.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+<div class="tabs-box" aria-label="how to use">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_cataluna84","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// Assumes an active Spark NLP session (SparkSession available as spark)
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_cataluna84", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_cataluna84| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|858.2 MB| + +## References + +https://huggingface.co/cataluna84/xlm-roberta-base-finetuned-panx-de-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_cataluna84_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_cataluna84_pipeline_en.md new file mode 100644 index 00000000000000..b0214a005f0b9e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_cataluna84_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_cataluna84_pipeline pipeline XlmRoBertaForTokenClassification from cataluna84 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_cataluna84_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_cataluna84_pipeline` is a English model originally trained by cataluna84. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_cataluna84_pipeline_en_5.5.0_3.0_1725745023864.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_cataluna84_pipeline_en_5.5.0_3.0_1725745023864.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+<div class="tabs-box" aria-label="how to use">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.pretrained import PretrainedPipeline
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_french_cataluna84_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+// Assumes an active Spark NLP session (SparkSession available as spark)
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_french_cataluna84_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_cataluna84_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|858.2 MB| + +## References + +https://huggingface.co/cataluna84/xlm-roberta-base-finetuned-panx-de-fr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_guruji108_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_guruji108_en.md new file mode 100644 index 00000000000000..022a71e7e11edf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_guruji108_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_guruji108 XlmRoBertaForTokenClassification from Guruji108 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_guruji108 +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_guruji108` is a English model originally trained by Guruji108. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_guruji108_en_5.5.0_3.0_1725704121473.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_guruji108_en_5.5.0_3.0_1725704121473.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+<div class="tabs-box" aria-label="how to use">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_guruji108","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// Assumes an active Spark NLP session (SparkSession available as spark)
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_guruji108", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_guruji108| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|858.2 MB| + +## References + +https://huggingface.co/Guruji108/xlm-roberta-base-finetuned-panx-de-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_guruji108_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_guruji108_pipeline_en.md new file mode 100644 index 00000000000000..3d46c8d3bded24 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_guruji108_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_guruji108_pipeline pipeline XlmRoBertaForTokenClassification from Guruji108 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_guruji108_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_guruji108_pipeline` is a English model originally trained by Guruji108. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_guruji108_pipeline_en_5.5.0_3.0_1725704183844.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_guruji108_pipeline_en_5.5.0_3.0_1725704183844.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+<div class="tabs-box" aria-label="how to use">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# Assumes an active Spark NLP session, e.g. spark = sparknlp.start()
+from sparknlp.pretrained import PretrainedPipeline
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_french_guruji108_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+// Assumes an active Spark NLP session (SparkSession available as spark)
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_french_guruji108_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_guruji108_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|858.2 MB| + +## References + +https://huggingface.co/Guruji108/xlm-roberta-base-finetuned-panx-de-fr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_laurentiustancioiu_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_laurentiustancioiu_pipeline_en.md new file mode 100644 index 00000000000000..fed3a30d3d2c11 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_laurentiustancioiu_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_laurentiustancioiu_pipeline pipeline XlmRoBertaForTokenClassification from LaurentiuStancioiu +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_laurentiustancioiu_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_laurentiustancioiu_pipeline` is a English model originally trained by LaurentiuStancioiu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_laurentiustancioiu_pipeline_en_5.5.0_3.0_1725705143644.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_laurentiustancioiu_pipeline_en_5.5.0_3.0_1725705143644.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# Any DataFrame with a raw text column named "text" can be used as input
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_french_laurentiustancioiu_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val df = Seq("I love spark-nlp").toDF("text")
+
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_french_laurentiustancioiu_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_laurentiustancioiu_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|858.2 MB| + +## References + +https://huggingface.co/LaurentiuStancioiu/xlm-roberta-base-finetuned-panx-de-fr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_noveled_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_noveled_en.md new file mode 100644 index 00000000000000..520ac977d52a2f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_noveled_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_noveled XlmRoBertaForTokenClassification from Noveled +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_noveled +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_noveled` is a English model originally trained by Noveled. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_noveled_en_5.5.0_3.0_1725694092004.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_noveled_en_5.5.0_3.0_1725694092004.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_noveled","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_noveled", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
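+
+After `transform`, the `ner` column holds one annotation per input token. A small sketch, assuming the `pipelineDF` produced above, for pulling out the predicted tags:
+
+```python
+from pyspark.sql import functions as F
+
+# Each annotation's `result` field carries the predicted tag for its token
+pipelineDF.select(F.explode(F.col("ner.result")).alias("ner_tag")).show(truncate=False)
+```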
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_noveled| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|843.4 MB| + +## References + +https://huggingface.co/Noveled/xlm-roberta-base-finetuned-panx-de-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_smilingface88_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_smilingface88_en.md new file mode 100644 index 00000000000000..33ea7eeec44109 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_smilingface88_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_smilingface88 XlmRoBertaForTokenClassification from smilingface88 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_smilingface88 +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_smilingface88` is a English model originally trained by smilingface88. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_smilingface88_en_5.5.0_3.0_1725693113059.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_smilingface88_en_5.5.0_3.0_1725693113059.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_smilingface88","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_smilingface88", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_smilingface88| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|858.2 MB| + +## References + +https://huggingface.co/smilingface88/xlm-roberta-base-finetuned-panx-de-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_sponomary_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_sponomary_en.md new file mode 100644 index 00000000000000..30131bd3b726bb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_sponomary_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_sponomary XlmRoBertaForTokenClassification from sponomary +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_sponomary +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_sponomary` is a English model originally trained by sponomary. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_sponomary_en_5.5.0_3.0_1725704606149.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_sponomary_en_5.5.0_3.0_1725704606149.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_sponomary","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_sponomary", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
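+
+Token-level tags can be merged into entity spans with Spark NLP's `NerConverter`. The sketch below reuses the stages defined above and is illustrative rather than part of the original card:
+
+```python
+from sparknlp.annotator import NerConverter
+from pyspark.ml import Pipeline
+
+# Groups consecutive B-/I- tags into full entity chunks
+nerConverter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+chunkPipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier, nerConverter])
+chunkPipeline.fit(data).transform(data).selectExpr("explode(ner_chunk.result) as entity").show(truncate=False)
+```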
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_sponomary| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|858.2 MB| + +## References + +https://huggingface.co/sponomary/xlm-roberta-base-finetuned-panx-de-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_sponomary_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_sponomary_pipeline_en.md new file mode 100644 index 00000000000000..28d915e5b7e337 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_sponomary_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_sponomary_pipeline pipeline XlmRoBertaForTokenClassification from sponomary +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_sponomary_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_sponomary_pipeline` is a English model originally trained by sponomary. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_sponomary_pipeline_en_5.5.0_3.0_1725704668445.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_sponomary_pipeline_en_5.5.0_3.0_1725704668445.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# Any DataFrame with a raw text column named "text" can be used as input
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_french_sponomary_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val df = Seq("I love spark-nlp").toDF("text")
+
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_french_sponomary_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_sponomary_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|858.2 MB| + +## References + +https://huggingface.co/sponomary/xlm-roberta-base-finetuned-panx-de-fr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_transformersbook_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_transformersbook_pipeline_en.md new file mode 100644 index 00000000000000..5b3f03f4b3a00f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_transformersbook_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_transformersbook_pipeline pipeline XlmRoBertaForTokenClassification from transformersbook +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_transformersbook_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_transformersbook_pipeline` is a English model originally trained by transformersbook. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_transformersbook_pipeline_en_5.5.0_3.0_1725745422125.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_transformersbook_pipeline_en_5.5.0_3.0_1725745422125.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# Any DataFrame with a raw text column named "text" can be used as input
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_french_transformersbook_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val df = Seq("I love spark-nlp").toDF("text")
+
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_french_transformersbook_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_transformersbook_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|858.2 MB| + +## References + +https://huggingface.co/transformersbook/xlm-roberta-base-finetuned-panx-de-fr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_yasu320001_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_yasu320001_en.md new file mode 100644 index 00000000000000..640edde236d749 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_yasu320001_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_yasu320001 XlmRoBertaForTokenClassification from yasu320001 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_yasu320001 +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_yasu320001` is a English model originally trained by yasu320001. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_yasu320001_en_5.5.0_3.0_1725705553615.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_yasu320001_en_5.5.0_3.0_1725705553615.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_yasu320001","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_yasu320001", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
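+
+For low-latency inference on individual strings, the fitted model can be wrapped in a `LightPipeline`, which avoids building a DataFrame per request. A minimal sketch, assuming `pipelineModel` from the snippet above; the sentence is a placeholder:
+
+```python
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+
+# Returns a plain Python dict keyed by the pipeline's output columns
+result = light.annotate("Angela Merkel besuchte Paris im Mai.")
+print(list(zip(result["token"], result["ner"])))
+```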
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_yasu320001| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|858.2 MB| + +## References + +https://huggingface.co/yasu320001/xlm-roberta-base-finetuned-panx-de-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_yezune_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_yezune_en.md new file mode 100644 index 00000000000000..d08614cc10bdbe --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_yezune_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_yezune XlmRoBertaForTokenClassification from yezune +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_yezune +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_yezune` is a English model originally trained by yezune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_yezune_en_5.5.0_3.0_1725705204686.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_yezune_en_5.5.0_3.0_1725705204686.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_yezune","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_yezune", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_yezune| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|858.2 MB| + +## References + +https://huggingface.co/yezune/xlm-roberta-base-finetuned-panx-de-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_yurit04_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_yurit04_en.md new file mode 100644 index 00000000000000..aac06115df1098 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_yurit04_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_yurit04 XlmRoBertaForTokenClassification from yurit04 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_yurit04 +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_yurit04` is a English model originally trained by yurit04. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_yurit04_en_5.5.0_3.0_1725745089871.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_yurit04_en_5.5.0_3.0_1725745089871.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_yurit04","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_yurit04", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_yurit04| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|858.2 MB| + +## References + +https://huggingface.co/yurit04/xlm-roberta-base-finetuned-panx-de-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_yurit04_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_yurit04_pipeline_en.md new file mode 100644 index 00000000000000..71a92ee1592e4f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_french_yurit04_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_yurit04_pipeline pipeline XlmRoBertaForTokenClassification from yurit04 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_yurit04_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_yurit04_pipeline` is a English model originally trained by yurit04. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_yurit04_pipeline_en_5.5.0_3.0_1725745152820.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_yurit04_pipeline_en_5.5.0_3.0_1725745152820.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# Any DataFrame with a raw text column named "text" can be used as input
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_french_yurit04_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val df = Seq("I love spark-nlp").toDF("text")
+
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_french_yurit04_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
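+
+A `PretrainedPipeline` wraps an ordinary Spark ML `PipelineModel`, so the stages listed under "Included Models" below can be inspected or reused directly. A small sketch, assuming the `pipeline` object created above:
+
+```python
+# The underlying PipelineModel and its individual stages
+for stage in pipeline.model.stages:
+    print(type(stage).__name__)
+```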
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_yurit04_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|858.2 MB| + +## References + +https://huggingface.co/yurit04/xlm-roberta-base-finetuned-panx-de-fr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_gonalb_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_gonalb_en.md new file mode 100644 index 00000000000000..f5d8611776ade5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_gonalb_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_gonalb XlmRoBertaForTokenClassification from Gonalb +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_gonalb +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_gonalb` is a English model originally trained by Gonalb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_gonalb_en_5.5.0_3.0_1725705248324.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_gonalb_en_5.5.0_3.0_1725705248324.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_gonalb","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_gonalb", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
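+
+Once fitted, the pipeline can be persisted and reloaded like any Spark ML model, which avoids re-downloading the pretrained weights on every run. A sketch, assuming `pipelineModel` and `data` from above; the path is only an example:
+
+```python
+from pyspark.ml import PipelineModel
+
+pipelineModel.write().overwrite().save("/tmp/panx_german_ner_pipeline")
+
+reloaded = PipelineModel.load("/tmp/panx_german_ner_pipeline")
+reloaded.transform(data).select("ner.result").show(truncate=False)
+```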
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_gonalb| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/Gonalb/xlm-roberta-base-finetuned-panx-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_gus07ven_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_gus07ven_en.md new file mode 100644 index 00000000000000..2710bab8d07492 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_gus07ven_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_gus07ven XlmRoBertaForTokenClassification from gus07ven +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_gus07ven +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_gus07ven` is a English model originally trained by gus07ven. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_gus07ven_en_5.5.0_3.0_1725743561620.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_gus07ven_en_5.5.0_3.0_1725743561620.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_gus07ven","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_gus07ven", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_gus07ven| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/gus07ven/xlm-roberta-base-finetuned-panx-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_gv05_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_gv05_en.md new file mode 100644 index 00000000000000..cd6aef1398b03f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_gv05_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_gv05 XlmRoBertaForTokenClassification from GV05 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_gv05 +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_gv05` is a English model originally trained by GV05. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_gv05_en_5.5.0_3.0_1725694453119.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_gv05_en_5.5.0_3.0_1725694453119.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_gv05","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_gv05", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_gv05| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/GV05/xlm-roberta-base-finetuned-panx-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_gv05_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_gv05_pipeline_en.md new file mode 100644 index 00000000000000..19d68ae5ccabdf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_gv05_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_gv05_pipeline pipeline XlmRoBertaForTokenClassification from GV05 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_gv05_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_gv05_pipeline` is a English model originally trained by GV05. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_gv05_pipeline_en_5.5.0_3.0_1725694517101.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_gv05_pipeline_en_5.5.0_3.0_1725694517101.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# Any DataFrame with a raw text column named "text" can be used as input
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_gv05_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val df = Seq("I love spark-nlp").toDF("text")
+
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_gv05_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_gv05_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/GV05/xlm-roberta-base-finetuned-panx-de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_huangjia_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_huangjia_en.md new file mode 100644 index 00000000000000..ed8fa800f55397 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_huangjia_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_huangjia XlmRoBertaForTokenClassification from huangjia +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_huangjia +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_huangjia` is a English model originally trained by huangjia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_huangjia_en_5.5.0_3.0_1725745226504.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_huangjia_en_5.5.0_3.0_1725745226504.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_huangjia","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_huangjia", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
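+
+Because `token.result` and `ner.result` are aligned arrays, each token can be paired with its predicted PAN-X tag using plain Spark SQL functions. A small, illustrative sketch over the `pipelineDF` produced above:
+
+```python
+from pyspark.sql import functions as F
+
+pairs = pipelineDF.select(
+    F.explode(F.arrays_zip(F.col("token.result").alias("token"),
+                           F.col("ner.result").alias("ner_tag"))).alias("pair")
+)
+pairs.select("pair.token", "pair.ner_tag").show(truncate=False)
+```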
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_huangjia| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|851.7 MB| + +## References + +https://huggingface.co/huangjia/xlm-roberta-base-finetuned-panx-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_k4west_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_k4west_pipeline_en.md new file mode 100644 index 00000000000000..cb19d6cb6742a8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_k4west_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_k4west_pipeline pipeline XlmRoBertaForTokenClassification from k4west +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_k4west_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_k4west_pipeline` is a English model originally trained by k4west. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_k4west_pipeline_en_5.5.0_3.0_1725704994529.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_k4west_pipeline_en_5.5.0_3.0_1725704994529.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# Any DataFrame with a raw text column named "text" can be used as input
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_k4west_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val df = Seq("I love spark-nlp").toDF("text")
+
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_k4west_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_k4west_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|840.8 MB| + +## References + +https://huggingface.co/k4west/xlm-roberta-base-finetuned-panx-de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_monkdalma_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_monkdalma_pipeline_en.md new file mode 100644 index 00000000000000..98c655dc17bfbe --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_monkdalma_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_monkdalma_pipeline pipeline XlmRoBertaForTokenClassification from MonkDalma +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_monkdalma_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_monkdalma_pipeline` is a English model originally trained by MonkDalma. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_monkdalma_pipeline_en_5.5.0_3.0_1725704288821.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_monkdalma_pipeline_en_5.5.0_3.0_1725704288821.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# Any DataFrame with a raw text column named "text" can be used as input
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_monkdalma_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val df = Seq("I love spark-nlp").toDF("text")
+
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_monkdalma_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_monkdalma_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/MonkDalma/xlm-roberta-base-finetuned-panx-de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_sanbatte_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_sanbatte_en.md new file mode 100644 index 00000000000000..ccc67bf889e903 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_sanbatte_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_sanbatte XlmRoBertaForTokenClassification from sanbatte +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_sanbatte +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_sanbatte` is a English model originally trained by sanbatte. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_sanbatte_en_5.5.0_3.0_1725743997253.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_sanbatte_en_5.5.0_3.0_1725743997253.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_sanbatte","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.DocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_sanbatte", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_sanbatte| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/sanbatte/xlm-roberta-base-finetuned-panx-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_sanbatte_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_sanbatte_pipeline_en.md new file mode 100644 index 00000000000000..501c388dd4d5d6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_sanbatte_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_sanbatte_pipeline pipeline XlmRoBertaForTokenClassification from sanbatte +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_sanbatte_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_sanbatte_pipeline` is a English model originally trained by sanbatte. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_sanbatte_pipeline_en_5.5.0_3.0_1725744062522.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_sanbatte_pipeline_en_5.5.0_3.0_1725744062522.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# Any DataFrame with a raw text column named "text" can be used as input
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_sanbatte_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+val df = Seq("I love spark-nlp").toDF("text")
+
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_sanbatte_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_sanbatte_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/sanbatte/xlm-roberta-base-finetuned-panx-de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_sanyam_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_sanyam_en.md new file mode 100644 index 00000000000000..320436a702358c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_sanyam_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_sanyam XlmRoBertaForTokenClassification from Sanyam +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_sanyam +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_sanyam` is a English model originally trained by Sanyam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_sanyam_en_5.5.0_3.0_1725693629155.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_sanyam_en_5.5.0_3.0_1725693629155.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+# assumes an active Spark session with Spark NLP started (e.g. spark = sparknlp.start())
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_sanyam","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+
+// assumes an active SparkSession with Spark NLP on the classpath
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_sanyam", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
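+
+To inspect the predictions produced by the snippet above, the annotation columns can be flattened into plain strings. A minimal sketch, assuming the pipeline was fitted as shown; the column names follow the model's input and output labels.
+
+```python
+# pull the tokens and their predicted tags out of the result DataFrame
+pipelineDF.selectExpr("token.result as tokens", "ner.result as tags").show(truncate=False)
+```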
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_sanyam| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|847.3 MB| + +## References + +https://huggingface.co/Sanyam/xlm-roberta-base-finetuned-panx-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_sanyam_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_sanyam_pipeline_en.md new file mode 100644 index 00000000000000..a04fb07d244fe1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_sanyam_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_sanyam_pipeline pipeline XlmRoBertaForTokenClassification from Sanyam +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_sanyam_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_sanyam_pipeline` is a English model originally trained by Sanyam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_sanyam_pipeline_en_5.5.0_3.0_1725693706093.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_sanyam_pipeline_en_5.5.0_3.0_1725693706093.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# assumes an active Spark session with Spark NLP started
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_sanyam_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// assumes an active SparkSession with Spark NLP on the classpath
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_sanyam_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_sanyam_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|847.3 MB| + +## References + +https://huggingface.co/Sanyam/xlm-roberta-base-finetuned-panx-de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_takizawa_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_takizawa_en.md new file mode 100644 index 00000000000000..af7c90789ec642 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_takizawa_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_takizawa XlmRoBertaForTokenClassification from takizawa +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_takizawa +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_takizawa` is a English model originally trained by takizawa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_takizawa_en_5.5.0_3.0_1725705980019.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_takizawa_en_5.5.0_3.0_1725705980019.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+# assumes an active Spark session with Spark NLP started (e.g. spark = sparknlp.start())
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_takizawa","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+
+// assumes an active SparkSession with Spark NLP on the classpath
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_takizawa", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
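+
+Token-level tags can be grouped into whole entities by appending a converter stage to the pipeline above. This is an illustrative sketch of the standard Spark NLP NER workflow, not part of the published model; `NerConverter` and the `ner_chunk` column name are assumptions.
+
+```python
+from sparknlp.annotator import NerConverter
+
+# assumption: merge B-/I- tags from the classifier into entity chunks
+nerConverter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+chunkPipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier, nerConverter])
+chunkDF = chunkPipeline.fit(data).transform(data)
+chunkDF.select("ner_chunk.result").show(truncate=False)
+```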
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_takizawa| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/takizawa/xlm-roberta-base-finetuned-panx-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_takizawa_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_takizawa_pipeline_en.md new file mode 100644 index 00000000000000..8b30a6a3e28b08 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_takizawa_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_takizawa_pipeline pipeline XlmRoBertaForTokenClassification from takizawa +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_takizawa_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_takizawa_pipeline` is a English model originally trained by takizawa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_takizawa_pipeline_en_5.5.0_3.0_1725706043383.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_takizawa_pipeline_en_5.5.0_3.0_1725706043383.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# assumes an active Spark session with Spark NLP started
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_takizawa_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// assumes an active SparkSession with Spark NLP on the classpath
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_takizawa_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_takizawa_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/takizawa/xlm-roberta-base-finetuned-panx-de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_thkkvui_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_thkkvui_pipeline_en.md new file mode 100644 index 00000000000000..3e6131a379900c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_thkkvui_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_thkkvui_pipeline pipeline XlmRoBertaForTokenClassification from thkkvui +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_thkkvui_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_thkkvui_pipeline` is a English model originally trained by thkkvui. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_thkkvui_pipeline_en_5.5.0_3.0_1725693300534.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_thkkvui_pipeline_en_5.5.0_3.0_1725693300534.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# assumes an active Spark session with Spark NLP started
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_thkkvui_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// assumes an active SparkSession with Spark NLP on the classpath
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_thkkvui_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_thkkvui_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/thkkvui/xlm-roberta-base-finetuned-panx-de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_vasantha_ai_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_vasantha_ai_en.md new file mode 100644 index 00000000000000..a896531fc3a158 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_vasantha_ai_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_vasantha_ai XlmRoBertaForTokenClassification from Vasantha-Ai +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_vasantha_ai +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_vasantha_ai` is a English model originally trained by Vasantha-Ai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_vasantha_ai_en_5.5.0_3.0_1725743707066.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_vasantha_ai_en_5.5.0_3.0_1725743707066.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+# assumes an active Spark session with Spark NLP started (e.g. spark = sparknlp.start())
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_vasantha_ai","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+
+// assumes an active SparkSession with Spark NLP on the classpath
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_vasantha_ai", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_vasantha_ai| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/Vasantha-Ai/xlm-roberta-base-finetuned-panx-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_vasantha_ai_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_vasantha_ai_pipeline_en.md new file mode 100644 index 00000000000000..32cfa6b315312e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_vasantha_ai_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_vasantha_ai_pipeline pipeline XlmRoBertaForTokenClassification from Vasantha-Ai +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_vasantha_ai_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_vasantha_ai_pipeline` is a English model originally trained by Vasantha-Ai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_vasantha_ai_pipeline_en_5.5.0_3.0_1725743776669.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_vasantha_ai_pipeline_en_5.5.0_3.0_1725743776669.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# assumes an active Spark session with Spark NLP started
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_vasantha_ai_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// assumes an active SparkSession with Spark NLP on the classpath
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_vasantha_ai_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_vasantha_ai_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/Vasantha-Ai/xlm-roberta-base-finetuned-panx-de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_xiao888_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_xiao888_en.md new file mode 100644 index 00000000000000..626b4a43506d02 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_german_xiao888_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_xiao888 XlmRoBertaForTokenClassification from Xiao888 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_xiao888 +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_xiao888` is a English model originally trained by Xiao888. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_xiao888_en_5.5.0_3.0_1725688425208.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_xiao888_en_5.5.0_3.0_1725688425208.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+# assumes an active Spark session with Spark NLP started (e.g. spark = sparknlp.start())
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_xiao888","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+
+// assumes an active SparkSession with Spark NLP on the classpath
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_xiao888", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_xiao888| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|853.8 MB| + +## References + +https://huggingface.co/Xiao888/xlm-roberta-base-finetuned-panx-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_chris_choi_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_chris_choi_pipeline_en.md new file mode 100644 index 00000000000000..c20fd24c783c4d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_chris_choi_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_italian_chris_choi_pipeline pipeline XlmRoBertaForTokenClassification from Chris-choi +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_italian_chris_choi_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_italian_chris_choi_pipeline` is a English model originally trained by Chris-choi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_chris_choi_pipeline_en_5.5.0_3.0_1725687446779.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_chris_choi_pipeline_en_5.5.0_3.0_1725687446779.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# assumes an active Spark session with Spark NLP started
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_italian_chris_choi_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// assumes an active SparkSession with Spark NLP on the classpath
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_italian_chris_choi_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_italian_chris_choi_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|816.8 MB| + +## References + +https://huggingface.co/Chris-choi/xlm-roberta-base-finetuned-panx-it + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_jamie613_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_jamie613_en.md new file mode 100644 index 00000000000000..0df3d4ab425741 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_jamie613_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_italian_jamie613 XlmRoBertaForTokenClassification from jamie613 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_italian_jamie613 +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_italian_jamie613` is a English model originally trained by jamie613. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_jamie613_en_5.5.0_3.0_1725694719475.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_jamie613_en_5.5.0_3.0_1725694719475.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+# assumes an active Spark session with Spark NLP started (e.g. spark = sparknlp.start())
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_italian_jamie613","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+
+// assumes an active SparkSession with Spark NLP on the classpath
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_italian_jamie613", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
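+
+Once fitted, the pipeline above is an ordinary Spark ML `PipelineModel`, so it can be persisted locally and reloaded without downloading the model again. A minimal sketch; the path is a placeholder chosen for illustration.
+
+```python
+from pyspark.ml import PipelineModel
+
+# hypothetical local path used only for illustration
+pipelineModel.write().overwrite().save("/tmp/xlm_roberta_panx_it_pipeline")
+restored = PipelineModel.load("/tmp/xlm_roberta_panx_it_pipeline")
+restored.transform(data).select("ner.result").show(truncate=False)
+```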
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_italian_jamie613| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|828.6 MB| + +## References + +https://huggingface.co/jamie613/xlm-roberta-base-finetuned-panx-it \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_jamie613_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_jamie613_pipeline_en.md new file mode 100644 index 00000000000000..e0da23a59a4015 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_jamie613_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_italian_jamie613_pipeline pipeline XlmRoBertaForTokenClassification from jamie613 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_italian_jamie613_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_italian_jamie613_pipeline` is a English model originally trained by jamie613. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_jamie613_pipeline_en_5.5.0_3.0_1725694801201.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_jamie613_pipeline_en_5.5.0_3.0_1725694801201.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# assumes an active Spark session with Spark NLP started
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_italian_jamie613_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// assumes an active SparkSession with Spark NLP on the classpath
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_italian_jamie613_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_italian_jamie613_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|828.6 MB| + +## References + +https://huggingface.co/jamie613/xlm-roberta-base-finetuned-panx-it + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_lsh231_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_lsh231_en.md new file mode 100644 index 00000000000000..f306aefe001f18 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_lsh231_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_italian_lsh231 XlmRoBertaForTokenClassification from lsh231 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_italian_lsh231 +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_italian_lsh231` is a English model originally trained by lsh231. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_lsh231_en_5.5.0_3.0_1725744217714.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_lsh231_en_5.5.0_3.0_1725744217714.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+# assumes an active Spark session with Spark NLP started (e.g. spark = sparknlp.start())
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_italian_lsh231","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+
+// assumes an active SparkSession with Spark NLP on the classpath
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_italian_lsh231", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_italian_lsh231| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|816.8 MB| + +## References + +https://huggingface.co/lsh231/xlm-roberta-base-finetuned-panx-it \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_lsh231_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_lsh231_pipeline_en.md new file mode 100644 index 00000000000000..db9442ed180cf6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_lsh231_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_italian_lsh231_pipeline pipeline XlmRoBertaForTokenClassification from lsh231 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_italian_lsh231_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_italian_lsh231_pipeline` is a English model originally trained by lsh231. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_lsh231_pipeline_en_5.5.0_3.0_1725744312569.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_lsh231_pipeline_en_5.5.0_3.0_1725744312569.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# assumes an active Spark session with Spark NLP started
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_italian_lsh231_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// assumes an active SparkSession with Spark NLP on the classpath
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_italian_lsh231_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_italian_lsh231_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|816.8 MB| + +## References + +https://huggingface.co/lsh231/xlm-roberta-base-finetuned-panx-it + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_munsu_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_munsu_pipeline_en.md new file mode 100644 index 00000000000000..c58872a4b1fbce --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_munsu_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_italian_munsu_pipeline pipeline XlmRoBertaForTokenClassification from MunSu +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_italian_munsu_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_italian_munsu_pipeline` is a English model originally trained by MunSu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_munsu_pipeline_en_5.5.0_3.0_1725693924034.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_munsu_pipeline_en_5.5.0_3.0_1725693924034.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# assumes an active Spark session with Spark NLP started
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_italian_munsu_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// assumes an active SparkSession with Spark NLP on the classpath
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_italian_munsu_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
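+
+Beyond `transform`, a pretrained pipeline also exposes `fullAnnotate`, which keeps each annotation's metadata (for token classifiers this typically includes a per-tag confidence score). The sketch below is an assumption-laden example: it presumes an active Spark NLP session, and the exact metadata keys may vary by model.
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_italian_munsu_pipeline", lang="en")
+annotated = pipeline.fullAnnotate("I love spark-nlp")[0]
+for token, tag in zip(annotated["token"], annotated["ner"]):
+    print(token.result, tag.result, tag.metadata)  # metadata usually carries the tag confidence
+```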
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_italian_munsu_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|860.2 MB| + +## References + +https://huggingface.co/MunSu/xlm-roberta-base-finetuned-panx-it + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_nobody138_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_nobody138_pipeline_en.md new file mode 100644 index 00000000000000..5cd5ad9e9790fc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_nobody138_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_italian_nobody138_pipeline pipeline XlmRoBertaForTokenClassification from Nobody138 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_italian_nobody138_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_italian_nobody138_pipeline` is a English model originally trained by Nobody138. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_nobody138_pipeline_en_5.5.0_3.0_1725693625085.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_nobody138_pipeline_en_5.5.0_3.0_1725693625085.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# assumes an active Spark session with Spark NLP started
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_italian_nobody138_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// assumes an active SparkSession with Spark NLP on the classpath
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_italian_nobody138_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_italian_nobody138_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|828.7 MB| + +## References + +https://huggingface.co/Nobody138/xlm-roberta-base-finetuned-panx-it + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_praboda_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_praboda_en.md new file mode 100644 index 00000000000000..64bc69e6368d67 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_praboda_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_italian_praboda XlmRoBertaForTokenClassification from Praboda +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_italian_praboda +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_italian_praboda` is a English model originally trained by Praboda. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_praboda_en_5.5.0_3.0_1725704323976.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_praboda_en_5.5.0_3.0_1725704323976.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+# assumes an active Spark session with Spark NLP started (e.g. spark = sparknlp.start())
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_italian_praboda","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+
+// assumes an active SparkSession with Spark NLP on the classpath
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_italian_praboda", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_italian_praboda| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|828.6 MB| + +## References + +https://huggingface.co/Praboda/xlm-roberta-base-finetuned-panx-it \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_praboda_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_praboda_pipeline_en.md new file mode 100644 index 00000000000000..be64ab5591d013 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_praboda_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_italian_praboda_pipeline pipeline XlmRoBertaForTokenClassification from Praboda +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_italian_praboda_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_italian_praboda_pipeline` is a English model originally trained by Praboda. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_praboda_pipeline_en_5.5.0_3.0_1725704410648.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_praboda_pipeline_en_5.5.0_3.0_1725704410648.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# assumes an active Spark session with Spark NLP started
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_italian_praboda_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// assumes an active SparkSession with Spark NLP on the classpath
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_italian_praboda_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_italian_praboda_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|828.6 MB| + +## References + +https://huggingface.co/Praboda/xlm-roberta-base-finetuned-panx-it + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_smilingface88_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_smilingface88_en.md new file mode 100644 index 00000000000000..5b7d7372e4ed7d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_smilingface88_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_italian_smilingface88 XlmRoBertaForTokenClassification from smilingface88 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_italian_smilingface88 +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_italian_smilingface88` is a English model originally trained by smilingface88. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_smilingface88_en_5.5.0_3.0_1725704826848.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_smilingface88_en_5.5.0_3.0_1725704826848.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+# assumes an active Spark session with Spark NLP started (e.g. spark = sparknlp.start())
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_italian_smilingface88","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+
+// assumes an active SparkSession with Spark NLP on the classpath
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_italian_smilingface88", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_italian_smilingface88| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|828.6 MB| + +## References + +https://huggingface.co/smilingface88/xlm-roberta-base-finetuned-panx-it \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_smilingface88_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_smilingface88_pipeline_en.md new file mode 100644 index 00000000000000..600c049c207f9d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_italian_smilingface88_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_italian_smilingface88_pipeline pipeline XlmRoBertaForTokenClassification from smilingface88 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_italian_smilingface88_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_italian_smilingface88_pipeline` is a English model originally trained by smilingface88. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_smilingface88_pipeline_en_5.5.0_3.0_1725704909860.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_smilingface88_pipeline_en_5.5.0_3.0_1725704909860.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.pretrained import PretrainedPipeline
+
+# assumes an active Spark session with Spark NLP started
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_italian_smilingface88_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// assumes an active SparkSession with Spark NLP on the classpath
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_italian_smilingface88_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_italian_smilingface88_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|828.6 MB| + +## References + +https://huggingface.co/smilingface88/xlm-roberta-base-finetuned-panx-it + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_korean_jhsign_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_korean_jhsign_en.md new file mode 100644 index 00000000000000..ad9e77fb71561e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_korean_jhsign_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_korean_jhsign XlmRoBertaForTokenClassification from jhsign +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_korean_jhsign +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_korean_jhsign` is a English model originally trained by jhsign. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_korean_jhsign_en_5.5.0_3.0_1725744051688.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_korean_jhsign_en_5.5.0_3.0_1725744051688.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+# assumes an active Spark session with Spark NLP started (e.g. spark = sparknlp.start())
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_korean_jhsign","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+
+// assumes an active SparkSession with Spark NLP on the classpath
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_korean_jhsign", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
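+
+For low-latency, single-document inference the fitted pipeline can be wrapped in a `LightPipeline`, which runs the same stages without a Spark DataFrame job. A minimal sketch reusing the variables from the snippet above; the sample sentence is arbitrary.
+
+```python
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+print(light.annotate("I love spark-nlp")["ner"])  # same stages, executed in memory
+```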
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_korean_jhsign| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|844.9 MB| + +## References + +https://huggingface.co/jhsign/xlm-roberta-base-finetuned-panx-ko \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_korean_jhsign_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_korean_jhsign_pipeline_en.md new file mode 100644 index 00000000000000..1e123d9e6d892a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_panx_korean_jhsign_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_korean_jhsign_pipeline pipeline XlmRoBertaForTokenClassification from jhsign +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_korean_jhsign_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_korean_jhsign_pipeline` is a English model originally trained by jhsign. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_korean_jhsign_pipeline_en_5.5.0_3.0_1725744115553.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_korean_jhsign_pipeline_en_5.5.0_3.0_1725744115553.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_korean_jhsign_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_korean_jhsign_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_korean_jhsign_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|844.9 MB| + +## References + +https://huggingface.co/jhsign/xlm-roberta-base-finetuned-panx-ko + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_sayula_popoluca_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_sayula_popoluca_en.md new file mode 100644 index 00000000000000..543fb83c78351f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_finetuned_sayula_popoluca_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_sayula_popoluca XlmRoBertaForTokenClassification from muhammadbilal +author: John Snow Labs +name: xlm_roberta_base_finetuned_sayula_popoluca +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_sayula_popoluca` is a English model originally trained by muhammadbilal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_sayula_popoluca_en_5.5.0_3.0_1725687290297.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_sayula_popoluca_en_5.5.0_3.0_1725687290297.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_sayula_popoluca","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_sayula_popoluca", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_sayula_popoluca| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|842.3 MB| + +## References + +https://huggingface.co/muhammadbilal/xlm-roberta-base-finetuned-pos \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_kyrgyzner_ttimur_pipeline_ky.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_kyrgyzner_ttimur_pipeline_ky.md new file mode 100644 index 00000000000000..ced6e9a676e0ae --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_kyrgyzner_ttimur_pipeline_ky.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Kirghiz, Kyrgyz xlm_roberta_base_kyrgyzner_ttimur_pipeline pipeline XlmRoBertaForTokenClassification from TTimur +author: John Snow Labs +name: xlm_roberta_base_kyrgyzner_ttimur_pipeline +date: 2024-09-07 +tags: [ky, open_source, pipeline, onnx] +task: Named Entity Recognition +language: ky +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_kyrgyzner_ttimur_pipeline` is a Kirghiz, Kyrgyz model originally trained by TTimur. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_kyrgyzner_ttimur_pipeline_ky_5.5.0_3.0_1725688448638.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_kyrgyzner_ttimur_pipeline_ky_5.5.0_3.0_1725688448638.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_kyrgyzner_ttimur_pipeline", lang = "ky") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_kyrgyzner_ttimur_pipeline", lang = "ky") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_kyrgyzner_ttimur_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ky| +|Size:|777.2 MB| + +## References + +https://huggingface.co/TTimur/xlm-roberta-base-kyrgyzNER + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_language_detection_disaster_twitter_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_language_detection_disaster_twitter_en.md new file mode 100644 index 00000000000000..45319c401b3c43 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_language_detection_disaster_twitter_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_language_detection_disaster_twitter XlmRoBertaForSequenceClassification from JiaJiaCen +author: John Snow Labs +name: xlm_roberta_base_language_detection_disaster_twitter +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_language_detection_disaster_twitter` is a English model originally trained by JiaJiaCen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_language_detection_disaster_twitter_en_5.5.0_3.0_1725670094353.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_language_detection_disaster_twitter_en_5.5.0_3.0_1725670094353.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_language_detection_disaster_twitter","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_language_detection_disaster_twitter", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
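+Once `pipelineDF` has been computed as above, the predicted label sits in the `class` column set on the classifier; a short sketch, with the caveat that exposing per-label scores through the annotation metadata is an assumption about this classifier's output:
+
+```python
+# Predicted label per input row
+pipelineDF.select("text", "class.result").show(truncate=False)
+
+# Per-label scores, when exposed, live in the annotation metadata
+print(pipelineDF.select("class.metadata").first()[0][0])
+```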
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_language_detection_disaster_twitter| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|892.1 MB| + +## References + +https://huggingface.co/JiaJiaCen/xlm-roberta-base-language-detection-disaster-twitter \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_multilingual_text_genre_classifier_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_multilingual_text_genre_classifier_pipeline_xx.md new file mode 100644 index 00000000000000..3b03e0f1190b8b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_multilingual_text_genre_classifier_pipeline_xx.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Multilingual xlm_roberta_base_multilingual_text_genre_classifier_pipeline pipeline XlmRoBertaForSequenceClassification from classla +author: John Snow Labs +name: xlm_roberta_base_multilingual_text_genre_classifier_pipeline +date: 2024-09-07 +tags: [xx, open_source, pipeline, onnx] +task: Text Classification +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_multilingual_text_genre_classifier_pipeline` is a Multilingual model originally trained by classla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_multilingual_text_genre_classifier_pipeline_xx_5.5.0_3.0_1725670403991.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_multilingual_text_genre_classifier_pipeline_xx_5.5.0_3.0_1725670403991.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_multilingual_text_genre_classifier_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_multilingual_text_genre_classifier_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_multilingual_text_genre_classifier_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|812.2 MB| + +## References + +https://huggingface.co/classla/xlm-roberta-base-multilingual-text-genre-classifier + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_multilingual_text_genre_classifier_xx.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_multilingual_text_genre_classifier_xx.md new file mode 100644 index 00000000000000..d50ac2130f7d48 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_multilingual_text_genre_classifier_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual xlm_roberta_base_multilingual_text_genre_classifier XlmRoBertaForSequenceClassification from classla +author: John Snow Labs +name: xlm_roberta_base_multilingual_text_genre_classifier +date: 2024-09-07 +tags: [xx, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_multilingual_text_genre_classifier` is a Multilingual model originally trained by classla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_multilingual_text_genre_classifier_xx_5.5.0_3.0_1725670278332.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_multilingual_text_genre_classifier_xx_5.5.0_3.0_1725670278332.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_multilingual_text_genre_classifier","xx") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_multilingual_text_genre_classifier", "xx")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_multilingual_text_genre_classifier| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|xx| +|Size:|812.2 MB| + +## References + +https://huggingface.co/classla/xlm-roberta-base-multilingual-text-genre-classifier \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_nepal_bhasa_vietnam_aug_backtranslation_1_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_nepal_bhasa_vietnam_aug_backtranslation_1_en.md new file mode 100644 index 00000000000000..945986f9bf6b16 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_nepal_bhasa_vietnam_aug_backtranslation_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_nepal_bhasa_vietnam_aug_backtranslation_1 XlmRoBertaForSequenceClassification from ThuyNT03 +author: John Snow Labs +name: xlm_roberta_base_nepal_bhasa_vietnam_aug_backtranslation_1 +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_nepal_bhasa_vietnam_aug_backtranslation_1` is a English model originally trained by ThuyNT03. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_nepal_bhasa_vietnam_aug_backtranslation_1_en_5.5.0_3.0_1725669547396.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_nepal_bhasa_vietnam_aug_backtranslation_1_en_5.5.0_3.0_1725669547396.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_nepal_bhasa_vietnam_aug_backtranslation_1","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_nepal_bhasa_vietnam_aug_backtranslation_1", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_nepal_bhasa_vietnam_aug_backtranslation_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|799.7 MB| + +## References + +https://huggingface.co/ThuyNT03/xlm-roberta-base-New_VietNam-aug_backtranslation-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_nepal_bhasa_vietnam_aug_backtranslation_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_nepal_bhasa_vietnam_aug_backtranslation_1_pipeline_en.md new file mode 100644 index 00000000000000..44b0252892ddaa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_nepal_bhasa_vietnam_aug_backtranslation_1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_nepal_bhasa_vietnam_aug_backtranslation_1_pipeline pipeline XlmRoBertaForSequenceClassification from ThuyNT03 +author: John Snow Labs +name: xlm_roberta_base_nepal_bhasa_vietnam_aug_backtranslation_1_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_nepal_bhasa_vietnam_aug_backtranslation_1_pipeline` is a English model originally trained by ThuyNT03. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_nepal_bhasa_vietnam_aug_backtranslation_1_pipeline_en_5.5.0_3.0_1725669667401.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_nepal_bhasa_vietnam_aug_backtranslation_1_pipeline_en_5.5.0_3.0_1725669667401.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_nepal_bhasa_vietnam_aug_backtranslation_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_nepal_bhasa_vietnam_aug_backtranslation_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_nepal_bhasa_vietnam_aug_backtranslation_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|799.7 MB| + +## References + +https://huggingface.co/ThuyNT03/xlm-roberta-base-New_VietNam-aug_backtranslation-1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_panx_dataset_korean_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_panx_dataset_korean_en.md new file mode 100644 index 00000000000000..0890e3952bc17c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_panx_dataset_korean_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_panx_dataset_korean XlmRoBertaForTokenClassification from tner +author: John Snow Labs +name: xlm_roberta_base_panx_dataset_korean +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_panx_dataset_korean` is a English model originally trained by tner. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_panx_dataset_korean_en_5.5.0_3.0_1725688638394.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_panx_dataset_korean_en_5.5.0_3.0_1725688638394.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_panx_dataset_korean","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_panx_dataset_korean", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_panx_dataset_korean| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|786.8 MB| + +## References + +https://huggingface.co/tner/xlm-roberta-base-panx-dataset-ko \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_pipeline_xx.md new file mode 100644 index 00000000000000..906d59652e3a5e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_pipeline_xx.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Multilingual xlm_roberta_base_pipeline pipeline XlmRoBertaEmbeddings from FacebookAI +author: John Snow Labs +name: xlm_roberta_base_pipeline +date: 2024-09-07 +tags: [xx, open_source, pipeline, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_pipeline` is a Multilingual model originally trained by FacebookAI. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_pipeline_xx_5.5.0_3.0_1725677450237.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_pipeline_xx_5.5.0_3.0_1725677450237.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
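+Because this download is a whole pipeline rather than a single annotator, the exact output column names (including the embeddings column) depend on how it was exported; a small sketch that simply lists what comes back instead of assuming them, using an illustrative sentence:
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+pipeline = PretrainedPipeline("xlm_roberta_base_pipeline", lang = "xx")
+
+df = spark.createDataFrame([["Spark NLP est formidable."]]).toDF("text")
+result = pipeline.transform(df)
+print(result.columns)  # reveals the document/token/embeddings columns this pipeline produces
+```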
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|652.9 MB| + +## References + +https://huggingface.co/FacebookAI/xlm-roberta-base + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_telugu_transliterate_pipeline_te.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_telugu_transliterate_pipeline_te.md new file mode 100644 index 00000000000000..71286e56a59313 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_telugu_transliterate_pipeline_te.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Telugu xlm_roberta_base_telugu_transliterate_pipeline pipeline XlmRoBertaForTokenClassification from anishka +author: John Snow Labs +name: xlm_roberta_base_telugu_transliterate_pipeline +date: 2024-09-07 +tags: [te, open_source, pipeline, onnx] +task: Named Entity Recognition +language: te +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_telugu_transliterate_pipeline` is a Telugu model originally trained by anishka. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_telugu_transliterate_pipeline_te_5.5.0_3.0_1725692980766.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_telugu_transliterate_pipeline_te_5.5.0_3.0_1725692980766.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_telugu_transliterate_pipeline", lang = "te") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_telugu_transliterate_pipeline", lang = "te") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_telugu_transliterate_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|te| +|Size:|790.9 MB| + +## References + +https://huggingface.co/anishka/xlm-roberta-base-te-transliterate + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_telugu_transliterate_te.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_telugu_transliterate_te.md new file mode 100644 index 00000000000000..a2d19bb8c8f1dd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_telugu_transliterate_te.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Telugu xlm_roberta_base_telugu_transliterate XlmRoBertaForTokenClassification from anishka +author: John Snow Labs +name: xlm_roberta_base_telugu_transliterate +date: 2024-09-07 +tags: [te, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: te +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_telugu_transliterate` is a Telugu model originally trained by anishka. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_telugu_transliterate_te_5.5.0_3.0_1725692860827.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_telugu_transliterate_te_5.5.0_3.0_1725692860827.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_telugu_transliterate","te") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_telugu_transliterate", "te")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_telugu_transliterate| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|te| +|Size:|790.8 MB| + +## References + +https://huggingface.co/anishka/xlm-roberta-base-te-transliterate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_trimmed_arabic_30000_tweet_sentiment_arabic_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_trimmed_arabic_30000_tweet_sentiment_arabic_en.md new file mode 100644 index 00000000000000..7958c5ad41f38c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_trimmed_arabic_30000_tweet_sentiment_arabic_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_trimmed_arabic_30000_tweet_sentiment_arabic XlmRoBertaForSequenceClassification from vocabtrimmer +author: John Snow Labs +name: xlm_roberta_base_trimmed_arabic_30000_tweet_sentiment_arabic +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_trimmed_arabic_30000_tweet_sentiment_arabic` is a English model originally trained by vocabtrimmer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_trimmed_arabic_30000_tweet_sentiment_arabic_en_5.5.0_3.0_1725670606872.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_trimmed_arabic_30000_tweet_sentiment_arabic_en_5.5.0_3.0_1725670606872.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_trimmed_arabic_30000_tweet_sentiment_arabic","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_trimmed_arabic_30000_tweet_sentiment_arabic", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
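+For quick checks on a handful of sentences, wrapping the fitted model from the example above in a `LightPipeline` avoids building a DataFrame; a hedged sketch, with an illustrative Arabic input:
+
+```python
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)  # pipelineModel fitted as in the example above
+print(light.annotate("أحب هذا المنتج كثيرا"))  # dict with 'document', 'token' and 'class' outputs
+```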
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_trimmed_arabic_30000_tweet_sentiment_arabic| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|387.5 MB| + +## References + +https://huggingface.co/vocabtrimmer/xlm-roberta-base-trimmed-ar-30000-tweet-sentiment-ar \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_trimmed_arabic_30000_tweet_sentiment_arabic_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_trimmed_arabic_30000_tweet_sentiment_arabic_pipeline_en.md new file mode 100644 index 00000000000000..076856db8c998d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_trimmed_arabic_30000_tweet_sentiment_arabic_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_trimmed_arabic_30000_tweet_sentiment_arabic_pipeline pipeline XlmRoBertaForSequenceClassification from vocabtrimmer +author: John Snow Labs +name: xlm_roberta_base_trimmed_arabic_30000_tweet_sentiment_arabic_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_trimmed_arabic_30000_tweet_sentiment_arabic_pipeline` is a English model originally trained by vocabtrimmer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_trimmed_arabic_30000_tweet_sentiment_arabic_pipeline_en_5.5.0_3.0_1725670640039.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_trimmed_arabic_30000_tweet_sentiment_arabic_pipeline_en_5.5.0_3.0_1725670640039.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_trimmed_arabic_30000_tweet_sentiment_arabic_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_trimmed_arabic_30000_tweet_sentiment_arabic_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_trimmed_arabic_30000_tweet_sentiment_arabic_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|387.5 MB| + +## References + +https://huggingface.co/vocabtrimmer/xlm-roberta-base-trimmed-ar-30000-tweet-sentiment-ar + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_trimmed_portuguese_60000_tweet_sentiment_portuguese_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_trimmed_portuguese_60000_tweet_sentiment_portuguese_en.md new file mode 100644 index 00000000000000..9b0177c1fdd906 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_trimmed_portuguese_60000_tweet_sentiment_portuguese_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_trimmed_portuguese_60000_tweet_sentiment_portuguese XlmRoBertaForSequenceClassification from vocabtrimmer +author: John Snow Labs +name: xlm_roberta_base_trimmed_portuguese_60000_tweet_sentiment_portuguese +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_trimmed_portuguese_60000_tweet_sentiment_portuguese` is a English model originally trained by vocabtrimmer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_trimmed_portuguese_60000_tweet_sentiment_portuguese_en_5.5.0_3.0_1725711796351.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_trimmed_portuguese_60000_tweet_sentiment_portuguese_en_5.5.0_3.0_1725711796351.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_trimmed_portuguese_60000_tweet_sentiment_portuguese","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_trimmed_portuguese_60000_tweet_sentiment_portuguese", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_trimmed_portuguese_60000_tweet_sentiment_portuguese| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|440.7 MB| + +## References + +https://huggingface.co/vocabtrimmer/xlm-roberta-base-trimmed-pt-60000-tweet-sentiment-pt \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_tweet_sentiment_arabic_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_tweet_sentiment_arabic_en.md new file mode 100644 index 00000000000000..522cc64cd4a969 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_tweet_sentiment_arabic_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_tweet_sentiment_arabic XlmRoBertaForSequenceClassification from cardiffnlp +author: John Snow Labs +name: xlm_roberta_base_tweet_sentiment_arabic +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_tweet_sentiment_arabic` is a English model originally trained by cardiffnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_tweet_sentiment_arabic_en_5.5.0_3.0_1725712243523.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_tweet_sentiment_arabic_en_5.5.0_3.0_1725712243523.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_tweet_sentiment_arabic","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_tweet_sentiment_arabic", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_tweet_sentiment_arabic| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|779.7 MB| + +## References + +https://huggingface.co/cardiffnlp/xlm-roberta-base-tweet-sentiment-ar \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_tweet_sentiment_arabic_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_tweet_sentiment_arabic_pipeline_en.md new file mode 100644 index 00000000000000..0d70636884ccb8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_tweet_sentiment_arabic_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_tweet_sentiment_arabic_pipeline pipeline XlmRoBertaForSequenceClassification from cardiffnlp +author: John Snow Labs +name: xlm_roberta_base_tweet_sentiment_arabic_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_tweet_sentiment_arabic_pipeline` is a English model originally trained by cardiffnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_tweet_sentiment_arabic_pipeline_en_5.5.0_3.0_1725712378617.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_tweet_sentiment_arabic_pipeline_en_5.5.0_3.0_1725712378617.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_tweet_sentiment_arabic_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_tweet_sentiment_arabic_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_tweet_sentiment_arabic_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|779.7 MB| + +## References + +https://huggingface.co/cardiffnlp/xlm-roberta-base-tweet-sentiment-ar + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_ukrainian_ner_ukrner_uk.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_ukrainian_ner_ukrner_uk.md new file mode 100644 index 00000000000000..22be2d5957ba97 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_ukrainian_ner_ukrner_uk.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Ukrainian xlm_roberta_base_ukrainian_ner_ukrner XlmRoBertaForTokenClassification from EvanD +author: John Snow Labs +name: xlm_roberta_base_ukrainian_ner_ukrner +date: 2024-09-07 +tags: [uk, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: uk +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_ukrainian_ner_ukrner` is a Ukrainian model originally trained by EvanD. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_ukrainian_ner_ukrner_uk_5.5.0_3.0_1725743191111.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_ukrainian_ner_ukrner_uk_5.5.0_3.0_1725743191111.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_ukrainian_ner_ukrner","uk") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_ukrainian_ner_ukrner", "uk")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
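+The classifier above emits one IOB-style tag per token in the `ner` column; to merge those tags into entity spans, a `NerConverter` stage can be appended to the same pipeline. A sketch assuming standard IOB labels:
+
+```python
+from sparknlp.annotator import NerConverter
+from pyspark.ml import Pipeline
+
+nerConverter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier, nerConverter])
+pipelineModel = pipeline.fit(data)
+pipelineModel.transform(data).select("ner_chunk.result").show(truncate=False)
+```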
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_ukrainian_ner_ukrner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|uk| +|Size:|785.4 MB| + +## References + +https://huggingface.co/EvanD/xlm-roberta-base-ukrainian-ner-ukrner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_wnut_ner_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_wnut_ner_en.md new file mode 100644 index 00000000000000..bf3101284e2f6a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_wnut_ner_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_wnut_ner XlmRoBertaForTokenClassification from dmargutierrez +author: John Snow Labs +name: xlm_roberta_base_wnut_ner +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_wnut_ner` is a English model originally trained by dmargutierrez. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_wnut_ner_en_5.5.0_3.0_1725743393017.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_wnut_ner_en_5.5.0_3.0_1725743393017.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_wnut_ner","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_wnut_ner", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_wnut_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|825.8 MB| + +## References + +https://huggingface.co/dmargutierrez/xlm-roberta-base-WNUT-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_wnut_ner_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_wnut_ner_pipeline_en.md new file mode 100644 index 00000000000000..935b442359c32f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_wnut_ner_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_wnut_ner_pipeline pipeline XlmRoBertaForTokenClassification from dmargutierrez +author: John Snow Labs +name: xlm_roberta_base_wnut_ner_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_wnut_ner_pipeline` is a English model originally trained by dmargutierrez. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_wnut_ner_pipeline_en_5.5.0_3.0_1725743483964.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_wnut_ner_pipeline_en_5.5.0_3.0_1725743483964.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df: a Spark DataFrame with a "text" column, e.g.:
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("xlm_roberta_base_wnut_ner_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+// df: a DataFrame with a "text" column, e.g.:
+val df = Seq("I love spark-nlp").toDF("text")
+
+val pipeline = new PretrainedPipeline("xlm_roberta_base_wnut_ner_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
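+
+For quick experiments on plain strings, the same pretrained pipeline can also be used through `annotate()`. A minimal sketch, assuming Spark NLP is already started; the exact output keys depend on the stages inside the pipeline:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("xlm_roberta_base_wnut_ner_pipeline", lang="en")
+
+# annotate() returns a dict mapping each output column to its results
+result = pipeline.annotate("John Snow Labs released a new model in New York.")
+print(result)
+```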
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_wnut_ner_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|825.9 MB| + +## References + +https://huggingface.co/dmargutierrez/xlm-roberta-base-WNUT-ner + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_xnli_arabic_trimmed_arabic_30000_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_xnli_arabic_trimmed_arabic_30000_en.md new file mode 100644 index 00000000000000..efa1dab5d823c6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_xnli_arabic_trimmed_arabic_30000_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_xnli_arabic_trimmed_arabic_30000 XlmRoBertaForSequenceClassification from vocabtrimmer +author: John Snow Labs +name: xlm_roberta_base_xnli_arabic_trimmed_arabic_30000 +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_xnli_arabic_trimmed_arabic_30000` is a English model originally trained by vocabtrimmer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_xnli_arabic_trimmed_arabic_30000_en_5.5.0_3.0_1725670473835.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_xnli_arabic_trimmed_arabic_30000_en_5.5.0_3.0_1725670473835.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_xnli_arabic_trimmed_arabic_30000","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_xnli_arabic_trimmed_arabic_30000", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
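+
+As a short follow-up sketch (assuming `pipelineDF` from the Python example above), the predicted label for each input row sits in the `class` output column:
+
+```python
+from pyspark.sql.functions import col
+
+# Show each input text next to its predicted label from the "class" output column
+pipelineDF.select(col("text"), col("class.result").alias("predicted_label")).show(truncate=False)
+```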
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_xnli_arabic_trimmed_arabic_30000| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|397.8 MB| + +## References + +https://huggingface.co/vocabtrimmer/xlm-roberta-base-xnli-ar-trimmed-ar-30000 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_xx.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_xx.md new file mode 100644 index 00000000000000..c60bad177cc9f8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_base_xx.md @@ -0,0 +1,75 @@ +--- +layout: model +title: XLM-RoBERTa Base (xlm_roberta_base) +author: John Snow Labs +name: xlm_roberta_base +date: 2024-09-07 +tags: [xx, multilingual, embeddings, xlm_roberta, open_source, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +[XLM-RoBERTa](https://ai.facebook.com/blog/-xlm-r-state-of-the-art-cross-lingual-understanding-through-self-supervision/) is a scaled cross-lingual sentence encoder. It is trained on 2.5T of data across 100 languages data filtered from Common Crawl. XLM-R achieves state-of-the-arts results on multiple cross-lingual benchmarks. + +The XLM-RoBERTa model was proposed in [Unsupervised Cross-lingual Representation Learning at Scale](https://arxiv.org/abs/1911.02116) by Alexis Conneau, Kartikay Khandelwal, Naman Goyal, Vishrav Chaudhary, Guillaume Wenzek, Francisco Guzmán, Edouard Grave, Myle Ott, Luke Zettlemoyer, and Veselin Stoyanov. + +It is based on Facebook's RoBERTa model released in 2019. It is a large multi-lingual language model, trained on 2.5TB of filtered CommonCrawl data. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_xx_5.5.0_3.0_1725677268573.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_xx_5.5.0_3.0_1725677268573.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+embeddings = XlmRoBertaEmbeddings.pretrained("xlm_roberta_base", "xx") \
+    .setInputCols("sentence", "token") \
+    .setOutputCol("embeddings")
+```
+```scala
+val embeddings = XlmRoBertaEmbeddings.pretrained("xlm_roberta_base", "xx")
+  .setInputCols("sentence", "token")
+  .setOutputCol("embeddings")
+
+// document_assembler, sentence_detector and tokenizer are assumed to be defined
+// upstream (see the end-to-end Python sketch below)
+val pipeline = new Pipeline().setStages(Array(document_assembler, sentence_detector, tokenizer, embeddings))
+```
+
+{:.nlu-block}
+```python
+import nlu
+nlu.load("xx.embed.xlm").predict("""Put your text here.""")
+```
+
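+
+The snippets above show only the embeddings annotator; the Scala pipeline assumes upstream stages that are not defined in this card. A minimal end-to-end sketch in Python, assuming a running Spark session with Spark NLP (stage and column names below are illustrative):
+
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import SentenceDetector, Tokenizer, XlmRoBertaEmbeddings
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDetector = SentenceDetector() \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("token")
+
+embeddings = XlmRoBertaEmbeddings.pretrained("xlm_roberta_base", "xx") \
+    .setInputCols(["sentence", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDetector, tokenizer, embeddings])
+data = spark.createDataFrame([["Put your text here."]]).toDF("text")
+result = pipeline.fit(data).transform(data)
+result.select("embeddings.embeddings").show(truncate=False)
+```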
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[xlm_roberta]| +|Language:|xx| +|Size:|652.9 MB| \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_finetuned_emojis_non_iid_fed_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_finetuned_emojis_non_iid_fed_en.md new file mode 100644 index 00000000000000..89563126b88f0d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_finetuned_emojis_non_iid_fed_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_finetuned_emojis_non_iid_fed XlmRoBertaForSequenceClassification from Karim-Gamal +author: John Snow Labs +name: xlm_roberta_finetuned_emojis_non_iid_fed +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_finetuned_emojis_non_iid_fed` is a English model originally trained by Karim-Gamal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_finetuned_emojis_non_iid_fed_en_5.5.0_3.0_1725712883653.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_finetuned_emojis_non_iid_fed_en_5.5.0_3.0_1725712883653.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_finetuned_emojis_non_iid_fed","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_finetuned_emojis_non_iid_fed", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_finetuned_emojis_non_iid_fed| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Karim-Gamal/XLM-Roberta-finetuned-emojis-non-IID-Fed \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_german_sentiment_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_german_sentiment_pipeline_xx.md new file mode 100644 index 00000000000000..2a6b44279d55ae --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_german_sentiment_pipeline_xx.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Multilingual xlm_roberta_german_sentiment_pipeline pipeline XlmRoBertaForSequenceClassification from ssary +author: John Snow Labs +name: xlm_roberta_german_sentiment_pipeline +date: 2024-09-07 +tags: [xx, open_source, pipeline, onnx] +task: Text Classification +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_german_sentiment_pipeline` is a Multilingual model originally trained by ssary. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_german_sentiment_pipeline_xx_5.5.0_3.0_1725713067379.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_german_sentiment_pipeline_xx_5.5.0_3.0_1725713067379.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df: a Spark DataFrame with a "text" column, e.g.:
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("xlm_roberta_german_sentiment_pipeline", lang = "xx")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+// df: a DataFrame with a "text" column, e.g.:
+val df = Seq("I love spark-nlp").toDF("text")
+
+val pipeline = new PretrainedPipeline("xlm_roberta_german_sentiment_pipeline", lang = "xx")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_german_sentiment_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|1.0 GB| + +## References + +https://huggingface.co/ssary/XLM-RoBERTa-German-sentiment + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_german_sentiment_xx.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_german_sentiment_xx.md new file mode 100644 index 00000000000000..076918a8d72d93 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_german_sentiment_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual xlm_roberta_german_sentiment XlmRoBertaForSequenceClassification from ssary +author: John Snow Labs +name: xlm_roberta_german_sentiment +date: 2024-09-07 +tags: [xx, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_german_sentiment` is a Multilingual model originally trained by ssary. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_german_sentiment_xx_5.5.0_3.0_1725713019707.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_german_sentiment_xx_5.5.0_3.0_1725713019707.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_german_sentiment","xx") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_german_sentiment", "xx")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_german_sentiment| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|xx| +|Size:|1.0 GB| + +## References + +https://huggingface.co/ssary/XLM-RoBERTa-German-sentiment \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_qa_autonlp_roberta_base_squad2_24465516_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_qa_autonlp_roberta_base_squad2_24465516_en.md new file mode 100644 index 00000000000000..fe064a9c0f4ffe --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_qa_autonlp_roberta_base_squad2_24465516_en.md @@ -0,0 +1,106 @@ +--- +layout: model +title: English XlmRoBertaForQuestionAnswering (from teacookies) +author: John Snow Labs +name: xlm_roberta_qa_autonlp_roberta_base_squad2_24465516 +date: 2024-09-07 +tags: [en, open_source, question_answering, xlmroberta, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `autonlp-roberta-base-squad2-24465516` is a English model originally trained by `teacookies`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_autonlp_roberta_base_squad2_24465516_en_5.5.0_3.0_1725720560920.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_autonlp_roberta_base_squad2_24465516_en_5.5.0_3.0_1725720560920.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import MultiDocumentAssembler
+from sparknlp.annotator import XlmRoBertaForQuestionAnswering
+from pyspark.ml import Pipeline
+
+document_assembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlm_roberta_qa_autonlp_roberta_base_squad2_24465516","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer") \
+    .setCaseSensitive(True)
+
+pipeline = Pipeline().setStages([
+    document_assembler,
+    spanClassifier
+])
+
+example = spark.createDataFrame([["What's my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context")
+
+result = pipeline.fit(example).transform(example)
+```
+```scala
+import com.johnsnowlabs.nlp.MultiDocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val document = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering
+  .pretrained("xlm_roberta_qa_autonlp_roberta_base_squad2_24465516","en")
+  .setInputCols(Array("document_question", "document_context"))
+  .setOutputCol("answer")
+  .setCaseSensitive(true)
+  .setMaxSentenceLength(512)
+
+val pipeline = new Pipeline().setStages(Array(document, spanClassifier))
+
+val example = Seq(
+  ("Where was John Lennon born?", "John Lennon was born in London and lived in Paris. My name is Sarah and I live in London."),
+  ("What's my name?", "My name is Clara and I live in Berkeley."))
+  .toDF("question", "context")
+
+val result = pipeline.fit(example).transform(example)
+```
+
+{:.nlu-block}
+```python
+import nlu
+nlu.load("en.answer_question.squadv2.xlm_roberta.base_24465516.by_teacookies").predict("""What's my name?|||My name is Clara and I live in Berkeley.""")
+```
+
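+
+As a brief follow-up sketch (assuming `result` from the Python example above), the extracted answer span can be shown next to its question and context:
+
+```python
+from pyspark.sql.functions import col
+
+# The "answer" output column holds the predicted answer text for each row
+result.select(col("question"), col("context"),
+              col("answer.result").alias("predicted_answer")).show(truncate=False)
+```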
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_qa_autonlp_roberta_base_squad2_24465516| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|887.4 MB| + +## References + +References + +- https://huggingface.co/teacookies/autonlp-roberta-base-squad2-24465516 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_qa_autonlp_roberta_base_squad2_24465516_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_qa_autonlp_roberta_base_squad2_24465516_pipeline_en.md new file mode 100644 index 00000000000000..b0ba9faeb946a4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlm_roberta_qa_autonlp_roberta_base_squad2_24465516_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlm_roberta_qa_autonlp_roberta_base_squad2_24465516_pipeline pipeline XlmRoBertaForQuestionAnswering from teacookies +author: John Snow Labs +name: xlm_roberta_qa_autonlp_roberta_base_squad2_24465516_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_qa_autonlp_roberta_base_squad2_24465516_pipeline` is a English model originally trained by teacookies. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_autonlp_roberta_base_squad2_24465516_pipeline_en_5.5.0_3.0_1725720622194.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_qa_autonlp_roberta_base_squad2_24465516_pipeline_en_5.5.0_3.0_1725720622194.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df: a Spark DataFrame with "question" and "context" columns (column names assumed)
+pipeline = PretrainedPipeline("xlm_roberta_qa_autonlp_roberta_base_squad2_24465516_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df: a DataFrame with "question" and "context" columns (column names assumed)
+val pipeline = new PretrainedPipeline("xlm_roberta_qa_autonlp_roberta_base_squad2_24465516_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_qa_autonlp_roberta_base_squad2_24465516_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|887.4 MB| + +## References + +https://huggingface.co/teacookies/autonlp-roberta-base-squad2-24465516 + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlmr_base_toxicity_classifier_xx.md b/docs/_posts/ahmedlone127/2024-09-07-xlmr_base_toxicity_classifier_xx.md new file mode 100644 index 00000000000000..ddb22f37ba25d0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlmr_base_toxicity_classifier_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual xlmr_base_toxicity_classifier XlmRoBertaForSequenceClassification from textdetox +author: John Snow Labs +name: xlmr_base_toxicity_classifier +date: 2024-09-07 +tags: [xx, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_base_toxicity_classifier` is a Multilingual model originally trained by textdetox. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_base_toxicity_classifier_xx_5.5.0_3.0_1725670848919.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_base_toxicity_classifier_xx_5.5.0_3.0_1725670848919.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlmr_base_toxicity_classifier","xx") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlmr_base_toxicity_classifier", "xx")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_base_toxicity_classifier| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|xx| +|Size:|876.6 MB| + +## References + +https://huggingface.co/textdetox/xlmr-base-toxicity-classifier \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlmr_english_german_norwegian_shuffled_orig_test1000_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlmr_english_german_norwegian_shuffled_orig_test1000_en.md new file mode 100644 index 00000000000000..3feb2487bcfdf5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlmr_english_german_norwegian_shuffled_orig_test1000_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlmr_english_german_norwegian_shuffled_orig_test1000 XlmRoBertaForSequenceClassification from patpizio +author: John Snow Labs +name: xlmr_english_german_norwegian_shuffled_orig_test1000 +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_english_german_norwegian_shuffled_orig_test1000` is a English model originally trained by patpizio. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_english_german_norwegian_shuffled_orig_test1000_en_5.5.0_3.0_1725711940317.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_english_german_norwegian_shuffled_orig_test1000_en_5.5.0_3.0_1725711940317.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlmr_english_german_norwegian_shuffled_orig_test1000","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlmr_english_german_norwegian_shuffled_orig_test1000", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_english_german_norwegian_shuffled_orig_test1000| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|826.8 MB| + +## References + +https://huggingface.co/patpizio/xlmr-en-de-no_shuffled-orig-test1000 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlmr_english_german_norwegian_shuffled_orig_test1000_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlmr_english_german_norwegian_shuffled_orig_test1000_pipeline_en.md new file mode 100644 index 00000000000000..517905fc074045 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlmr_english_german_norwegian_shuffled_orig_test1000_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlmr_english_german_norwegian_shuffled_orig_test1000_pipeline pipeline XlmRoBertaForSequenceClassification from patpizio +author: John Snow Labs +name: xlmr_english_german_norwegian_shuffled_orig_test1000_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_english_german_norwegian_shuffled_orig_test1000_pipeline` is a English model originally trained by patpizio. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_english_german_norwegian_shuffled_orig_test1000_pipeline_en_5.5.0_3.0_1725712049149.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_english_german_norwegian_shuffled_orig_test1000_pipeline_en_5.5.0_3.0_1725712049149.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df: a Spark DataFrame with a "text" column, e.g.:
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("xlmr_english_german_norwegian_shuffled_orig_test1000_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+// df: a DataFrame with a "text" column, e.g.:
+val df = Seq("I love spark-nlp").toDF("text")
+
+val pipeline = new PretrainedPipeline("xlmr_english_german_norwegian_shuffled_orig_test1000_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_english_german_norwegian_shuffled_orig_test1000_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|826.8 MB| + +## References + +https://huggingface.co/patpizio/xlmr-en-de-no_shuffled-orig-test1000 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlmr_estonian_english_all_shuffled_42_test1000_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlmr_estonian_english_all_shuffled_42_test1000_en.md new file mode 100644 index 00000000000000..a6a53ee69131d5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlmr_estonian_english_all_shuffled_42_test1000_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlmr_estonian_english_all_shuffled_42_test1000 XlmRoBertaForSequenceClassification from patpizio +author: John Snow Labs +name: xlmr_estonian_english_all_shuffled_42_test1000 +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_estonian_english_all_shuffled_42_test1000` is a English model originally trained by patpizio. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_estonian_english_all_shuffled_42_test1000_en_5.5.0_3.0_1725712708566.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_estonian_english_all_shuffled_42_test1000_en_5.5.0_3.0_1725712708566.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlmr_estonian_english_all_shuffled_42_test1000","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlmr_estonian_english_all_shuffled_42_test1000", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_estonian_english_all_shuffled_42_test1000| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|819.0 MB| + +## References + +https://huggingface.co/patpizio/xlmr-et-en-all_shuffled-42-test1000 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlmr_estonian_english_all_shuffled_42_test1000_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlmr_estonian_english_all_shuffled_42_test1000_pipeline_en.md new file mode 100644 index 00000000000000..97d47e0402bca5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlmr_estonian_english_all_shuffled_42_test1000_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlmr_estonian_english_all_shuffled_42_test1000_pipeline pipeline XlmRoBertaForSequenceClassification from patpizio +author: John Snow Labs +name: xlmr_estonian_english_all_shuffled_42_test1000_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_estonian_english_all_shuffled_42_test1000_pipeline` is a English model originally trained by patpizio. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_estonian_english_all_shuffled_42_test1000_pipeline_en_5.5.0_3.0_1725712823675.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_estonian_english_all_shuffled_42_test1000_pipeline_en_5.5.0_3.0_1725712823675.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df: a Spark DataFrame with a "text" column, e.g.:
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("xlmr_estonian_english_all_shuffled_42_test1000_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+// df: a DataFrame with a "text" column, e.g.:
+val df = Seq("I love spark-nlp").toDF("text")
+
+val pipeline = new PretrainedPipeline("xlmr_estonian_english_all_shuffled_42_test1000_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_estonian_english_all_shuffled_42_test1000_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|819.1 MB| + +## References + +https://huggingface.co/patpizio/xlmr-et-en-all_shuffled-42-test1000 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlmr_finetuned_fquad_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlmr_finetuned_fquad_en.md new file mode 100644 index 00000000000000..8f24ddf0a38753 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlmr_finetuned_fquad_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English xlmr_finetuned_fquad XlmRoBertaForQuestionAnswering from lielbin +author: John Snow Labs +name: xlmr_finetuned_fquad +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_finetuned_fquad` is a English model originally trained by lielbin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_finetuned_fquad_en_5.5.0_3.0_1725710648946.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_finetuned_fquad_en_5.5.0_3.0_1725710648946.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import MultiDocumentAssembler
+from sparknlp.annotator import XlmRoBertaForQuestionAnswering
+from pyspark.ml import Pipeline
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlmr_finetuned_fquad","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.MultiDocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlmr_finetuned_fquad", "en")
+  .setInputCols(Array("document_question", "document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_finetuned_fquad| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|867.7 MB| + +## References + +https://huggingface.co/lielbin/XLMR-finetuned-FQuAD \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlmr_finetuned_fquad_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlmr_finetuned_fquad_pipeline_en.md new file mode 100644 index 00000000000000..fcb383f907dc27 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlmr_finetuned_fquad_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlmr_finetuned_fquad_pipeline pipeline XlmRoBertaForQuestionAnswering from lielbin +author: John Snow Labs +name: xlmr_finetuned_fquad_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_finetuned_fquad_pipeline` is a English model originally trained by lielbin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_finetuned_fquad_pipeline_en_5.5.0_3.0_1725710711101.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_finetuned_fquad_pipeline_en_5.5.0_3.0_1725710711101.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df: a Spark DataFrame with "question" and "context" columns (column names assumed)
+pipeline = PretrainedPipeline("xlmr_finetuned_fquad_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+
+// df: a DataFrame with "question" and "context" columns (column names assumed)
+val pipeline = new PretrainedPipeline("xlmr_finetuned_fquad_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_finetuned_fquad_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|867.7 MB| + +## References + +https://huggingface.co/lielbin/XLMR-finetuned-FQuAD + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlmr_nepali_english_train_shuffled_1986_test2000_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlmr_nepali_english_train_shuffled_1986_test2000_en.md new file mode 100644 index 00000000000000..fc4a49fa9e2c2d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlmr_nepali_english_train_shuffled_1986_test2000_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlmr_nepali_english_train_shuffled_1986_test2000 XlmRoBertaForSequenceClassification from patpizio +author: John Snow Labs +name: xlmr_nepali_english_train_shuffled_1986_test2000 +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_nepali_english_train_shuffled_1986_test2000` is a English model originally trained by patpizio. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_nepali_english_train_shuffled_1986_test2000_en_5.5.0_3.0_1725712494865.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_nepali_english_train_shuffled_1986_test2000_en_5.5.0_3.0_1725712494865.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlmr_nepali_english_train_shuffled_1986_test2000","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlmr_nepali_english_train_shuffled_1986_test2000", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_nepali_english_train_shuffled_1986_test2000| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|817.7 MB| + +## References + +https://huggingface.co/patpizio/xlmr-ne-en-train_shuffled-1986-test2000 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlmr_nepali_english_train_shuffled_1986_test2000_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlmr_nepali_english_train_shuffled_1986_test2000_pipeline_en.md new file mode 100644 index 00000000000000..f385d3ba054395 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlmr_nepali_english_train_shuffled_1986_test2000_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlmr_nepali_english_train_shuffled_1986_test2000_pipeline pipeline XlmRoBertaForSequenceClassification from patpizio +author: John Snow Labs +name: xlmr_nepali_english_train_shuffled_1986_test2000_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_nepali_english_train_shuffled_1986_test2000_pipeline` is a English model originally trained by patpizio. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_nepali_english_train_shuffled_1986_test2000_pipeline_en_5.5.0_3.0_1725712609400.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_nepali_english_train_shuffled_1986_test2000_pipeline_en_5.5.0_3.0_1725712609400.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df: a Spark DataFrame with a "text" column, e.g.:
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("xlmr_nepali_english_train_shuffled_1986_test2000_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+// df: a DataFrame with a "text" column, e.g.:
+val df = Seq("I love spark-nlp").toDF("text")
+
+val pipeline = new PretrainedPipeline("xlmr_nepali_english_train_shuffled_1986_test2000_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_nepali_english_train_shuffled_1986_test2000_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|817.7 MB| + +## References + +https://huggingface.co/patpizio/xlmr-ne-en-train_shuffled-1986-test2000 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlmr_qa_extraction_english_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlmr_qa_extraction_english_en.md new file mode 100644 index 00000000000000..80a684b18d35e8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlmr_qa_extraction_english_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlmr_qa_extraction_english XlmRoBertaForTokenClassification from TurkuNLP +author: John Snow Labs +name: xlmr_qa_extraction_english +date: 2024-09-07 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_qa_extraction_english` is a English model originally trained by TurkuNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_qa_extraction_english_en_5.5.0_3.0_1725743348760.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_qa_extraction_english_en_5.5.0_3.0_1725743348760.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, XlmRoBertaForTokenClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlmr_qa_extraction_english","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlmr_qa_extraction_english", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_qa_extraction_english| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|797.4 MB| + +## References + +https://huggingface.co/TurkuNLP/xlmr-qa-extraction-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlmr_qa_extraction_english_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlmr_qa_extraction_english_pipeline_en.md new file mode 100644 index 00000000000000..d9f2336b8d7760 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlmr_qa_extraction_english_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlmr_qa_extraction_english_pipeline pipeline XlmRoBertaForTokenClassification from TurkuNLP +author: John Snow Labs +name: xlmr_qa_extraction_english_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_qa_extraction_english_pipeline` is a English model originally trained by TurkuNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_qa_extraction_english_pipeline_en_5.5.0_3.0_1725743479186.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_qa_extraction_english_pipeline_en_5.5.0_3.0_1725743479186.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# df: a Spark DataFrame with a "text" column, e.g.:
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("xlmr_qa_extraction_english_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+// df: a DataFrame with a "text" column, e.g.:
+val df = Seq("I love spark-nlp").toDF("text")
+
+val pipeline = new PretrainedPipeline("xlmr_qa_extraction_english_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_qa_extraction_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|797.4 MB| + +## References + +https://huggingface.co/TurkuNLP/xlmr-qa-extraction-en + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlmr_squad2_webis_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlmr_squad2_webis_pipeline_en.md new file mode 100644 index 00000000000000..76c71c5b70081b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlmr_squad2_webis_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English xlmr_squad2_webis_pipeline pipeline XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: xlmr_squad2_webis_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_squad2_webis_pipeline` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_squad2_webis_pipeline_en_5.5.0_3.0_1725686025655.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_squad2_webis_pipeline_en_5.5.0_3.0_1725686025655.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmr_squad2_webis_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmr_squad2_webis_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_squad2_webis_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|876.7 MB| + +## References + +https://huggingface.co/intanm/xlmr-squad2-webis + +## Included Models + +- MultiDocumentAssembler +- XlmRoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlmr_webis_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlmr_webis_en.md new file mode 100644 index 00000000000000..5bb76c6389c651 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlmr_webis_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English xlmr_webis XlmRoBertaForQuestionAnswering from intanm +author: John Snow Labs +name: xlmr_webis +date: 2024-09-07 +tags: [en, open_source, onnx, question_answering, xlm_roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmr_webis` is a English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmr_webis_en_5.5.0_3.0_1725686375583.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmr_webis_en_5.5.0_3.0_1725686375583.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlmr_webis","en") \
+    .setInputCols(["document_question", "document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlmr_webis", "en")
+    .setInputCols(Array("document_question", "document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
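+
+After `pipelineModel.transform(data)` above, the predicted answer span lives in the `answer` column as a standard Spark NLP annotation. A small sketch of reading it back (field names follow the usual annotation schema; treat them as assumptions if your setup differs):
+
+```python
+# Sketch: extract the answer text produced by the span classifier defined above.
+pipelineDF.selectExpr("explode(answer) as ans") \
+    .selectExpr("ans.result as answer_text", "ans.metadata") \
+    .show(truncate=False)
+```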
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmr_webis| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|826.4 MB| + +## References + +https://huggingface.co/intanm/xlmr-webis \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_embeddings_marathi_roberta_mr.md b/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_embeddings_marathi_roberta_mr.md new file mode 100644 index 00000000000000..60eb4f5498f513 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_embeddings_marathi_roberta_mr.md @@ -0,0 +1,100 @@ +--- +layout: model +title: Marathi XLMRoBerta Embeddings (from l3cube-pune) +author: John Snow Labs +name: xlmroberta_embeddings_marathi_roberta +date: 2024-09-07 +tags: [mr, open_source, xlm_roberta, embeddings, onnx] +task: Embeddings +language: mr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XLMRoBERTa Embeddings model, uploaded to Hugging Face, adapted and imported into Spark NLP. `marathi-roberta` is a Marathi model orginally trained by `l3cube-pune`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_embeddings_marathi_roberta_mr_5.5.0_3.0_1725677019423.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_embeddings_marathi_roberta_mr_5.5.0_3.0_1725677019423.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("xlmroberta_embeddings_marathi_roberta","mr") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["मला स्पार्क एनएलपी आवडते"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("xlmroberta_embeddings_marathi_roberta","mr") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("मला स्पार्क एनएलपी आवडते").toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
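+
+To inspect the vectors written to the `embeddings` column above, the annotation array can be exploded so that each row pairs a token with its embedding. This is a sketch that relies only on the column names defined in the example:
+
+```python
+# Sketch: one row per token, with the token text and its embedding vector.
+result.selectExpr("explode(embeddings) as emb") \
+    .selectExpr("emb.result as token", "emb.embeddings as vector") \
+    .show(truncate=80)
+```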
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_embeddings_marathi_roberta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[xlm_roberta]| +|Language:|mr| +|Size:|1.0 GB| + +## References + +References + +- https://huggingface.co/l3cube-pune/marathi-roberta +- https://github.com/l3cube-pune/MarathiNLP +- https://arxiv.org/abs/2202.01159 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_embeddings_marathi_roberta_pipeline_mr.md b/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_embeddings_marathi_roberta_pipeline_mr.md new file mode 100644 index 00000000000000..b4383eff841010 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_embeddings_marathi_roberta_pipeline_mr.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Marathi xlmroberta_embeddings_marathi_roberta_pipeline pipeline XlmRoBertaEmbeddings from l3cube-pune +author: John Snow Labs +name: xlmroberta_embeddings_marathi_roberta_pipeline +date: 2024-09-07 +tags: [mr, open_source, pipeline, onnx] +task: Embeddings +language: mr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmroberta_embeddings_marathi_roberta_pipeline` is a Marathi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_embeddings_marathi_roberta_pipeline_mr_5.5.0_3.0_1725677075744.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_embeddings_marathi_roberta_pipeline_mr_5.5.0_3.0_1725677075744.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmroberta_embeddings_marathi_roberta_pipeline", lang = "mr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmroberta_embeddings_marathi_roberta_pipeline", lang = "mr") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_embeddings_marathi_roberta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|mr| +|Size:|1.0 GB| + +## References + +https://huggingface.co/l3cube-pune/marathi-roberta + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_aytugkaya_base_finetuned_panx_pipeline_de.md b/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_aytugkaya_base_finetuned_panx_pipeline_de.md new file mode 100644 index 00000000000000..afed0e52f2146b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_aytugkaya_base_finetuned_panx_pipeline_de.md @@ -0,0 +1,70 @@ +--- +layout: model +title: German xlmroberta_ner_aytugkaya_base_finetuned_panx_pipeline pipeline XlmRoBertaForTokenClassification from aytugkaya +author: John Snow Labs +name: xlmroberta_ner_aytugkaya_base_finetuned_panx_pipeline +date: 2024-09-07 +tags: [de, open_source, pipeline, onnx] +task: Named Entity Recognition +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmroberta_ner_aytugkaya_base_finetuned_panx_pipeline` is a German model originally trained by aytugkaya. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_aytugkaya_base_finetuned_panx_pipeline_de_5.5.0_3.0_1725743975374.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_aytugkaya_base_finetuned_panx_pipeline_de_5.5.0_3.0_1725743975374.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmroberta_ner_aytugkaya_base_finetuned_panx_pipeline", lang = "de") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmroberta_ner_aytugkaya_base_finetuned_panx_pipeline", lang = "de") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_ner_aytugkaya_base_finetuned_panx_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|de| +|Size:|853.7 MB| + +## References + +https://huggingface.co/aytugkaya/xlm-roberta-base-finetuned-panx-de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_base_fin_pipeline_fi.md b/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_base_fin_pipeline_fi.md new file mode 100644 index 00000000000000..6f22c5c36fad9a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_base_fin_pipeline_fi.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Finnish xlmroberta_ner_base_fin_pipeline pipeline XlmRoBertaForTokenClassification from tner +author: John Snow Labs +name: xlmroberta_ner_base_fin_pipeline +date: 2024-09-07 +tags: [fi, open_source, pipeline, onnx] +task: Named Entity Recognition +language: fi +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmroberta_ner_base_fin_pipeline` is a Finnish model originally trained by tner. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_base_fin_pipeline_fi_5.5.0_3.0_1725687960896.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_base_fin_pipeline_fi_5.5.0_3.0_1725687960896.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmroberta_ner_base_fin_pipeline", lang = "fi") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmroberta_ner_base_fin_pipeline", lang = "fi") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_ner_base_fin_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fi| +|Size:|773.1 MB| + +## References + +https://huggingface.co/tner/xlm-roberta-base-fin + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_base_finetuned_dholuo_finetuned_ner_luo.md b/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_base_finetuned_dholuo_finetuned_ner_luo.md new file mode 100644 index 00000000000000..9004c94e83ff48 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_base_finetuned_dholuo_finetuned_ner_luo.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Dholuo, Luo (Kenya and Tanzania) xlmroberta_ner_base_finetuned_dholuo_finetuned_ner XlmRoBertaForTokenClassification from mbeukman +author: John Snow Labs +name: xlmroberta_ner_base_finetuned_dholuo_finetuned_ner +date: 2024-09-07 +tags: [luo, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: luo +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmroberta_ner_base_finetuned_dholuo_finetuned_ner` is a Dholuo, Luo (Kenya and Tanzania) model originally trained by mbeukman. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_base_finetuned_dholuo_finetuned_ner_luo_5.5.0_3.0_1725688973300.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_base_finetuned_dholuo_finetuned_ner_luo_5.5.0_3.0_1725688973300.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_base_finetuned_dholuo_finetuned_ner","luo") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_base_finetuned_dholuo_finetuned_ner", "luo")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
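+
+Because this pipeline stops at the token classifier, the `ner` column holds one tag per token and stays index-aligned with the `token` column. A sketch of viewing tokens next to their predicted tags, reusing the column names from the example above:
+
+```python
+# Sketch: tokens and tags are parallel arrays, so they can be selected side by side.
+pipelineDF.selectExpr("token.result as tokens", "ner.result as tags").show(truncate=False)
+```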
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_ner_base_finetuned_dholuo_finetuned_ner| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|luo| +|Size:|1.0 GB| + +## References + +https://huggingface.co/mbeukman/xlm-roberta-base-finetuned-luo-finetuned-ner-luo \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_base_finetuned_kinyarwanda_finetuned_ner_swahili_pipeline_sw.md b/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_base_finetuned_kinyarwanda_finetuned_ner_swahili_pipeline_sw.md new file mode 100644 index 00000000000000..1e1373a6549bd2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_base_finetuned_kinyarwanda_finetuned_ner_swahili_pipeline_sw.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Swahili (macrolanguage) xlmroberta_ner_base_finetuned_kinyarwanda_finetuned_ner_swahili_pipeline pipeline XlmRoBertaForTokenClassification from mbeukman +author: John Snow Labs +name: xlmroberta_ner_base_finetuned_kinyarwanda_finetuned_ner_swahili_pipeline +date: 2024-09-07 +tags: [sw, open_source, pipeline, onnx] +task: Named Entity Recognition +language: sw +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmroberta_ner_base_finetuned_kinyarwanda_finetuned_ner_swahili_pipeline` is a Swahili (macrolanguage) model originally trained by mbeukman. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_base_finetuned_kinyarwanda_finetuned_ner_swahili_pipeline_sw_5.5.0_3.0_1725745396218.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_base_finetuned_kinyarwanda_finetuned_ner_swahili_pipeline_sw_5.5.0_3.0_1725745396218.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmroberta_ner_base_finetuned_kinyarwanda_finetuned_ner_swahili_pipeline", lang = "sw") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmroberta_ner_base_finetuned_kinyarwanda_finetuned_ner_swahili_pipeline", lang = "sw") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_ner_base_finetuned_kinyarwanda_finetuned_ner_swahili_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|sw| +|Size:|1.0 GB| + +## References + +https://huggingface.co/mbeukman/xlm-roberta-base-finetuned-kinyarwanda-finetuned-ner-swahili + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_base_finetuned_kinyarwanda_finetuned_ner_swahili_sw.md b/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_base_finetuned_kinyarwanda_finetuned_ner_swahili_sw.md new file mode 100644 index 00000000000000..5936e6ea90d938 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_base_finetuned_kinyarwanda_finetuned_ner_swahili_sw.md @@ -0,0 +1,115 @@ +--- +layout: model +title: Swahili XLMRobertaForTokenClassification Base Cased model (from mbeukman) +author: John Snow Labs +name: xlmroberta_ner_base_finetuned_kinyarwanda_finetuned_ner_swahili +date: 2024-09-07 +tags: [sw, open_source, xlm_roberta, ner, onnx] +task: Named Entity Recognition +language: sw +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XLMRobertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm-roberta-base-finetuned-kinyarwanda-finetuned-ner-swahili` is a Swahili model originally trained by `mbeukman`. + +## Predicted Entities + +`PER`, `DATE`, `ORG`, `LOC` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_base_finetuned_kinyarwanda_finetuned_ner_swahili_sw_5.5.0_3.0_1725745349726.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_base_finetuned_kinyarwanda_finetuned_ner_swahili_sw_5.5.0_3.0_1725745349726.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols("document") \
+    .setOutputCol("token")
+
+token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_base_finetuned_kinyarwanda_finetuned_ner_swahili","sw") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+ner_converter = NerConverter()\
+    .setInputCols(["document", "token", "ner"])\
+    .setOutputCol("ner_chunk")
+
+pipeline = Pipeline(stages=[documentAssembler, tokenizer, token_classifier, ner_converter])
+
+data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_base_finetuned_kinyarwanda_finetuned_ner_swahili","sw")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val ner_converter = new NerConverter()
+    .setInputCols(Array("document", "token", "ner"))
+    .setOutputCol("ner_chunk")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, token_classifier, ner_converter))
+
+val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text")
+
+val result = pipeline.fit(data).transform(data)
+```
+
+{:.nlu-block}
+```python
+import nlu
+nlu.load("sw.ner.xlmr_roberta.base_finetuned_kinyarwanda.by_mbeukman").predict("""PUT YOUR STRING HERE""")
+```
+</div>
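+
+With the `NerConverter` stage in place, grouped entities end up in `ner_chunk`, and the entity label is stored in each chunk's metadata. A sketch of listing them (column and metadata key names as used in the example above):
+
+```python
+# Sketch: one row per detected entity, with its surface form and label.
+result.selectExpr("explode(ner_chunk) as chunk") \
+    .selectExpr("chunk.result as entity", "chunk.metadata['entity'] as label") \
+    .show(truncate=False)
+```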
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_ner_base_finetuned_kinyarwanda_finetuned_ner_swahili| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|sw| +|Size:|1.0 GB| + +## References + +References + +- https://huggingface.co/mbeukman/xlm-roberta-base-finetuned-kinyarwanda-finetuned-ner-swahili +- https://arxiv.org/abs/2103.11811 +- https://github.com/Michael-Beukman/NERTransfer +- https://github.com/masakhane-io/masakhane-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_base_uncased_mit_movie_trivia_en.md b/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_base_uncased_mit_movie_trivia_en.md new file mode 100644 index 00000000000000..f77553874a0204 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_base_uncased_mit_movie_trivia_en.md @@ -0,0 +1,113 @@ +--- +layout: model +title: English XLMRobertaForTokenClassification Base Uncased model (from tner) +author: John Snow Labs +name: xlmroberta_ner_base_uncased_mit_movie_trivia +date: 2024-09-07 +tags: [en, open_source, xlm_roberta, ner, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XLMRobertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm-roberta-base-uncased-mit-movie-trivia` is a English model originally trained by `tner`. + +## Predicted Entities + +`actor`, `plot`, `origin`, `award`, `character name`, `relationship`, `opinion`, `director`, `genre`, `soundtrack`, `quote`, `date` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_base_uncased_mit_movie_trivia_en_5.5.0_3.0_1725744264307.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_base_uncased_mit_movie_trivia_en_5.5.0_3.0_1725744264307.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols("document") \
+    .setOutputCol("token")
+
+token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_base_uncased_mit_movie_trivia","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+ner_converter = NerConverter()\
+    .setInputCols(["document", "token", "ner"])\
+    .setOutputCol("ner_chunk")
+
+pipeline = Pipeline(stages=[documentAssembler, tokenizer, token_classifier, ner_converter])
+
+data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_base_uncased_mit_movie_trivia","en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val ner_converter = new NerConverter()
+    .setInputCols(Array("document", "token", "ner"))
+    .setOutputCol("ner_chunk")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, token_classifier, ner_converter))
+
+val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text")
+
+val result = pipeline.fit(data).transform(data)
+```
+
+{:.nlu-block}
+```python
+import nlu
+nlu.load("en.ner.xlmr_roberta.trivia_movie.uncased_base").predict("""PUT YOUR STRING HERE""")
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_ner_base_uncased_mit_movie_trivia| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|779.9 MB| + +## References + +References + +- https://huggingface.co/tner/xlm-roberta-base-uncased-mit-movie-trivia +- https://github.com/asahi417/tner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_edwardjross_base_finetuned_panx_all_xx.md b/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_edwardjross_base_finetuned_panx_all_xx.md new file mode 100644 index 00000000000000..4f63025fa2b8e3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_edwardjross_base_finetuned_panx_all_xx.md @@ -0,0 +1,112 @@ +--- +layout: model +title: Multilingual XLMRobertaForTokenClassification Base Cased model (from edwardjross) +author: John Snow Labs +name: xlmroberta_ner_edwardjross_base_finetuned_panx_all +date: 2024-09-07 +tags: [xx, open_source, xlm_roberta, ner, onnx] +task: Named Entity Recognition +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XLMRobertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm-roberta-base-finetuned-panx-all` is a Multilingual model originally trained by `edwardjross`. + +## Predicted Entities + +`ORG`, `LOC`, `PER` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_edwardjross_base_finetuned_panx_all_xx_5.5.0_3.0_1725688041749.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_edwardjross_base_finetuned_panx_all_xx_5.5.0_3.0_1725688041749.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols("document") \
+    .setOutputCol("token")
+
+token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_edwardjross_base_finetuned_panx_all","xx") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+ner_converter = NerConverter()\
+    .setInputCols(["document", "token", "ner"])\
+    .setOutputCol("ner_chunk")
+
+pipeline = Pipeline(stages=[documentAssembler, tokenizer, token_classifier, ner_converter])
+
+data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_edwardjross_base_finetuned_panx_all","xx")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val ner_converter = new NerConverter()
+    .setInputCols(Array("document", "token", "ner"))
+    .setOutputCol("ner_chunk")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, token_classifier, ner_converter))
+
+val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text")
+
+val result = pipeline.fit(data).transform(data)
+```
+
+{:.nlu-block}
+```python
+import nlu
+nlu.load("xx.ner.xlmr_roberta.base_finetuned_panx_all.by_edwardjross").predict("""PUT YOUR STRING HERE""")
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_ner_edwardjross_base_finetuned_panx_all| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|xx| +|Size:|862.0 MB| + +## References + +References + +- https://huggingface.co/edwardjross/xlm-roberta-base-finetuned-panx-all \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_neha2608_base_finetuned_panx_all_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_neha2608_base_finetuned_panx_all_pipeline_xx.md new file mode 100644 index 00000000000000..3514f08bfb7c7f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_neha2608_base_finetuned_panx_all_pipeline_xx.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Multilingual xlmroberta_ner_neha2608_base_finetuned_panx_all_pipeline pipeline XlmRoBertaForTokenClassification from Neha2608 +author: John Snow Labs +name: xlmroberta_ner_neha2608_base_finetuned_panx_all_pipeline +date: 2024-09-07 +tags: [xx, open_source, pipeline, onnx] +task: Named Entity Recognition +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmroberta_ner_neha2608_base_finetuned_panx_all_pipeline` is a Multilingual model originally trained by Neha2608. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_neha2608_base_finetuned_panx_all_pipeline_xx_5.5.0_3.0_1725744779285.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_neha2608_base_finetuned_panx_all_pipeline_xx_5.5.0_3.0_1725744779285.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmroberta_ner_neha2608_base_finetuned_panx_all_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmroberta_ner_neha2608_base_finetuned_panx_all_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_ner_neha2608_base_finetuned_panx_all_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|861.0 MB| + +## References + +https://huggingface.co/Neha2608/xlm-roberta-base-finetuned-panx-all + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_neha2608_base_finetuned_panx_all_xx.md b/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_neha2608_base_finetuned_panx_all_xx.md new file mode 100644 index 00000000000000..4a3897a3244a03 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_neha2608_base_finetuned_panx_all_xx.md @@ -0,0 +1,112 @@ +--- +layout: model +title: Multilingual XLMRobertaForTokenClassification Base Cased model (from Neha2608) +author: John Snow Labs +name: xlmroberta_ner_neha2608_base_finetuned_panx_all +date: 2024-09-07 +tags: [xx, open_source, xlm_roberta, ner, onnx] +task: Named Entity Recognition +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XLMRobertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm-roberta-base-finetuned-panx-all` is a Multilingual model originally trained by `Neha2608`. + +## Predicted Entities + +`ORG`, `LOC`, `PER` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_neha2608_base_finetuned_panx_all_xx_5.5.0_3.0_1725744716089.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_neha2608_base_finetuned_panx_all_xx_5.5.0_3.0_1725744716089.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols("document") \
+    .setOutputCol("token")
+
+token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_neha2608_base_finetuned_panx_all","xx") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+ner_converter = NerConverter()\
+    .setInputCols(["document", "token", "ner"])\
+    .setOutputCol("ner_chunk")
+
+pipeline = Pipeline(stages=[documentAssembler, tokenizer, token_classifier, ner_converter])
+
+data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_neha2608_base_finetuned_panx_all","xx")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val ner_converter = new NerConverter()
+    .setInputCols(Array("document", "token", "ner"))
+    .setOutputCol("ner_chunk")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, token_classifier, ner_converter))
+
+val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text")
+
+val result = pipeline.fit(data).transform(data)
+```
+
+{:.nlu-block}
+```python
+import nlu
+nlu.load("xx.ner.xlmr_roberta.base_finetuned_panx_all.by_neha2608").predict("""PUT YOUR STRING HERE""")
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_ner_neha2608_base_finetuned_panx_all| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|xx| +|Size:|861.0 MB| + +## References + +References + +- https://huggingface.co/Neha2608/xlm-roberta-base-finetuned-panx-all \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_rgl73_base_finetuned_panx_de.md b/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_rgl73_base_finetuned_panx_de.md new file mode 100644 index 00000000000000..5a0a30fce3b8a1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xlmroberta_ner_rgl73_base_finetuned_panx_de.md @@ -0,0 +1,113 @@ +--- +layout: model +title: German XLMRobertaForTokenClassification Base Cased model (from Rgl73) +author: John Snow Labs +name: xlmroberta_ner_rgl73_base_finetuned_panx +date: 2024-09-07 +tags: [de, open_source, xlm_roberta, ner, onnx] +task: Named Entity Recognition +language: de +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XLMRobertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm-roberta-base-finetuned-panx-de` is a German model originally trained by `Rgl73`. + +## Predicted Entities + +`PER`, `LOC`, `ORG` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_rgl73_base_finetuned_panx_de_5.5.0_3.0_1725687564043.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_ner_rgl73_base_finetuned_panx_de_5.5.0_3.0_1725687564043.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols("document") \
+    .setOutputCol("token")
+
+token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_rgl73_base_finetuned_panx","de") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("ner")
+
+ner_converter = NerConverter()\
+    .setInputCols(["document", "token", "ner"])\
+    .setOutputCol("ner_chunk")
+
+pipeline = Pipeline(stages=[documentAssembler, tokenizer, token_classifier, ner_converter])
+
+data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val token_classifier = XlmRoBertaForTokenClassification.pretrained("xlmroberta_ner_rgl73_base_finetuned_panx","de")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("ner")
+
+val ner_converter = new NerConverter()
+    .setInputCols(Array("document", "token", "ner"))
+    .setOutputCol("ner_chunk")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, token_classifier, ner_converter))
+
+val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text")
+
+val result = pipeline.fit(data).transform(data)
+```
+
+{:.nlu-block}
+```python
+import nlu
+nlu.load("de.ner.xlmr_roberta.xtreme.base_finetuned.by_Rgl73").predict("""PUT YOUR STRING HERE""")
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_ner_rgl73_base_finetuned_panx| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|de| +|Size:|853.4 MB| + +## References + +References + +- https://huggingface.co/Rgl73/xlm-roberta-base-finetuned-panx-de +- https://paperswithcode.com/sota?task=Token+Classification&dataset=xtreme \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xnli_xlm_r_only_turkish_en.md b/docs/_posts/ahmedlone127/2024-09-07-xnli_xlm_r_only_turkish_en.md new file mode 100644 index 00000000000000..16aa01dbc9a058 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xnli_xlm_r_only_turkish_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xnli_xlm_r_only_turkish XlmRoBertaForSequenceClassification from semindan +author: John Snow Labs +name: xnli_xlm_r_only_turkish +date: 2024-09-07 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xnli_xlm_r_only_turkish` is a English model originally trained by semindan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xnli_xlm_r_only_turkish_en_5.5.0_3.0_1725712406064.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xnli_xlm_r_only_turkish_en_5.5.0_3.0_1725712406064.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xnli_xlm_r_only_turkish","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xnli_xlm_r_only_turkish", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
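+
+The classifier writes its prediction into the `class` column; the label string sits in the annotation's `result` field. A sketch of reading it per input row, using the pipeline defined above:
+
+```python
+# Sketch: show each input text next to its predicted label.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```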
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xnli_xlm_r_only_turkish| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|813.7 MB| + +## References + +https://huggingface.co/semindan/xnli_xlm_r_only_tr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-07-xnli_xlm_r_only_turkish_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-07-xnli_xlm_r_only_turkish_pipeline_en.md new file mode 100644 index 00000000000000..a0433bc939f371 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-07-xnli_xlm_r_only_turkish_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xnli_xlm_r_only_turkish_pipeline pipeline XlmRoBertaForSequenceClassification from semindan +author: John Snow Labs +name: xnli_xlm_r_only_turkish_pipeline +date: 2024-09-07 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xnli_xlm_r_only_turkish_pipeline` is a English model originally trained by semindan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xnli_xlm_r_only_turkish_pipeline_en_5.5.0_3.0_1725712528880.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xnli_xlm_r_only_turkish_pipeline_en_5.5.0_3.0_1725712528880.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xnli_xlm_r_only_turkish_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xnli_xlm_r_only_turkish_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xnli_xlm_r_only_turkish_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|813.7 MB| + +## References + +https://huggingface.co/semindan/xnli_xlm_r_only_tr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-afro_xlmr_base_finetuned_kintweetsb_en.md b/docs/_posts/ahmedlone127/2024-09-08-afro_xlmr_base_finetuned_kintweetsb_en.md new file mode 100644 index 00000000000000..1e81eb05e9d16f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-afro_xlmr_base_finetuned_kintweetsb_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English afro_xlmr_base_finetuned_kintweetsb XlmRoBertaEmbeddings from RogerB +author: John Snow Labs +name: afro_xlmr_base_finetuned_kintweetsb +date: 2024-09-08 +tags: [en, open_source, onnx, embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`afro_xlmr_base_finetuned_kintweetsb` is a English model originally trained by RogerB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/afro_xlmr_base_finetuned_kintweetsb_en_5.5.0_3.0_1725770456859.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/afro_xlmr_base_finetuned_kintweetsb_en_5.5.0_3.0_1725770456859.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("afro_xlmr_base_finetuned_kintweetsb","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("afro_xlmr_base_finetuned_kintweetsb","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|afro_xlmr_base_finetuned_kintweetsb| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[xlm_roberta]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/RogerB/afro-xlmr-base-finetuned-kintweetsB \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-agnews_padding60model_en.md b/docs/_posts/ahmedlone127/2024-09-08-agnews_padding60model_en.md new file mode 100644 index 00000000000000..fd1a98bf095f23 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-agnews_padding60model_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English agnews_padding60model DistilBertForSequenceClassification from Realgon +author: John Snow Labs +name: agnews_padding60model +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`agnews_padding60model` is a English model originally trained by Realgon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/agnews_padding60model_en_5.5.0_3.0_1725775162250.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/agnews_padding60model_en_5.5.0_3.0_1725775162250.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("agnews_padding60model","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("agnews_padding60model", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|agnews_padding60model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Realgon/agnews_padding60model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-albert_base_v2_weighted_hoax_classifier_definition_en.md b/docs/_posts/ahmedlone127/2024-09-08-albert_base_v2_weighted_hoax_classifier_definition_en.md new file mode 100644 index 00000000000000..b4ae08f8d14da5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-albert_base_v2_weighted_hoax_classifier_definition_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English albert_base_v2_weighted_hoax_classifier_definition AlbertForSequenceClassification from research-dump +author: John Snow Labs +name: albert_base_v2_weighted_hoax_classifier_definition +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_base_v2_weighted_hoax_classifier_definition` is a English model originally trained by research-dump. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_weighted_hoax_classifier_definition_en_5.5.0_3.0_1725755698545.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_weighted_hoax_classifier_definition_en_5.5.0_3.0_1725755698545.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v2_weighted_hoax_classifier_definition","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v2_weighted_hoax_classifier_definition", "en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_weighted_hoax_classifier_definition| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/research-dump/albert-base-v2_weighted_hoax_classifier_definition \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-albert_base_v2_weighted_hoax_classifier_definition_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-albert_base_v2_weighted_hoax_classifier_definition_pipeline_en.md new file mode 100644 index 00000000000000..b177f021a9a92f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-albert_base_v2_weighted_hoax_classifier_definition_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English albert_base_v2_weighted_hoax_classifier_definition_pipeline pipeline AlbertForSequenceClassification from research-dump +author: John Snow Labs +name: albert_base_v2_weighted_hoax_classifier_definition_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_base_v2_weighted_hoax_classifier_definition_pipeline` is a English model originally trained by research-dump. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_weighted_hoax_classifier_definition_pipeline_en_5.5.0_3.0_1725755700946.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_weighted_hoax_classifier_definition_pipeline_en_5.5.0_3.0_1725755700946.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# example input; this sketch assumes the pipeline reads the raw text from a "text" column
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("albert_base_v2_weighted_hoax_classifier_definition_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+// example input; this sketch assumes the pipeline reads the raw text from a "text" column
+val df = Seq("I love spark-nlp").toDF("text")
+
+val pipeline = new PretrainedPipeline("albert_base_v2_weighted_hoax_classifier_definition_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+```
+
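+
+For quick ad-hoc checks on a single string, `annotate` can be used instead of `transform` (a small sketch; the result keys follow the pipeline's output column names, assumed here to include `class`):
+
+```python
+# returns a dict of annotation results for one piece of text
+result = pipeline.annotate("I love spark-nlp")
+print(result["class"])
+```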
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_weighted_hoax_classifier_definition_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/research-dump/albert-base-v2_weighted_hoax_classifier_definition + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-albert_persian_farsi_base_v2_sentiment_digikala_fa.md b/docs/_posts/ahmedlone127/2024-09-08-albert_persian_farsi_base_v2_sentiment_digikala_fa.md new file mode 100644 index 00000000000000..1b3c56c6198222 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-albert_persian_farsi_base_v2_sentiment_digikala_fa.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Persian albert_persian_farsi_base_v2_sentiment_digikala AlbertForSequenceClassification from m3hrdadfi +author: John Snow Labs +name: albert_persian_farsi_base_v2_sentiment_digikala +date: 2024-09-08 +tags: [fa, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: fa +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_persian_farsi_base_v2_sentiment_digikala` is a Persian model originally trained by m3hrdadfi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_persian_farsi_base_v2_sentiment_digikala_fa_5.5.0_3.0_1725755481622.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_persian_farsi_base_v2_sentiment_digikala_fa_5.5.0_3.0_1725755481622.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# assumes an active SparkSession `spark` with Spark NLP loaded
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, AlbertForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_persian_farsi_base_v2_sentiment_digikala","fa") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_persian_farsi_base_v2_sentiment_digikala", "fa")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_persian_farsi_base_v2_sentiment_digikala| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|fa| +|Size:|68.5 MB| + +## References + +https://huggingface.co/m3hrdadfi/albert-fa-base-v2-sentiment-digikala \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-albert_persian_farsi_base_v2_sentiment_digikala_pipeline_fa.md b/docs/_posts/ahmedlone127/2024-09-08-albert_persian_farsi_base_v2_sentiment_digikala_pipeline_fa.md new file mode 100644 index 00000000000000..7966634cf30058 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-albert_persian_farsi_base_v2_sentiment_digikala_pipeline_fa.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Persian albert_persian_farsi_base_v2_sentiment_digikala_pipeline pipeline AlbertForSequenceClassification from m3hrdadfi +author: John Snow Labs +name: albert_persian_farsi_base_v2_sentiment_digikala_pipeline +date: 2024-09-08 +tags: [fa, open_source, pipeline, onnx] +task: Text Classification +language: fa +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_persian_farsi_base_v2_sentiment_digikala_pipeline` is a Persian model originally trained by m3hrdadfi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_persian_farsi_base_v2_sentiment_digikala_pipeline_fa_5.5.0_3.0_1725755485085.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_persian_farsi_base_v2_sentiment_digikala_pipeline_fa_5.5.0_3.0_1725755485085.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# example input; this sketch assumes the pipeline reads the raw text from a "text" column
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("albert_persian_farsi_base_v2_sentiment_digikala_pipeline", lang = "fa")
+annotations = pipeline.transform(df)
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+// example input; this sketch assumes the pipeline reads the raw text from a "text" column
+val df = Seq("I love spark-nlp").toDF("text")
+
+val pipeline = new PretrainedPipeline("albert_persian_farsi_base_v2_sentiment_digikala_pipeline", lang = "fa")
+val annotations = pipeline.transform(df)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_persian_farsi_base_v2_sentiment_digikala_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|fa| +|Size:|68.6 MB| + +## References + +https://huggingface.co/m3hrdadfi/albert-fa-base-v2-sentiment-digikala + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-albert_xxlarge_v2_disaster_twitter_preprocess_data_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-albert_xxlarge_v2_disaster_twitter_preprocess_data_pipeline_en.md new file mode 100644 index 00000000000000..4dca93e92eff75 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-albert_xxlarge_v2_disaster_twitter_preprocess_data_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English albert_xxlarge_v2_disaster_twitter_preprocess_data_pipeline pipeline AlbertForSequenceClassification from JiaJiaCen +author: John Snow Labs +name: albert_xxlarge_v2_disaster_twitter_preprocess_data_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_xxlarge_v2_disaster_twitter_preprocess_data_pipeline` is a English model originally trained by JiaJiaCen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_xxlarge_v2_disaster_twitter_preprocess_data_pipeline_en_5.5.0_3.0_1725755586039.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_xxlarge_v2_disaster_twitter_preprocess_data_pipeline_en_5.5.0_3.0_1725755586039.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# example input; this sketch assumes the pipeline reads the raw text from a "text" column
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("albert_xxlarge_v2_disaster_twitter_preprocess_data_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+// example input; this sketch assumes the pipeline reads the raw text from a "text" column
+val df = Seq("I love spark-nlp").toDF("text")
+
+val pipeline = new PretrainedPipeline("albert_xxlarge_v2_disaster_twitter_preprocess_data_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_xxlarge_v2_disaster_twitter_preprocess_data_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|833.9 MB| + +## References + +https://huggingface.co/JiaJiaCen/albert-xxlarge-v2-disaster-twitter-preprocess_data + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-all_mpnet_base_v2_lr_1e_8_margin_5_epoch_3_en.md b/docs/_posts/ahmedlone127/2024-09-08-all_mpnet_base_v2_lr_1e_8_margin_5_epoch_3_en.md new file mode 100644 index 00000000000000..a733a84a6874fe --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-all_mpnet_base_v2_lr_1e_8_margin_5_epoch_3_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English all_mpnet_base_v2_lr_1e_8_margin_5_epoch_3 MPNetEmbeddings from luiz-and-robert-thesis +author: John Snow Labs +name: all_mpnet_base_v2_lr_1e_8_margin_5_epoch_3 +date: 2024-09-08 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`all_mpnet_base_v2_lr_1e_8_margin_5_epoch_3` is a English model originally trained by luiz-and-robert-thesis. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_lr_1e_8_margin_5_epoch_3_en_5.5.0_3.0_1725769496668.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_lr_1e_8_margin_5_epoch_3_en_5.5.0_3.0_1725769496668.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# assumes an active SparkSession `spark` with Spark NLP loaded
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import MPNetEmbeddings
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_v2_lr_1e_8_margin_5_epoch_3","en") \
+    .setInputCols(["document"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([documentAssembler, embeddings])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_v2_lr_1e_8_margin_5_epoch_3","en")
+  .setInputCols(Array("document"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
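+
+The resulting sentence embeddings can be pulled out of the annotation structs for downstream use (a minimal sketch continuing the Python example above):
+
+```python
+# the `embeddings` field of each annotation holds the dense vector
+pipelineDF.selectExpr("explode(embeddings.embeddings) as embedding").show(1, truncate=False)
+```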
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_base_v2_lr_1e_8_margin_5_epoch_3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/luiz-and-robert-thesis/all-mpnet-base-v2-lr-1e-8-margin-5-epoch-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-all_mpnet_base_v2_navteca_en.md b/docs/_posts/ahmedlone127/2024-09-08-all_mpnet_base_v2_navteca_en.md new file mode 100644 index 00000000000000..e86339ef18087e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-all_mpnet_base_v2_navteca_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English all_mpnet_base_v2_navteca MPNetEmbeddings from navteca +author: John Snow Labs +name: all_mpnet_base_v2_navteca +date: 2024-09-08 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`all_mpnet_base_v2_navteca` is a English model originally trained by navteca. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_navteca_en_5.5.0_3.0_1725769315954.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_navteca_en_5.5.0_3.0_1725769315954.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# assumes an active SparkSession `spark` with Spark NLP loaded
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import MPNetEmbeddings
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_v2_navteca","en") \
+    .setInputCols(["document"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([documentAssembler, embeddings])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_v2_navteca","en")
+  .setInputCols(Array("document"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_base_v2_navteca| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/navteca/all-mpnet-base-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-all_mpnet_base_v2_topic_abstract_similarity_en.md b/docs/_posts/ahmedlone127/2024-09-08-all_mpnet_base_v2_topic_abstract_similarity_en.md new file mode 100644 index 00000000000000..bd2ea84cd354ad --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-all_mpnet_base_v2_topic_abstract_similarity_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English all_mpnet_base_v2_topic_abstract_similarity MPNetEmbeddings from Eitanli +author: John Snow Labs +name: all_mpnet_base_v2_topic_abstract_similarity +date: 2024-09-08 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`all_mpnet_base_v2_topic_abstract_similarity` is a English model originally trained by Eitanli. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_topic_abstract_similarity_en_5.5.0_3.0_1725769327853.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_base_v2_topic_abstract_similarity_en_5.5.0_3.0_1725769327853.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# assumes an active SparkSession `spark` with Spark NLP loaded
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import MPNetEmbeddings
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_v2_topic_abstract_similarity","en") \
+    .setInputCols(["document"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([documentAssembler, embeddings])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val embeddings = MPNetEmbeddings.pretrained("all_mpnet_base_v2_topic_abstract_similarity","en")
+  .setInputCols(Array("document"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_base_v2_topic_abstract_similarity| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/Eitanli/all-mpnet-base-v2-topic-abstract-similarity \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-all_mpnet_janet_10k_v1_en.md b/docs/_posts/ahmedlone127/2024-09-08-all_mpnet_janet_10k_v1_en.md new file mode 100644 index 00000000000000..4a03ffbc068f42 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-all_mpnet_janet_10k_v1_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English all_mpnet_janet_10k_v1 MPNetEmbeddings from IconicAI +author: John Snow Labs +name: all_mpnet_janet_10k_v1 +date: 2024-09-08 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`all_mpnet_janet_10k_v1` is a English model originally trained by IconicAI. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_janet_10k_v1_en_5.5.0_3.0_1725769691841.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_janet_10k_v1_en_5.5.0_3.0_1725769691841.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# assumes an active SparkSession `spark` with Spark NLP loaded
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import MPNetEmbeddings
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+embeddings = MPNetEmbeddings.pretrained("all_mpnet_janet_10k_v1","en") \
+    .setInputCols(["document"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([documentAssembler, embeddings])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val embeddings = MPNetEmbeddings.pretrained("all_mpnet_janet_10k_v1","en")
+  .setInputCols(Array("document"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_janet_10k_v1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/IconicAI/all-mpnet-janet-10k-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-all_mpnet_janet_10k_v1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-all_mpnet_janet_10k_v1_pipeline_en.md new file mode 100644 index 00000000000000..4d5cf30ec06413 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-all_mpnet_janet_10k_v1_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English all_mpnet_janet_10k_v1_pipeline pipeline MPNetEmbeddings from IconicAI +author: John Snow Labs +name: all_mpnet_janet_10k_v1_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`all_mpnet_janet_10k_v1_pipeline` is a English model originally trained by IconicAI. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_mpnet_janet_10k_v1_pipeline_en_5.5.0_3.0_1725769711985.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_mpnet_janet_10k_v1_pipeline_en_5.5.0_3.0_1725769711985.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# example input; this sketch assumes the pipeline reads the raw text from a "text" column
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("all_mpnet_janet_10k_v1_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+// example input; this sketch assumes the pipeline reads the raw text from a "text" column
+val df = Seq("I love spark-nlp").toDF("text")
+
+val pipeline = new PretrainedPipeline("all_mpnet_janet_10k_v1_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_mpnet_janet_10k_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/IconicAI/all-mpnet-janet-10k-v1 + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-amazonpolarity_fewshot_en.md b/docs/_posts/ahmedlone127/2024-09-08-amazonpolarity_fewshot_en.md new file mode 100644 index 00000000000000..c99dc6ca64c7ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-amazonpolarity_fewshot_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English amazonpolarity_fewshot MPNetEmbeddings from pig4431 +author: John Snow Labs +name: amazonpolarity_fewshot +date: 2024-09-08 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`amazonpolarity_fewshot` is a English model originally trained by pig4431. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/amazonpolarity_fewshot_en_5.5.0_3.0_1725770072463.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/amazonpolarity_fewshot_en_5.5.0_3.0_1725770072463.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# assumes an active SparkSession `spark` with Spark NLP loaded
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import MPNetEmbeddings
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+embeddings = MPNetEmbeddings.pretrained("amazonpolarity_fewshot","en") \
+    .setInputCols(["document"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([documentAssembler, embeddings])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val embeddings = MPNetEmbeddings.pretrained("amazonpolarity_fewshot","en")
+  .setInputCols(Array("document"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|amazonpolarity_fewshot| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/pig4431/amazonPolarity_fewshot \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-analisis_sentimientos_beto_tass_c_en.md b/docs/_posts/ahmedlone127/2024-09-08-analisis_sentimientos_beto_tass_c_en.md new file mode 100644 index 00000000000000..ee433c881570e8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-analisis_sentimientos_beto_tass_c_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English analisis_sentimientos_beto_tass_c BertForSequenceClassification from raulgdp +author: John Snow Labs +name: analisis_sentimientos_beto_tass_c +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, bert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`analisis_sentimientos_beto_tass_c` is a English model originally trained by raulgdp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/analisis_sentimientos_beto_tass_c_en_5.5.0_3.0_1725767865895.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/analisis_sentimientos_beto_tass_c_en_5.5.0_3.0_1725767865895.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# assumes an active SparkSession `spark` with Spark NLP loaded
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, BertForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = BertForSequenceClassification.pretrained("analisis_sentimientos_beto_tass_c","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = BertForSequenceClassification.pretrained("analisis_sentimientos_beto_tass_c", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|analisis_sentimientos_beto_tass_c| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|411.7 MB| + +## References + +https://huggingface.co/raulgdp/Analisis-sentimientos-BETO-TASS-C \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-argureviews_specificity_roberta_v1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-argureviews_specificity_roberta_v1_pipeline_en.md new file mode 100644 index 00000000000000..651ae9adcf384a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-argureviews_specificity_roberta_v1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English argureviews_specificity_roberta_v1_pipeline pipeline XlmRoBertaForSequenceClassification from nihiluis +author: John Snow Labs +name: argureviews_specificity_roberta_v1_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`argureviews_specificity_roberta_v1_pipeline` is a English model originally trained by nihiluis. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/argureviews_specificity_roberta_v1_pipeline_en_5.5.0_3.0_1725781380704.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/argureviews_specificity_roberta_v1_pipeline_en_5.5.0_3.0_1725781380704.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# example input; this sketch assumes the pipeline reads the raw text from a "text" column
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("argureviews_specificity_roberta_v1_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+// example input; this sketch assumes the pipeline reads the raw text from a "text" column
+val df = Seq("I love spark-nlp").toDF("text")
+
+val pipeline = new PretrainedPipeline("argureviews_specificity_roberta_v1_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|argureviews_specificity_roberta_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|834.3 MB| + +## References + +https://huggingface.co/nihiluis/argureviews-specificity-roberta_v1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-atte_2_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-atte_2_pipeline_en.md new file mode 100644 index 00000000000000..0e30eb36f7429a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-atte_2_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English atte_2_pipeline pipeline RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: atte_2_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`atte_2_pipeline` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/atte_2_pipeline_en_5.5.0_3.0_1725778605823.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/atte_2_pipeline_en_5.5.0_3.0_1725778605823.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# example input; this sketch assumes the pipeline reads the raw text from a "text" column
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("atte_2_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+// example input; this sketch assumes the pipeline reads the raw text from a "text" column
+val df = Seq("I love spark-nlp").toDF("text")
+
+val pipeline = new PretrainedPipeline("atte_2_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|atte_2_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/Atte_2 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-auro_4_en.md b/docs/_posts/ahmedlone127/2024-09-08-auro_4_en.md new file mode 100644 index 00000000000000..e65056abdbbdee --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-auro_4_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English auro_4 RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: auro_4 +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`auro_4` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/auro_4_en_5.5.0_3.0_1725778532652.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/auro_4_en_5.5.0_3.0_1725778532652.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# assumes an active SparkSession `spark` with Spark NLP loaded
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, RoBertaForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("auro_4","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("auro_4", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|auro_4| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/AURO_4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-auro_4_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-auro_4_pipeline_en.md new file mode 100644 index 00000000000000..f5a5512dcbaac5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-auro_4_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English auro_4_pipeline pipeline RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: auro_4_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`auro_4_pipeline` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/auro_4_pipeline_en_5.5.0_3.0_1725778553998.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/auro_4_pipeline_en_5.5.0_3.0_1725778553998.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# example input; this sketch assumes the pipeline reads the raw text from a "text" column
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("auro_4_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+// example input; this sketch assumes the pipeline reads the raw text from a "text" column
+val df = Seq("I love spark-nlp").toDF("text")
+
+val pipeline = new PretrainedPipeline("auro_4_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|auro_4_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/AURO_4 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-babyberta_wikipedia1_2_5_with_masking_run2_finetuned_qasrl_en.md b/docs/_posts/ahmedlone127/2024-09-08-babyberta_wikipedia1_2_5_with_masking_run2_finetuned_qasrl_en.md new file mode 100644 index 00000000000000..af8d6dd42ef5e7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-babyberta_wikipedia1_2_5_with_masking_run2_finetuned_qasrl_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English babyberta_wikipedia1_2_5_with_masking_run2_finetuned_qasrl RoBertaForQuestionAnswering from lielbin +author: John Snow Labs +name: babyberta_wikipedia1_2_5_with_masking_run2_finetuned_qasrl +date: 2024-09-08 +tags: [en, open_source, onnx, question_answering, roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`babyberta_wikipedia1_2_5_with_masking_run2_finetuned_qasrl` is a English model originally trained by lielbin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/babyberta_wikipedia1_2_5_with_masking_run2_finetuned_qasrl_en_5.5.0_3.0_1725757143487.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/babyberta_wikipedia1_2_5_with_masking_run2_finetuned_qasrl_en_5.5.0_3.0_1725757143487.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# assumes an active SparkSession `spark` with Spark NLP loaded
+from sparknlp.base import MultiDocumentAssembler
+from sparknlp.annotator import RoBertaForQuestionAnswering
+from pyspark.ml import Pipeline
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = RoBertaForQuestionAnswering.pretrained("babyberta_wikipedia1_2_5_with_masking_run2_finetuned_qasrl","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+import com.johnsnowlabs.nlp.MultiDocumentAssembler
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols("question", "context")
+  .setOutputCols("document_question", "document_context")
+
+val spanClassifier = RoBertaForQuestionAnswering.pretrained("babyberta_wikipedia1_2_5_with_masking_run2_finetuned_qasrl", "en")
+  .setInputCols(Array("document_question","document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
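+
+The predicted answer span can then be read from the `answer` column (a short usage sketch continuing the Python example above):
+
+```python
+# prints the answer text extracted for the question/context pair
+pipelineDF.select("answer.result").show(truncate=False)
+```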
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|babyberta_wikipedia1_2_5_with_masking_run2_finetuned_qasrl| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|32.0 MB| + +## References + +https://huggingface.co/lielbin/BabyBERTa-wikipedia1_2.5-with-Masking_run2-finetuned-QASRL \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-babyberta_wikipedia1_2_5_with_masking_run2_finetuned_qasrl_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-babyberta_wikipedia1_2_5_with_masking_run2_finetuned_qasrl_pipeline_en.md new file mode 100644 index 00000000000000..2b587a14f93ae4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-babyberta_wikipedia1_2_5_with_masking_run2_finetuned_qasrl_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English babyberta_wikipedia1_2_5_with_masking_run2_finetuned_qasrl_pipeline pipeline RoBertaForQuestionAnswering from lielbin +author: John Snow Labs +name: babyberta_wikipedia1_2_5_with_masking_run2_finetuned_qasrl_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`babyberta_wikipedia1_2_5_with_masking_run2_finetuned_qasrl_pipeline` is a English model originally trained by lielbin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/babyberta_wikipedia1_2_5_with_masking_run2_finetuned_qasrl_pipeline_en_5.5.0_3.0_1725757145462.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/babyberta_wikipedia1_2_5_with_masking_run2_finetuned_qasrl_pipeline_en_5.5.0_3.0_1725757145462.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# example input; this sketch assumes the pipeline's assembler reads "question" and "context" columns
+df = spark.createDataFrame([["What framework do I use?", "I use spark-nlp."]]).toDF("question", "context")
+
+pipeline = PretrainedPipeline("babyberta_wikipedia1_2_5_with_masking_run2_finetuned_qasrl_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+// example input; this sketch assumes the pipeline's assembler reads "question" and "context" columns
+val df = Seq(("What framework do I use?", "I use spark-nlp.")).toDF("question", "context")
+
+val pipeline = new PretrainedPipeline("babyberta_wikipedia1_2_5_with_masking_run2_finetuned_qasrl_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|babyberta_wikipedia1_2_5_with_masking_run2_finetuned_qasrl_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|32.0 MB| + +## References + +https://huggingface.co/lielbin/BabyBERTa-wikipedia1_2.5-with-Masking_run2-finetuned-QASRL + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-bert_base_yelp_reviews_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-bert_base_yelp_reviews_pipeline_en.md new file mode 100644 index 00000000000000..452565be436785 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-bert_base_yelp_reviews_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_base_yelp_reviews_pipeline pipeline BertForSequenceClassification from saitejautpala +author: John Snow Labs +name: bert_base_yelp_reviews_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_yelp_reviews_pipeline` is a English model originally trained by saitejautpala. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_yelp_reviews_pipeline_en_5.5.0_3.0_1725767874194.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_yelp_reviews_pipeline_en_5.5.0_3.0_1725767874194.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# example input; this sketch assumes the pipeline reads the raw text from a "text" column
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("bert_base_yelp_reviews_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+```
+```scala
+import com.johnsnowlabs.nlp.pretrained.PretrainedPipeline
+import spark.implicits._
+
+// example input; this sketch assumes the pipeline reads the raw text from a "text" column
+val df = Seq("I love spark-nlp").toDF("text")
+
+val pipeline = new PretrainedPipeline("bert_base_yelp_reviews_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_yelp_reviews_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/saitejautpala/bert-base-yelp-reviews + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-bert_based_uncased_finetuned_imdb_en.md b/docs/_posts/ahmedlone127/2024-09-08-bert_based_uncased_finetuned_imdb_en.md new file mode 100644 index 00000000000000..46cdfc784c2758 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-bert_based_uncased_finetuned_imdb_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_based_uncased_finetuned_imdb DistilBertForSequenceClassification from car13mesquita +author: John Snow Labs +name: bert_based_uncased_finetuned_imdb +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_based_uncased_finetuned_imdb` is a English model originally trained by car13mesquita. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_based_uncased_finetuned_imdb_en_5.5.0_3.0_1725775416020.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_based_uncased_finetuned_imdb_en_5.5.0_3.0_1725775416020.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# assumes an active SparkSession `spark` with Spark NLP loaded
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("bert_based_uncased_finetuned_imdb","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("bert_based_uncased_finetuned_imdb", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_based_uncased_finetuned_imdb| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/car13mesquita/bert-based-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-bert_imdb_en.md b/docs/_posts/ahmedlone127/2024-09-08-bert_imdb_en.md new file mode 100644 index 00000000000000..94ec7ec486683d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-bert_imdb_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bert_imdb DistilBertForSequenceClassification from BlackBert +author: John Snow Labs +name: bert_imdb +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_imdb` is a English model originally trained by BlackBert. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_imdb_en_5.5.0_3.0_1725764341371.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_imdb_en_5.5.0_3.0_1725764341371.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# assumes an active SparkSession `spark` with Spark NLP loaded
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import Tokenizer, DistilBertForSequenceClassification
+from pyspark.ml import Pipeline
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("bert_imdb","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+import com.johnsnowlabs.nlp.base._
+import com.johnsnowlabs.nlp.annotator._
+import org.apache.spark.ml.Pipeline
+import spark.implicits._
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("bert_imdb", "en")
+  .setInputCols(Array("document", "token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_imdb| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/BlackBert/BERT_IMDB \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-bert_imdb_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-bert_imdb_pipeline_en.md new file mode 100644 index 00000000000000..ce434e40918b03 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-bert_imdb_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English bert_imdb_pipeline pipeline DistilBertForSequenceClassification from BlackBert +author: John Snow Labs +name: bert_imdb_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_imdb_pipeline` is a English model originally trained by BlackBert. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_imdb_pipeline_en_5.5.0_3.0_1725764353430.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_imdb_pipeline_en_5.5.0_3.0_1725764353430.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("bert_imdb_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val df = Seq("I love spark-nlp").toDF("text")
+val pipeline = new PretrainedPipeline("bert_imdb_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
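+
+For a quick spot check without building a DataFrame, the pretrained pipeline can also be called on a plain string. This is only a sketch; it assumes the classifier stage in this pipeline writes to an output column named `class`:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+# Download the pipeline once, then annotate a single string in memory.
+pipeline = PretrainedPipeline("bert_imdb_pipeline", lang="en")
+result = pipeline.annotate("I love spark-nlp")
+print(result["class"])
+```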
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_imdb_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/BlackBert/BERT_IMDB + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-bertin_roberta_fine_tuned_text_classification_slovene_data_augmentation_ds_en.md b/docs/_posts/ahmedlone127/2024-09-08-bertin_roberta_fine_tuned_text_classification_slovene_data_augmentation_ds_en.md new file mode 100644 index 00000000000000..ec4cf308bd0db3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-bertin_roberta_fine_tuned_text_classification_slovene_data_augmentation_ds_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bertin_roberta_fine_tuned_text_classification_slovene_data_augmentation_ds RoBertaForSequenceClassification from Sleoruiz +author: John Snow Labs +name: bertin_roberta_fine_tuned_text_classification_slovene_data_augmentation_ds +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bertin_roberta_fine_tuned_text_classification_slovene_data_augmentation_ds` is a English model originally trained by Sleoruiz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertin_roberta_fine_tuned_text_classification_slovene_data_augmentation_ds_en_5.5.0_3.0_1725778720149.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertin_roberta_fine_tuned_text_classification_slovene_data_augmentation_ds_en_5.5.0_3.0_1725778720149.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("bertin_roberta_fine_tuned_text_classification_slovene_data_augmentation_ds","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("bertin_roberta_fine_tuned_text_classification_slovene_data_augmentation_ds", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertin_roberta_fine_tuned_text_classification_slovene_data_augmentation_ds| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|464.6 MB| + +## References + +https://huggingface.co/Sleoruiz/bertin-roberta-fine-tuned-text-classification-SL-data-augmentation-ds \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-bertoslav_limited_en.md b/docs/_posts/ahmedlone127/2024-09-08-bertoslav_limited_en.md new file mode 100644 index 00000000000000..b52e732a3b289f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-bertoslav_limited_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English bertoslav_limited DistilBertEmbeddings from crabz +author: John Snow Labs +name: bertoslav_limited +date: 2024-09-08 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bertoslav_limited` is a English model originally trained by crabz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertoslav_limited_en_5.5.0_3.0_1725775890067.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertoslav_limited_en_5.5.0_3.0_1725775890067.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("bertoslav_limited","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("bertoslav_limited","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
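+
+To sanity-check the embeddings, one option (assuming `pipelineDF` comes from the Python snippet above, with the vectors written to the `embeddings` column) is to look at each token and the size of its vector:
+
+```python
+from pyspark.sql import functions as F
+
+# One row per token annotation: the token string and the length of its embedding vector.
+exploded = pipelineDF.select(F.explode("embeddings").alias("emb"))
+exploded.select(F.col("emb.result").alias("token"),
+                F.size("emb.embeddings").alias("dim")).show(truncate=False)
+```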
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertoslav_limited| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/crabz/bertoslav-limited \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-best_model_yelp_polarity_16_13_en.md b/docs/_posts/ahmedlone127/2024-09-08-best_model_yelp_polarity_16_13_en.md new file mode 100644 index 00000000000000..4c732a413f119b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-best_model_yelp_polarity_16_13_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English best_model_yelp_polarity_16_13 AlbertForSequenceClassification from simonycl +author: John Snow Labs +name: best_model_yelp_polarity_16_13 +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`best_model_yelp_polarity_16_13` is a English model originally trained by simonycl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/best_model_yelp_polarity_16_13_en_5.5.0_3.0_1725767269097.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/best_model_yelp_polarity_16_13_en_5.5.0_3.0_1725767269097.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = AlbertForSequenceClassification.pretrained("best_model_yelp_polarity_16_13","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = AlbertForSequenceClassification.pretrained("best_model_yelp_polarity_16_13", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
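+
+For interactive use on single strings, a `LightPipeline` wrapper around the fitted pipeline above can be handy. This is a sketch only; the exact label strings returned depend on how the original model was fine-tuned:
+
+```python
+from sparknlp.base import LightPipeline
+
+# Wrap the fitted PipelineModel so it can annotate plain Python strings.
+light = LightPipeline(pipelineModel)
+print(light.annotate("The food was great and the service was fast.")["class"])
+```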
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|best_model_yelp_polarity_16_13| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/simonycl/best_model-yelp_polarity-16-13 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-best_model_yelp_polarity_32_13_en.md b/docs/_posts/ahmedlone127/2024-09-08-best_model_yelp_polarity_32_13_en.md new file mode 100644 index 00000000000000..57a2ffd3c3674f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-best_model_yelp_polarity_32_13_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English best_model_yelp_polarity_32_13 AlbertForSequenceClassification from simonycl +author: John Snow Labs +name: best_model_yelp_polarity_32_13 +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`best_model_yelp_polarity_32_13` is a English model originally trained by simonycl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/best_model_yelp_polarity_32_13_en_5.5.0_3.0_1725755401178.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/best_model_yelp_polarity_32_13_en_5.5.0_3.0_1725755401178.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = AlbertForSequenceClassification.pretrained("best_model_yelp_polarity_32_13","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = AlbertForSequenceClassification.pretrained("best_model_yelp_polarity_32_13", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|best_model_yelp_polarity_32_13| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/simonycl/best_model-yelp_polarity-32-13 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-best_model_yelp_polarity_32_13_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-best_model_yelp_polarity_32_13_pipeline_en.md new file mode 100644 index 00000000000000..802be0e1ca558d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-best_model_yelp_polarity_32_13_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English best_model_yelp_polarity_32_13_pipeline pipeline AlbertForSequenceClassification from simonycl +author: John Snow Labs +name: best_model_yelp_polarity_32_13_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`best_model_yelp_polarity_32_13_pipeline` is a English model originally trained by simonycl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/best_model_yelp_polarity_32_13_pipeline_en_5.5.0_3.0_1725755403693.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/best_model_yelp_polarity_32_13_pipeline_en_5.5.0_3.0_1725755403693.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("best_model_yelp_polarity_32_13_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val df = Seq("I love spark-nlp").toDF("text")
+val pipeline = new PretrainedPipeline("best_model_yelp_polarity_32_13_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|best_model_yelp_polarity_32_13_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/simonycl/best_model-yelp_polarity-32-13 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-best_model_yelp_polarity_64_21_en.md b/docs/_posts/ahmedlone127/2024-09-08-best_model_yelp_polarity_64_21_en.md new file mode 100644 index 00000000000000..e52523d0cda6f1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-best_model_yelp_polarity_64_21_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English best_model_yelp_polarity_64_21 AlbertForSequenceClassification from simonycl +author: John Snow Labs +name: best_model_yelp_polarity_64_21 +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, albert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`best_model_yelp_polarity_64_21` is a English model originally trained by simonycl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/best_model_yelp_polarity_64_21_en_5.5.0_3.0_1725755539879.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/best_model_yelp_polarity_64_21_en_5.5.0_3.0_1725755539879.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = AlbertForSequenceClassification.pretrained("best_model_yelp_polarity_64_21","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = AlbertForSequenceClassification.pretrained("best_model_yelp_polarity_64_21", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|best_model_yelp_polarity_64_21| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/simonycl/best_model-yelp_polarity-64-21 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-best_model_yelp_polarity_64_21_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-best_model_yelp_polarity_64_21_pipeline_en.md new file mode 100644 index 00000000000000..6671cec5b2f11e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-best_model_yelp_polarity_64_21_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English best_model_yelp_polarity_64_21_pipeline pipeline AlbertForSequenceClassification from simonycl +author: John Snow Labs +name: best_model_yelp_polarity_64_21_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`best_model_yelp_polarity_64_21_pipeline` is a English model originally trained by simonycl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/best_model_yelp_polarity_64_21_pipeline_en_5.5.0_3.0_1725755542203.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/best_model_yelp_polarity_64_21_pipeline_en_5.5.0_3.0_1725755542203.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("best_model_yelp_polarity_64_21_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val df = Seq("I love spark-nlp").toDF("text")
+val pipeline = new PretrainedPipeline("best_model_yelp_polarity_64_21_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|best_model_yelp_polarity_64_21_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/simonycl/best_model-yelp_polarity-64-21 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- AlbertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-cat_ner_xlmr_4_en.md b/docs/_posts/ahmedlone127/2024-09-08-cat_ner_xlmr_4_en.md new file mode 100644 index 00000000000000..500d943c6251d7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-cat_ner_xlmr_4_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English cat_ner_xlmr_4 XlmRoBertaForTokenClassification from homersimpson +author: John Snow Labs +name: cat_ner_xlmr_4 +date: 2024-09-08 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cat_ner_xlmr_4` is a English model originally trained by homersimpson. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cat_ner_xlmr_4_en_5.5.0_3.0_1725773773225.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cat_ner_xlmr_4_en_5.5.0_3.0_1725773773225.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("cat_ner_xlmr_4","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("cat_ner_xlmr_4", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
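+
+If the model emits IOB-style tags (an assumption, since the label set is not listed here), the token-level predictions in `pipelineDF` can be grouped into entity chunks with a `NerConverter`:
+
+```python
+from sparknlp.annotator import NerConverter
+
+# Merge consecutive B-/I- tags into whole entity spans.
+converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("entities")
+
+converter.transform(pipelineDF).select("entities.result").show(truncate=False)
+```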
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cat_ner_xlmr_4| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|814.0 MB| + +## References + +https://huggingface.co/homersimpson/cat-ner-xlmr-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-category_1_delivery_cancellation_distilbert_base_uncased_distilled_squad_v1_en.md b/docs/_posts/ahmedlone127/2024-09-08-category_1_delivery_cancellation_distilbert_base_uncased_distilled_squad_v1_en.md new file mode 100644 index 00000000000000..dbc9887fc85fcb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-category_1_delivery_cancellation_distilbert_base_uncased_distilled_squad_v1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English category_1_delivery_cancellation_distilbert_base_uncased_distilled_squad_v1 DistilBertForSequenceClassification from chuuhtetnaing +author: John Snow Labs +name: category_1_delivery_cancellation_distilbert_base_uncased_distilled_squad_v1 +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`category_1_delivery_cancellation_distilbert_base_uncased_distilled_squad_v1` is a English model originally trained by chuuhtetnaing. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/category_1_delivery_cancellation_distilbert_base_uncased_distilled_squad_v1_en_5.5.0_3.0_1725776744753.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/category_1_delivery_cancellation_distilbert_base_uncased_distilled_squad_v1_en_5.5.0_3.0_1725776744753.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("category_1_delivery_cancellation_distilbert_base_uncased_distilled_squad_v1","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("category_1_delivery_cancellation_distilbert_base_uncased_distilled_squad_v1", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|category_1_delivery_cancellation_distilbert_base_uncased_distilled_squad_v1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.4 MB| + +## References + +https://huggingface.co/chuuhtetnaing/category-1-delivery-cancellation-distilbert-base-uncased-distilled-squad-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-clasificadorcorreosoportedistilespanol_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-clasificadorcorreosoportedistilespanol_pipeline_en.md new file mode 100644 index 00000000000000..228d739b1831c0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-clasificadorcorreosoportedistilespanol_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English clasificadorcorreosoportedistilespanol_pipeline pipeline DistilBertForSequenceClassification from Arodrigo +author: John Snow Labs +name: clasificadorcorreosoportedistilespanol_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`clasificadorcorreosoportedistilespanol_pipeline` is a English model originally trained by Arodrigo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clasificadorcorreosoportedistilespanol_pipeline_en_5.5.0_3.0_1725776971720.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clasificadorcorreosoportedistilespanol_pipeline_en_5.5.0_3.0_1725776971720.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("clasificadorcorreosoportedistilespanol_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val df = Seq("I love spark-nlp").toDF("text")
+val pipeline = new PretrainedPipeline("clasificadorcorreosoportedistilespanol_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clasificadorcorreosoportedistilespanol_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|252.5 MB| + +## References + +https://huggingface.co/Arodrigo/ClasificadorCorreoSoporteDistilEspanol + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-classification_model_mtebad_en.md b/docs/_posts/ahmedlone127/2024-09-08-classification_model_mtebad_en.md new file mode 100644 index 00000000000000..fa329f084b8b8f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-classification_model_mtebad_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English classification_model_mtebad DistilBertForSequenceClassification from mtebad +author: John Snow Labs +name: classification_model_mtebad +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`classification_model_mtebad` is a English model originally trained by mtebad. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/classification_model_mtebad_en_5.5.0_3.0_1725764427345.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/classification_model_mtebad_en_5.5.0_3.0_1725764427345.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("classification_model_mtebad","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("classification_model_mtebad", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|classification_model_mtebad| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/mtebad/classification_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-classification_model_mtebad_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-classification_model_mtebad_pipeline_en.md new file mode 100644 index 00000000000000..2805799a91fc09 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-classification_model_mtebad_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English classification_model_mtebad_pipeline pipeline DistilBertForSequenceClassification from mtebad +author: John Snow Labs +name: classification_model_mtebad_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`classification_model_mtebad_pipeline` is a English model originally trained by mtebad. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/classification_model_mtebad_pipeline_en_5.5.0_3.0_1725764439173.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/classification_model_mtebad_pipeline_en_5.5.0_3.0_1725764439173.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("classification_model_mtebad_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val df = Seq("I love spark-nlp").toDF("text")
+val pipeline = new PretrainedPipeline("classification_model_mtebad_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|classification_model_mtebad_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/mtebad/classification_model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-classification_model_sushant22_en.md b/docs/_posts/ahmedlone127/2024-09-08-classification_model_sushant22_en.md new file mode 100644 index 00000000000000..41e07bc1474ce4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-classification_model_sushant22_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English classification_model_sushant22 DistilBertForSequenceClassification from sushant22 +author: John Snow Labs +name: classification_model_sushant22 +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`classification_model_sushant22` is a English model originally trained by sushant22. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/classification_model_sushant22_en_5.5.0_3.0_1725774842278.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/classification_model_sushant22_en_5.5.0_3.0_1725774842278.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("classification_model_sushant22","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("classification_model_sushant22", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|classification_model_sushant22| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/sushant22/classification_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-cm124057_01_en.md b/docs/_posts/ahmedlone127/2024-09-08-cm124057_01_en.md new file mode 100644 index 00000000000000..bfc680e113b79f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-cm124057_01_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English cm124057_01 DistilBertForSequenceClassification from jkloip +author: John Snow Labs +name: cm124057_01 +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cm124057_01` is a English model originally trained by jkloip. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cm124057_01_en_5.5.0_3.0_1725775127793.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cm124057_01_en_5.5.0_3.0_1725775127793.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("cm124057_01","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("cm124057_01", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cm124057_01| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/jkloip/cm124057-01 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-cpu_netzero_classifier_en.md b/docs/_posts/ahmedlone127/2024-09-08-cpu_netzero_classifier_en.md new file mode 100644 index 00000000000000..4669ca102dfd60 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-cpu_netzero_classifier_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English cpu_netzero_classifier MPNetEmbeddings from mtyrrell +author: John Snow Labs +name: cpu_netzero_classifier +date: 2024-09-08 +tags: [mpnet, en, open_source, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cpu_netzero_classifier` is a English model originally trained by mtyrrell. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cpu_netzero_classifier_en_5.5.0_3.0_1725756485067.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cpu_netzero_classifier_en_5.5.0_3.0_1725756485067.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+
+embeddings = MPNetEmbeddings.pretrained("cpu_netzero_classifier","en") \
+    .setInputCols(["documents"]) \
+    .setOutputCol("mpnet_embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, embeddings])
+
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val embeddings = MPNetEmbeddings
+    .pretrained("cpu_netzero_classifier", "en")
+    .setInputCols(Array("documents"))
+    .setOutputCol("mpnet_embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings))
+
+val data = Seq("I love spark-nlp").toDF("text")
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cpu_netzero_classifier| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +References + +https://huggingface.co/mtyrrell/CPU_Netzero_Classifier \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-cpu_netzero_classifier_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-cpu_netzero_classifier_pipeline_en.md new file mode 100644 index 00000000000000..79b97cad0876ec --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-cpu_netzero_classifier_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English cpu_netzero_classifier_pipeline pipeline MPNetForSequenceClassification from mtyrrell +author: John Snow Labs +name: cpu_netzero_classifier_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cpu_netzero_classifier_pipeline` is a English model originally trained by mtyrrell. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cpu_netzero_classifier_pipeline_en_5.5.0_3.0_1725756504218.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cpu_netzero_classifier_pipeline_en_5.5.0_3.0_1725756504218.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("cpu_netzero_classifier_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val df = Seq("I love spark-nlp").toDF("text")
+val pipeline = new PretrainedPipeline("cpu_netzero_classifier_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cpu_netzero_classifier_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/mtyrrell/CPU_Netzero_Classifier + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-cpu_transport_ghg_classifier_en.md b/docs/_posts/ahmedlone127/2024-09-08-cpu_transport_ghg_classifier_en.md new file mode 100644 index 00000000000000..960b209befe38e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-cpu_transport_ghg_classifier_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English cpu_transport_ghg_classifier MPNetEmbeddings from mtyrrell +author: John Snow Labs +name: cpu_transport_ghg_classifier +date: 2024-09-08 +tags: [mpnet, en, open_source, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cpu_transport_ghg_classifier` is a English model originally trained by mtyrrell. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cpu_transport_ghg_classifier_en_5.5.0_3.0_1725756778733.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cpu_transport_ghg_classifier_en_5.5.0_3.0_1725756778733.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+
+embeddings = MPNetEmbeddings.pretrained("cpu_transport_ghg_classifier","en") \
+    .setInputCols(["documents"]) \
+    .setOutputCol("mpnet_embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, embeddings])
+
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val embeddings = MPNetEmbeddings
+    .pretrained("cpu_transport_ghg_classifier", "en")
+    .setInputCols(Array("documents"))
+    .setOutputCol("mpnet_embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings))
+
+val data = Seq("I love spark-nlp").toDF("text")
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cpu_transport_ghg_classifier| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.5 MB| + +## References + +References + +https://huggingface.co/mtyrrell/CPU_Transport_GHG_Classifier \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-cpu_transport_ghg_classifier_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-cpu_transport_ghg_classifier_pipeline_en.md new file mode 100644 index 00000000000000..9e591948fe5cd8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-cpu_transport_ghg_classifier_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English cpu_transport_ghg_classifier_pipeline pipeline MPNetForSequenceClassification from mtyrrell +author: John Snow Labs +name: cpu_transport_ghg_classifier_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cpu_transport_ghg_classifier_pipeline` is a English model originally trained by mtyrrell. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cpu_transport_ghg_classifier_pipeline_en_5.5.0_3.0_1725756798351.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cpu_transport_ghg_classifier_pipeline_en_5.5.0_3.0_1725756798351.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("cpu_transport_ghg_classifier_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val df = Seq("I love spark-nlp").toDF("text")
+val pipeline = new PretrainedPipeline("cpu_transport_ghg_classifier_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cpu_transport_ghg_classifier_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/mtyrrell/CPU_Transport_GHG_Classifier + +## Included Models + +- DocumentAssembler +- TokenizerModel +- MPNetForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-cross_all_bs192_hardneg_finetuned_webnlg2020_relevance_en.md b/docs/_posts/ahmedlone127/2024-09-08-cross_all_bs192_hardneg_finetuned_webnlg2020_relevance_en.md new file mode 100644 index 00000000000000..667d84e833bf0a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-cross_all_bs192_hardneg_finetuned_webnlg2020_relevance_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English cross_all_bs192_hardneg_finetuned_webnlg2020_relevance MPNetEmbeddings from teven +author: John Snow Labs +name: cross_all_bs192_hardneg_finetuned_webnlg2020_relevance +date: 2024-09-08 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`cross_all_bs192_hardneg_finetuned_webnlg2020_relevance` is a English model originally trained by teven. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cross_all_bs192_hardneg_finetuned_webnlg2020_relevance_en_5.5.0_3.0_1725769579497.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cross_all_bs192_hardneg_finetuned_webnlg2020_relevance_en_5.5.0_3.0_1725769579497.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("cross_all_bs192_hardneg_finetuned_webnlg2020_relevance","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("cross_all_bs192_hardneg_finetuned_webnlg2020_relevance","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
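+
+A rough way to use these sentence-level vectors, assuming the Python pipeline defined above, is to compare two texts with cosine similarity; `embeddings[0]` is used here on the assumption that MPNetEmbeddings emits one annotation per document:
+
+```python
+import numpy as np
+
+docs = spark.createDataFrame([["The hotel was clean."], ["The room was spotless."]]).toDF("text")
+rows = pipeline.fit(docs).transform(docs) \
+    .selectExpr("embeddings[0].embeddings as vec").collect()
+
+# Cosine similarity between the two sentence vectors.
+a, b = (np.array(r.vec) for r in rows)
+print(float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b))))
+```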
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cross_all_bs192_hardneg_finetuned_webnlg2020_relevance| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.3 MB| + +## References + +https://huggingface.co/teven/cross_all_bs192_hardneg_finetuned_WebNLG2020_relevance \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-custommodel_yelp_hanyundudddd_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-custommodel_yelp_hanyundudddd_pipeline_en.md new file mode 100644 index 00000000000000..ce8852bb957a0e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-custommodel_yelp_hanyundudddd_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English custommodel_yelp_hanyundudddd_pipeline pipeline DistilBertForSequenceClassification from hanyundudddd +author: John Snow Labs +name: custommodel_yelp_hanyundudddd_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`custommodel_yelp_hanyundudddd_pipeline` is a English model originally trained by hanyundudddd. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/custommodel_yelp_hanyundudddd_pipeline_en_5.5.0_3.0_1725764373297.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/custommodel_yelp_hanyundudddd_pipeline_en_5.5.0_3.0_1725764373297.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("custommodel_yelp_hanyundudddd_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("custommodel_yelp_hanyundudddd_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
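+The snippet above assumes an existing DataFrame `df` with a `text` column. A minimal sketch of preparing such input and reading the predictions back (the `class` output column is the usual convention for these classifier pipelines and is an assumption here; check `annotations.columns` if it differs):
+
+```python
+# Hypothetical input: the pretrained pipeline reads from a "text" column.
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipeline = PretrainedPipeline("custommodel_yelp_hanyundudddd_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+# Predicted labels land in the classifier's output column (assumed to be "class").
+annotations.select("class.result").show(truncate=False)
+
+# For a single string, annotate() returns a plain Python dict of results.
+print(pipeline.annotate("I love spark-nlp"))
+```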
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|custommodel_yelp_hanyundudddd_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/hanyundudddd/CustomModel_yelp + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-darija_englishv2_1_en.md b/docs/_posts/ahmedlone127/2024-09-08-darija_englishv2_1_en.md new file mode 100644 index 00000000000000..f7c9c46d8c4229 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-darija_englishv2_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English darija_englishv2_1 MarianTransformer from hananeChab +author: John Snow Labs +name: darija_englishv2_1 +date: 2024-09-08 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`darija_englishv2_1` is a English model originally trained by hananeChab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/darija_englishv2_1_en_5.5.0_3.0_1725765694735.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/darija_englishv2_1_en_5.5.0_3.0_1725765694735.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("darija_englishv2_1","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("darija_englishv2_1","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
</div>
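+After fitting, the translated text is available in the `translation` annotation column; a short sketch of reading it back (column names as in the example above):
+
+```python
+# Show the English translations produced by the MarianTransformer stage.
+pipelineDF.select("translation.result").show(truncate=False)
+```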
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|darija_englishv2_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|527.9 MB| + +## References + +https://huggingface.co/hananeChab/darija_englishV2.1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-depression_detection_model_en.md b/docs/_posts/ahmedlone127/2024-09-08-depression_detection_model_en.md new file mode 100644 index 00000000000000..bb8e3338aa059c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-depression_detection_model_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English depression_detection_model DistilBertForSequenceClassification from thePixel42 +author: John Snow Labs +name: depression_detection_model +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`depression_detection_model` is a English model originally trained by thePixel42. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/depression_detection_model_en_5.5.0_3.0_1725777102716.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/depression_detection_model_en_5.5.0_3.0_1725777102716.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("depression_detection_model","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("depression_detection_model", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
</div>
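+The predicted label for each row ends up in the `class` annotation column; as a small sketch (column names as in the example above):
+
+```python
+# Show each input text next to its predicted label.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```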
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|depression_detection_model| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/thePixel42/depression_detection_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_cased_distilbert_en.md b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_cased_distilbert_en.md new file mode 100644 index 00000000000000..71b84782bb0485 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_cased_distilbert_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_cased_distilbert DistilBertEmbeddings from distilbert +author: John Snow Labs +name: distilbert_base_cased_distilbert +date: 2024-09-08 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_cased_distilbert` is a English model originally trained by distilbert. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_distilbert_en_5.5.0_3.0_1725776527099.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_distilbert_en_5.5.0_3.0_1725776527099.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_cased_distilbert","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_cased_distilbert","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_cased_distilbert| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/distilbert/distilbert-base-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_cased_finetuned_imdb_shindc_en.md b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_cased_finetuned_imdb_shindc_en.md new file mode 100644 index 00000000000000..ee1cdf8c2edaab --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_cased_finetuned_imdb_shindc_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_cased_finetuned_imdb_shindc DistilBertEmbeddings from ShinDC +author: John Snow Labs +name: distilbert_base_cased_finetuned_imdb_shindc +date: 2024-09-08 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_cased_finetuned_imdb_shindc` is a English model originally trained by ShinDC. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_finetuned_imdb_shindc_en_5.5.0_3.0_1725776070324.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_finetuned_imdb_shindc_en_5.5.0_3.0_1725776070324.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_cased_finetuned_imdb_shindc","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_cased_finetuned_imdb_shindc","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_cased_finetuned_imdb_shindc| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/ShinDC/distilbert-base-cased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_cased_finetuned_imdb_shindc_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_cased_finetuned_imdb_shindc_pipeline_en.md new file mode 100644 index 00000000000000..98e2ececf7909c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_cased_finetuned_imdb_shindc_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_cased_finetuned_imdb_shindc_pipeline pipeline DistilBertEmbeddings from ShinDC +author: John Snow Labs +name: distilbert_base_cased_finetuned_imdb_shindc_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_cased_finetuned_imdb_shindc_pipeline` is a English model originally trained by ShinDC. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_finetuned_imdb_shindc_pipeline_en_5.5.0_3.0_1725776082021.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_finetuned_imdb_shindc_pipeline_en_5.5.0_3.0_1725776082021.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_cased_finetuned_imdb_shindc_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_cased_finetuned_imdb_shindc_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_cased_finetuned_imdb_shindc_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|243.8 MB| + +## References + +https://huggingface.co/ShinDC/distilbert-base-cased-finetuned-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_english_greek_modern_russian_cased_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_english_greek_modern_russian_cased_pipeline_en.md new file mode 100644 index 00000000000000..41b707d8cb3758 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_english_greek_modern_russian_cased_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_english_greek_modern_russian_cased_pipeline pipeline DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_greek_modern_russian_cased_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_english_greek_modern_russian_cased_pipeline` is a English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_greek_modern_russian_cased_pipeline_en_5.5.0_3.0_1725776139689.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_greek_modern_russian_cased_pipeline_en_5.5.0_3.0_1725776139689.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_english_greek_modern_russian_cased_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_english_greek_modern_russian_cased_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_greek_modern_russian_cased_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|273.6 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-el-ru-cased + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_multilingual_cased_regression_finetuned_ptt_pipeline_xx.md b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_multilingual_cased_regression_finetuned_ptt_pipeline_xx.md new file mode 100644 index 00000000000000..2dd7f9bef39094 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_multilingual_cased_regression_finetuned_ptt_pipeline_xx.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Multilingual distilbert_base_multilingual_cased_regression_finetuned_ptt_pipeline pipeline DistilBertForSequenceClassification from Mou11209203 +author: John Snow Labs +name: distilbert_base_multilingual_cased_regression_finetuned_ptt_pipeline +date: 2024-09-08 +tags: [xx, open_source, pipeline, onnx] +task: Text Classification +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_multilingual_cased_regression_finetuned_ptt_pipeline` is a Multilingual model originally trained by Mou11209203. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_multilingual_cased_regression_finetuned_ptt_pipeline_xx_5.5.0_3.0_1725777095536.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_multilingual_cased_regression_finetuned_ptt_pipeline_xx_5.5.0_3.0_1725777095536.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_multilingual_cased_regression_finetuned_ptt_pipeline", lang = "xx") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_multilingual_cased_regression_finetuned_ptt_pipeline", lang = "xx") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_multilingual_cased_regression_finetuned_ptt_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|xx| +|Size:|507.6 MB| + +## References + +https://huggingface.co/Mou11209203/distilbert-base-multilingual-cased_regression_finetuned_ptt + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_clinc_schnatz65_en.md b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_clinc_schnatz65_en.md new file mode 100644 index 00000000000000..15d7b46069e723 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_clinc_schnatz65_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_clinc_schnatz65 DistilBertForSequenceClassification from Schnatz65 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_clinc_schnatz65 +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_clinc_schnatz65` is a English model originally trained by Schnatz65. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_clinc_schnatz65_en_5.5.0_3.0_1725764240779.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_clinc_schnatz65_en_5.5.0_3.0_1725764240779.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_clinc_schnatz65","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_clinc_schnatz65", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_clinc_schnatz65| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.9 MB| + +## References + +https://huggingface.co/Schnatz65/distilbert-base-uncased-finetuned-clinc \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_emotion_bistudent_en.md b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_emotion_bistudent_en.md new file mode 100644 index 00000000000000..0e4f7841cf840c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_emotion_bistudent_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_emotion_bistudent DistilBertForSequenceClassification from BIStudent +author: John Snow Labs +name: distilbert_base_uncased_finetuned_emotion_bistudent +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_emotion_bistudent` is a English model originally trained by BIStudent. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_bistudent_en_5.5.0_3.0_1725764772035.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_bistudent_en_5.5.0_3.0_1725764772035.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_emotion_bistudent","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_emotion_bistudent", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_emotion_bistudent| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/BIStudent/distilbert-base-uncased-finetuned-emotion \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_emotion_bistudent_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_emotion_bistudent_pipeline_en.md new file mode 100644 index 00000000000000..37c499d6394d06 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_emotion_bistudent_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_emotion_bistudent_pipeline pipeline DistilBertForSequenceClassification from BIStudent +author: John Snow Labs +name: distilbert_base_uncased_finetuned_emotion_bistudent_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_emotion_bistudent_pipeline` is a English model originally trained by BIStudent. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_bistudent_pipeline_en_5.5.0_3.0_1725764783845.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_bistudent_pipeline_en_5.5.0_3.0_1725764783845.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_emotion_bistudent_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_emotion_bistudent_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_emotion_bistudent_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/BIStudent/distilbert-base-uncased-finetuned-emotion + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_emotion_lilvoda_en.md b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_emotion_lilvoda_en.md new file mode 100644 index 00000000000000..e4bbf8507e4ff4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_emotion_lilvoda_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_emotion_lilvoda DistilBertForSequenceClassification from lilvoda +author: John Snow Labs +name: distilbert_base_uncased_finetuned_emotion_lilvoda +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_emotion_lilvoda` is a English model originally trained by lilvoda. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_lilvoda_en_5.5.0_3.0_1725775247880.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_lilvoda_en_5.5.0_3.0_1725775247880.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_emotion_lilvoda","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_emotion_lilvoda", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_emotion_lilvoda| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/lilvoda/distilbert-base-uncased-finetuned-emotion \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_emotion_niwang2024_en.md b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_emotion_niwang2024_en.md new file mode 100644 index 00000000000000..07f9c00fa384eb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_emotion_niwang2024_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_emotion_niwang2024 DistilBertForSequenceClassification from NiWang2024 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_emotion_niwang2024 +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_emotion_niwang2024` is a English model originally trained by NiWang2024. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_niwang2024_en_5.5.0_3.0_1725774745914.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_niwang2024_en_5.5.0_3.0_1725774745914.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_emotion_niwang2024","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_emotion_niwang2024", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_emotion_niwang2024| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/NiWang2024/distilbert-base-uncased-finetuned-emotion \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_emotion_schnatz65_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_emotion_schnatz65_pipeline_en.md new file mode 100644 index 00000000000000..438887b30d5a4f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_emotion_schnatz65_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_emotion_schnatz65_pipeline pipeline DistilBertForSequenceClassification from Schnatz65 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_emotion_schnatz65_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_emotion_schnatz65_pipeline` is a English model originally trained by Schnatz65. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_schnatz65_pipeline_en_5.5.0_3.0_1725764640676.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_schnatz65_pipeline_en_5.5.0_3.0_1725764640676.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_emotion_schnatz65_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_emotion_schnatz65_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_emotion_schnatz65_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Schnatz65/distilbert-base-uncased-finetuned-emotion + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_emotion_talzoomanzoo_en.md b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_emotion_talzoomanzoo_en.md new file mode 100644 index 00000000000000..7e62f1ae051799 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_emotion_talzoomanzoo_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_emotion_talzoomanzoo DistilBertForSequenceClassification from talzoomanzoo +author: John Snow Labs +name: distilbert_base_uncased_finetuned_emotion_talzoomanzoo +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_emotion_talzoomanzoo` is a English model originally trained by talzoomanzoo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_talzoomanzoo_en_5.5.0_3.0_1725764796858.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_talzoomanzoo_en_5.5.0_3.0_1725764796858.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_emotion_talzoomanzoo","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_emotion_talzoomanzoo", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_emotion_talzoomanzoo| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/talzoomanzoo/distilbert-base-uncased-finetuned-emotion \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_emotion_with_annotated_by_gpt35_en.md b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_emotion_with_annotated_by_gpt35_en.md new file mode 100644 index 00000000000000..46a97a022c6163 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_emotion_with_annotated_by_gpt35_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_emotion_with_annotated_by_gpt35 DistilBertForSequenceClassification from atsstagram +author: John Snow Labs +name: distilbert_base_uncased_finetuned_emotion_with_annotated_by_gpt35 +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_emotion_with_annotated_by_gpt35` is a English model originally trained by atsstagram. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_with_annotated_by_gpt35_en_5.5.0_3.0_1725774942721.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_with_annotated_by_gpt35_en_5.5.0_3.0_1725774942721.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_emotion_with_annotated_by_gpt35","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_emotion_with_annotated_by_gpt35", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_emotion_with_annotated_by_gpt35| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/atsstagram/distilbert-base-uncased-finetuned-emotion-with-annotated-by-gpt35 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_emotion_with_annotated_by_gpt35_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_emotion_with_annotated_by_gpt35_pipeline_en.md new file mode 100644 index 00000000000000..76038d8e6c70a7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_emotion_with_annotated_by_gpt35_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_emotion_with_annotated_by_gpt35_pipeline pipeline DistilBertForSequenceClassification from atsstagram +author: John Snow Labs +name: distilbert_base_uncased_finetuned_emotion_with_annotated_by_gpt35_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_emotion_with_annotated_by_gpt35_pipeline` is a English model originally trained by atsstagram. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_with_annotated_by_gpt35_pipeline_en_5.5.0_3.0_1725774955329.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_emotion_with_annotated_by_gpt35_pipeline_en_5.5.0_3.0_1725774955329.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_emotion_with_annotated_by_gpt35_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_emotion_with_annotated_by_gpt35_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_emotion_with_annotated_by_gpt35_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/atsstagram/distilbert-base-uncased-finetuned-emotion-with-annotated-by-gpt35 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_imdb_adrien35_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_imdb_adrien35_pipeline_en.md new file mode 100644 index 00000000000000..2177e37331df32 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_imdb_adrien35_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_adrien35_pipeline pipeline DistilBertEmbeddings from Adrien35 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_adrien35_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_adrien35_pipeline` is a English model originally trained by Adrien35. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_adrien35_pipeline_en_5.5.0_3.0_1725776372646.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_adrien35_pipeline_en_5.5.0_3.0_1725776372646.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_adrien35_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_adrien35_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_adrien35_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Adrien35/distilbert-base-uncased-finetuned-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_imdb_dylettante_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_imdb_dylettante_pipeline_en.md new file mode 100644 index 00000000000000..2d857f42e062cb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_imdb_dylettante_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_dylettante_pipeline pipeline DistilBertEmbeddings from Dylettante +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_dylettante_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_dylettante_pipeline` is a English model originally trained by Dylettante. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_dylettante_pipeline_en_5.5.0_3.0_1725782452188.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_dylettante_pipeline_en_5.5.0_3.0_1725782452188.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_dylettante_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_dylettante_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_dylettante_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Dylettante/distilbert-base-uncased-finetuned-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_imdb_ellieburton_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_imdb_ellieburton_pipeline_en.md new file mode 100644 index 00000000000000..cbbc1a86100442 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_imdb_ellieburton_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_ellieburton_pipeline pipeline DistilBertEmbeddings from ellieburton +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_ellieburton_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_ellieburton_pipeline` is a English model originally trained by ellieburton. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_ellieburton_pipeline_en_5.5.0_3.0_1725782445563.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_ellieburton_pipeline_en_5.5.0_3.0_1725782445563.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_ellieburton_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_imdb_ellieburton_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_ellieburton_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/ellieburton/distilbert-base-uncased-finetuned-imdb + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_imdb_lidiapierre_en.md b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_imdb_lidiapierre_en.md new file mode 100644 index 00000000000000..01c4c2d0d14f0a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_imdb_lidiapierre_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_lidiapierre DistilBertEmbeddings from lidiapierre +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_lidiapierre +date: 2024-09-08 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_lidiapierre` is a English model originally trained by lidiapierre. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_lidiapierre_en_5.5.0_3.0_1725782667228.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_lidiapierre_en_5.5.0_3.0_1725782667228.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_lidiapierre","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_lidiapierre","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
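+As a follow-up sketch, the token vectors can be pulled out of `pipelineDF` like this (assumes the example above has been run; the column and field names follow the `setOutputCol` calls, while the display options are illustrative):
+
+```python
+from pyspark.sql import functions as F
+
+# "embeddings" holds an array of token annotations per row; explode to one token per row
+pipelineDF.select(F.explode("embeddings").alias("emb")) \
+    .select(F.col("emb.result").alias("token"),
+            F.col("emb.embeddings").alias("vector")) \
+    .show(5, truncate=80)
+```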
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_lidiapierre| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/lidiapierre/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_imdb_majkeldcember_en.md b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_imdb_majkeldcember_en.md new file mode 100644 index 00000000000000..397e2e7c4e8136 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_imdb_majkeldcember_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_majkeldcember DistilBertEmbeddings from MajkelDcember +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_majkeldcember +date: 2024-09-08 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_majkeldcember` is a English model originally trained by MajkelDcember. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_majkeldcember_en_5.5.0_3.0_1725782303928.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_majkeldcember_en_5.5.0_3.0_1725782303928.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_majkeldcember","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_majkeldcember","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_majkeldcember| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/MajkelDcember/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_imdb_marcosautuori_en.md b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_imdb_marcosautuori_en.md new file mode 100644 index 00000000000000..dd02460b26ca9c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_imdb_marcosautuori_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_marcosautuori DistilBertEmbeddings from MarcosAutuori +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_marcosautuori +date: 2024-09-08 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_marcosautuori` is a English model originally trained by MarcosAutuori. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_marcosautuori_en_5.5.0_3.0_1725782409360.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_marcosautuori_en_5.5.0_3.0_1725782409360.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_marcosautuori","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_marcosautuori","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_marcosautuori| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/MarcosAutuori/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_imdb_pbwinter_en.md b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_imdb_pbwinter_en.md new file mode 100644 index 00000000000000..83c0d9d964edc6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_imdb_pbwinter_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_pbwinter DistilBertEmbeddings from pbwinter +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_pbwinter +date: 2024-09-08 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_pbwinter` is a English model originally trained by pbwinter. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_pbwinter_en_5.5.0_3.0_1725782529876.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_pbwinter_en_5.5.0_3.0_1725782529876.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_pbwinter","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_pbwinter","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_pbwinter| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/pbwinter/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_imdb_xxxxxcz_en.md b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_imdb_xxxxxcz_en.md new file mode 100644 index 00000000000000..c19ea8b7ef9842 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_imdb_xxxxxcz_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_xxxxxcz DistilBertEmbeddings from xxxxxcz +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_xxxxxcz +date: 2024-09-08 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_imdb_xxxxxcz` is a English model originally trained by xxxxxcz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_xxxxxcz_en_5.5.0_3.0_1725775890429.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_xxxxxcz_en_5.5.0_3.0_1725775890429.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_xxxxxcz","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_xxxxxcz","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_xxxxxcz| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/xxxxxcz/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_news_en.md b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_news_en.md new file mode 100644 index 00000000000000..50ecf5e567c95d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_news_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_news DistilBertForSequenceClassification from ruban19 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_news +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_news` is a English model originally trained by ruban19. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_news_en_5.5.0_3.0_1725764852712.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_news_en_5.5.0_3.0_1725764852712.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_news","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_base_uncased_finetuned_news", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
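+A short sketch for reading the prediction back out of `pipelineDF` (assumes the example above has been run; "class" is the classifier's output column set above):
+
+```python
+from pyspark.sql import functions as F
+
+# "class" holds an array of category annotations; take the first predicted label per row
+pipelineDF.select("text", F.col("class.result").getItem(0).alias("predicted_label")) \
+    .show(truncate=False)
+```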
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_news| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/ruban19/distilbert-base-uncased-finetuned-news \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_stationary_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_stationary_pipeline_en.md new file mode 100644 index 00000000000000..e39f288620ab59 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_stationary_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_stationary_pipeline pipeline DistilBertForSequenceClassification from Luggi +author: John Snow Labs +name: distilbert_base_uncased_finetuned_stationary_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_stationary_pipeline` is a English model originally trained by Luggi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_stationary_pipeline_en_5.5.0_3.0_1725764894105.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_stationary_pipeline_en_5.5.0_3.0_1725764894105.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_stationary_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_stationary_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_stationary_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Luggi/distilbert-base-uncased-finetuned-stationary + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_streamers_accelerate_en.md b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_streamers_accelerate_en.md new file mode 100644 index 00000000000000..2a0ab072f89fff --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_finetuned_streamers_accelerate_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_streamers_accelerate DistilBertEmbeddings from muhbdeir +author: John Snow Labs +name: distilbert_base_uncased_finetuned_streamers_accelerate +date: 2024-09-08 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_finetuned_streamers_accelerate` is a English model originally trained by muhbdeir. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_streamers_accelerate_en_5.5.0_3.0_1725782200288.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_streamers_accelerate_en_5.5.0_3.0_1725782200288.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_streamers_accelerate","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_streamers_accelerate","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_streamers_accelerate| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/muhbdeir/distilbert-base-uncased-finetuned-streamers-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_odm_zphr_0st13sd_ut72ut1large13pfxnf_simsp400_clean200_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_odm_zphr_0st13sd_ut72ut1large13pfxnf_simsp400_clean200_pipeline_en.md new file mode 100644 index 00000000000000..830c02254379d4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distilbert_base_uncased_odm_zphr_0st13sd_ut72ut1large13pfxnf_simsp400_clean200_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_base_uncased_odm_zphr_0st13sd_ut72ut1large13pfxnf_simsp400_clean200_pipeline pipeline DistilBertForSequenceClassification from tom192180 +author: John Snow Labs +name: distilbert_base_uncased_odm_zphr_0st13sd_ut72ut1large13pfxnf_simsp400_clean200_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_uncased_odm_zphr_0st13sd_ut72ut1large13pfxnf_simsp400_clean200_pipeline` is a English model originally trained by tom192180. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_odm_zphr_0st13sd_ut72ut1large13pfxnf_simsp400_clean200_pipeline_en_5.5.0_3.0_1725777506366.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_odm_zphr_0st13sd_ut72ut1large13pfxnf_simsp400_clean200_pipeline_en_5.5.0_3.0_1725777506366.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_base_uncased_odm_zphr_0st13sd_ut72ut1large13pfxnf_simsp400_clean200_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_base_uncased_odm_zphr_0st13sd_ut72ut1large13pfxnf_simsp400_clean200_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_odm_zphr_0st13sd_ut72ut1large13pfxnf_simsp400_clean200_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.7 MB| + +## References + +https://huggingface.co/tom192180/distilbert-base-uncased_odm_zphr_0st13sd_ut72ut1large13PfxNf_simsp400_clean200 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distilbert_coarse5_js_1_1_en.md b/docs/_posts/ahmedlone127/2024-09-08-distilbert_coarse5_js_1_1_en.md new file mode 100644 index 00000000000000..196377080cb566 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distilbert_coarse5_js_1_1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_coarse5_js_1_1 DistilBertForSequenceClassification from jonbarlow +author: John Snow Labs +name: distilbert_coarse5_js_1_1 +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_coarse5_js_1_1` is a English model originally trained by jonbarlow. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_coarse5_js_1_1_en_5.5.0_3.0_1725775225552.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_coarse5_js_1_1_en_5.5.0_3.0_1725775225552.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_coarse5_js_1_1","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_coarse5_js_1_1", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_coarse5_js_1_1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/jonbarlow/distilbert_coarse5_js_1.1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distilbert_coarse5_js_1_1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-distilbert_coarse5_js_1_1_pipeline_en.md new file mode 100644 index 00000000000000..1f03fd4979c5fd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distilbert_coarse5_js_1_1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_coarse5_js_1_1_pipeline pipeline DistilBertForSequenceClassification from jonbarlow +author: John Snow Labs +name: distilbert_coarse5_js_1_1_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_coarse5_js_1_1_pipeline` is a English model originally trained by jonbarlow. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_coarse5_js_1_1_pipeline_en_5.5.0_3.0_1725775239089.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_coarse5_js_1_1_pipeline_en_5.5.0_3.0_1725775239089.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_coarse5_js_1_1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_coarse5_js_1_1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_coarse5_js_1_1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/jonbarlow/distilbert_coarse5_js_1.1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distilbert_masking_1perc_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-distilbert_masking_1perc_pipeline_en.md new file mode 100644 index 00000000000000..7a16cf0b39d869 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distilbert_masking_1perc_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_masking_1perc_pipeline pipeline DistilBertEmbeddings from johannes-garstenauer +author: John Snow Labs +name: distilbert_masking_1perc_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_masking_1perc_pipeline` is a English model originally trained by johannes-garstenauer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_masking_1perc_pipeline_en_5.5.0_3.0_1725782324479.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_masking_1perc_pipeline_en_5.5.0_3.0_1725782324479.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_masking_1perc_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_masking_1perc_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_masking_1perc_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|247.6 MB| + +## References + +https://huggingface.co/johannes-garstenauer/distilbert-masking-1perc + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distilbert_movie_review_sentiment_classifier_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-distilbert_movie_review_sentiment_classifier_3_pipeline_en.md new file mode 100644 index 00000000000000..877c5e0b5d9a8f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distilbert_movie_review_sentiment_classifier_3_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_movie_review_sentiment_classifier_3_pipeline pipeline DistilBertForSequenceClassification from gyesibiney +author: John Snow Labs +name: distilbert_movie_review_sentiment_classifier_3_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_movie_review_sentiment_classifier_3_pipeline` is a English model originally trained by gyesibiney. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_movie_review_sentiment_classifier_3_pipeline_en_5.5.0_3.0_1725777386102.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_movie_review_sentiment_classifier_3_pipeline_en_5.5.0_3.0_1725777386102.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_movie_review_sentiment_classifier_3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_movie_review_sentiment_classifier_3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_movie_review_sentiment_classifier_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|250.9 MB| + +## References + +https://huggingface.co/gyesibiney/Distilbert-movie-review-sentiment-classifier-3 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distilbert_nsfw_text_classifier_en.md b/docs/_posts/ahmedlone127/2024-09-08-distilbert_nsfw_text_classifier_en.md new file mode 100644 index 00000000000000..41d01c75fa9181 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distilbert_nsfw_text_classifier_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distilbert_nsfw_text_classifier DistilBertForSequenceClassification from eliasalbouzidi +author: John Snow Labs +name: distilbert_nsfw_text_classifier +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_nsfw_text_classifier` is a English model originally trained by eliasalbouzidi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_nsfw_text_classifier_en_5.5.0_3.0_1725764953000.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_nsfw_text_classifier_en_5.5.0_3.0_1725764953000.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_nsfw_text_classifier","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distilbert_nsfw_text_classifier", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
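+For quick, single-text experiments, the fitted pipeline can also be wrapped in a LightPipeline (a sketch; the sample sentence is illustrative and `pipelineModel` comes from the example above):
+
+```python
+from sparknlp.base import LightPipeline
+
+# LightPipeline runs the same stages on plain Python strings, without building a DataFrame
+light = LightPipeline(pipelineModel)
+light.annotate("This text is perfectly safe for work.")["class"]
+```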
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_nsfw_text_classifier| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/eliasalbouzidi/distilbert-nsfw-text-classifier \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distilbert_nsfw_text_classifier_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-distilbert_nsfw_text_classifier_pipeline_en.md new file mode 100644 index 00000000000000..52a44480f9e061 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distilbert_nsfw_text_classifier_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_nsfw_text_classifier_pipeline pipeline DistilBertForSequenceClassification from eliasalbouzidi +author: John Snow Labs +name: distilbert_nsfw_text_classifier_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_nsfw_text_classifier_pipeline` is a English model originally trained by eliasalbouzidi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_nsfw_text_classifier_pipeline_en_5.5.0_3.0_1725764964301.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_nsfw_text_classifier_pipeline_en_5.5.0_3.0_1725764964301.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_nsfw_text_classifier_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_nsfw_text_classifier_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_nsfw_text_classifier_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/eliasalbouzidi/distilbert-nsfw-text-classifier + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distilbert_tweet_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-distilbert_tweet_pipeline_en.md new file mode 100644 index 00000000000000..2e8395fa584563 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distilbert_tweet_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distilbert_tweet_pipeline pipeline DistilBertForSequenceClassification from Sangmitra-06 +author: John Snow Labs +name: distilbert_tweet_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_tweet_pipeline` is a English model originally trained by Sangmitra-06. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_tweet_pipeline_en_5.5.0_3.0_1725775467183.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_tweet_pipeline_en_5.5.0_3.0_1725775467183.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distilbert_tweet_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distilbert_tweet_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_tweet_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Sangmitra-06/DistilBERT_tweet + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distillbert_sentiment_analysis_en.md b/docs/_posts/ahmedlone127/2024-09-08-distillbert_sentiment_analysis_en.md new file mode 100644 index 00000000000000..3e900324630195 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distillbert_sentiment_analysis_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English distillbert_sentiment_analysis DistilBertForSequenceClassification from adhityaprimandhika +author: John Snow Labs +name: distillbert_sentiment_analysis +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distillbert_sentiment_analysis` is a English model originally trained by adhityaprimandhika. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distillbert_sentiment_analysis_en_5.5.0_3.0_1725764860266.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distillbert_sentiment_analysis_en_5.5.0_3.0_1725764860266.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("distillbert_sentiment_analysis","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("distillbert_sentiment_analysis", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
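+If the fitted pipeline is needed again later, it can be saved and reloaded with standard Spark ML mechanics (a sketch; the path below is purely illustrative and `pipelineModel`/`data` come from the example above):
+
+```python
+from pyspark.ml import PipelineModel
+
+# Persist the fitted pipeline and load it back for reuse
+pipelineModel.write().overwrite().save("/tmp/distillbert_sentiment_analysis_model")
+reloaded = PipelineModel.load("/tmp/distillbert_sentiment_analysis_model")
+reloaded.transform(data).select("class.result").show(truncate=False)
+```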
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distillbert_sentiment_analysis| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|507.6 MB| + +## References + +https://huggingface.co/adhityaprimandhika/distillbert_sentiment_analysis \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-distillbert_sentiment_analysis_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-distillbert_sentiment_analysis_pipeline_en.md new file mode 100644 index 00000000000000..a1e8a837687332 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-distillbert_sentiment_analysis_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English distillbert_sentiment_analysis_pipeline pipeline DistilBertForSequenceClassification from adhityaprimandhika +author: John Snow Labs +name: distillbert_sentiment_analysis_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distillbert_sentiment_analysis_pipeline` is a English model originally trained by adhityaprimandhika. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distillbert_sentiment_analysis_pipeline_en_5.5.0_3.0_1725764884222.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distillbert_sentiment_analysis_pipeline_en_5.5.0_3.0_1725764884222.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("distillbert_sentiment_analysis_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("distillbert_sentiment_analysis_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distillbert_sentiment_analysis_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|507.6 MB| + +## References + +https://huggingface.co/adhityaprimandhika/distillbert_sentiment_analysis + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-facets_gpt_35_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-facets_gpt_35_pipeline_en.md new file mode 100644 index 00000000000000..7f118ee0db624d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-facets_gpt_35_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English facets_gpt_35_pipeline pipeline MPNetEmbeddings from ingeol +author: John Snow Labs +name: facets_gpt_35_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`facets_gpt_35_pipeline` is a English model originally trained by ingeol. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/facets_gpt_35_pipeline_en_5.5.0_3.0_1725769690286.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/facets_gpt_35_pipeline_en_5.5.0_3.0_1725769690286.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("facets_gpt_35_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("facets_gpt_35_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
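+Because this pipeline contains only a DocumentAssembler and MPNetEmbeddings, a quick way to see what it returns is to inspect the output schema (a minimal sketch; the `spark` session, the "text" input column, and the sample query are assumptions):
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("facets_gpt_35_pipeline", lang = "en")
+df = spark.createDataFrame([["what are the health benefits of green tea"]]).toDF("text")
+result = pipeline.transform(df)
+result.printSchema()  # lists the annotation columns; MPNetEmbeddings produces sentence-level vectors
+```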
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|facets_gpt_35_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/ingeol/facets_gpt_35 + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-finance_news_classifier_en.md b/docs/_posts/ahmedlone127/2024-09-08-finance_news_classifier_en.md new file mode 100644 index 00000000000000..d3f13db9ae412f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-finance_news_classifier_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English finance_news_classifier XlmRoBertaForSequenceClassification from Hyeonseo +author: John Snow Labs +name: finance_news_classifier +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finance_news_classifier` is a English model originally trained by Hyeonseo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finance_news_classifier_en_5.5.0_3.0_1725780995653.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finance_news_classifier_en_5.5.0_3.0_1725780995653.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("finance_news_classifier","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("finance_news_classifier", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
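+Once the pipeline has been fitted and applied as above, the predicted label for each input row can be read from the `class` annotation column configured on the classifier. A minimal sketch:
+
+```python
+# "class" is the classifier's output column set above; result holds the predicted label(s).
+pipelineDF.select("text", "class.result").show(truncate=False)
+```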
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finance_news_classifier| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Hyeonseo/finance_news_classifier \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-gal_enptsp_xlm_r_gl.md b/docs/_posts/ahmedlone127/2024-09-08-gal_enptsp_xlm_r_gl.md new file mode 100644 index 00000000000000..797270074021e7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-gal_enptsp_xlm_r_gl.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Galician gal_enptsp_xlm_r XlmRoBertaForTokenClassification from mbruton +author: John Snow Labs +name: gal_enptsp_xlm_r +date: 2024-09-08 +tags: [gl, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: gl +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`gal_enptsp_xlm_r` is a Galician model originally trained by mbruton. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gal_enptsp_xlm_r_gl_5.5.0_3.0_1725773113423.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gal_enptsp_xlm_r_gl_5.5.0_3.0_1725773113423.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("gal_enptsp_xlm_r","gl") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("gal_enptsp_xlm_r", "gl")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gal_enptsp_xlm_r| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|gl| +|Size:|878.3 MB| + +## References + +https://huggingface.co/mbruton/gal_enptsp_XLM-R \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-gal_portuguese_xlm_r_pipeline_gl.md b/docs/_posts/ahmedlone127/2024-09-08-gal_portuguese_xlm_r_pipeline_gl.md new file mode 100644 index 00000000000000..c3c24706d542fb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-gal_portuguese_xlm_r_pipeline_gl.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Galician gal_portuguese_xlm_r_pipeline pipeline XlmRoBertaForTokenClassification from mbruton +author: John Snow Labs +name: gal_portuguese_xlm_r_pipeline +date: 2024-09-08 +tags: [gl, open_source, pipeline, onnx] +task: Named Entity Recognition +language: gl +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`gal_portuguese_xlm_r_pipeline` is a Galician model originally trained by mbruton. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gal_portuguese_xlm_r_pipeline_gl_5.5.0_3.0_1725773422492.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gal_portuguese_xlm_r_pipeline_gl_5.5.0_3.0_1725773422492.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("gal_portuguese_xlm_r_pipeline", lang = "gl")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val pipeline = new PretrainedPipeline("gal_portuguese_xlm_r_pipeline", lang = "gl")
+val annotations = pipeline.transform(df)
+
+```
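+Besides `transform` on a DataFrame, a `PretrainedPipeline` can also be applied directly to a plain string with `annotate`. A minimal sketch; the keys of the returned dictionary depend on the stages packaged inside this particular pipeline:
+
+```python
+# annotate() runs the pipeline on one string and returns a dict of annotator outputs.
+result = pipeline.annotate("O río Miño pasa por Ourense.")
+print(result.keys())
+```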
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gal_portuguese_xlm_r_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|gl| +|Size:|857.1 MB| + +## References + +https://huggingface.co/mbruton/gal_pt_XLM-R + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-gal_sayula_popoluca_iwcg_4_en.md b/docs/_posts/ahmedlone127/2024-09-08-gal_sayula_popoluca_iwcg_4_en.md new file mode 100644 index 00000000000000..748ef3902a029b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-gal_sayula_popoluca_iwcg_4_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English gal_sayula_popoluca_iwcg_4 XlmRoBertaForTokenClassification from homersimpson +author: John Snow Labs +name: gal_sayula_popoluca_iwcg_4 +date: 2024-09-08 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`gal_sayula_popoluca_iwcg_4` is a English model originally trained by homersimpson. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gal_sayula_popoluca_iwcg_4_en_5.5.0_3.0_1725772349538.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gal_sayula_popoluca_iwcg_4_en_5.5.0_3.0_1725772349538.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("gal_sayula_popoluca_iwcg_4","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("gal_sayula_popoluca_iwcg_4", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
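+After transforming the data as shown above, each token and the tag predicted for it can be viewed side by side through the `token` and `ner` output columns configured in the code. A minimal sketch:
+
+```python
+# Tokens and their predicted tags, as produced by the token classifier above.
+pipelineDF.select("token.result", "ner.result").show(truncate=False)
+```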
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gal_sayula_popoluca_iwcg_4| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|424.0 MB| + +## References + +https://huggingface.co/homersimpson/gal-pos-iwcg-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-has_the_doctor_specified_whether_the_patient_can_belarusian_seen_heard_bert_first512_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-has_the_doctor_specified_whether_the_patient_can_belarusian_seen_heard_bert_first512_pipeline_en.md new file mode 100644 index 00000000000000..0b0957918a6045 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-has_the_doctor_specified_whether_the_patient_can_belarusian_seen_heard_bert_first512_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English has_the_doctor_specified_whether_the_patient_can_belarusian_seen_heard_bert_first512_pipeline pipeline BertForSequenceClassification from etadevosyan +author: John Snow Labs +name: has_the_doctor_specified_whether_the_patient_can_belarusian_seen_heard_bert_first512_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`has_the_doctor_specified_whether_the_patient_can_belarusian_seen_heard_bert_first512_pipeline` is a English model originally trained by etadevosyan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/has_the_doctor_specified_whether_the_patient_can_belarusian_seen_heard_bert_first512_pipeline_en_5.5.0_3.0_1725768530431.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/has_the_doctor_specified_whether_the_patient_can_belarusian_seen_heard_bert_first512_pipeline_en_5.5.0_3.0_1725768530431.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("has_the_doctor_specified_whether_the_patient_can_belarusian_seen_heard_bert_first512_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val pipeline = new PretrainedPipeline("has_the_doctor_specified_whether_the_patient_can_belarusian_seen_heard_bert_first512_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|has_the_doctor_specified_whether_the_patient_can_belarusian_seen_heard_bert_first512_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|666.5 MB| + +## References + +https://huggingface.co/etadevosyan/has_the_doctor_specified_whether_the_patient_can_be_seen_heard_bert_First512 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-hw1_eva1209_en.md b/docs/_posts/ahmedlone127/2024-09-08-hw1_eva1209_en.md new file mode 100644 index 00000000000000..73f8773c69cabd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-hw1_eva1209_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English hw1_eva1209 DistilBertForSequenceClassification from Eva1209 +author: John Snow Labs +name: hw1_eva1209 +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hw1_eva1209` is a English model originally trained by Eva1209. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hw1_eva1209_en_5.5.0_3.0_1725774859801.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hw1_eva1209_en_5.5.0_3.0_1725774859801.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("hw1_eva1209","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("hw1_eva1209", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hw1_eva1209| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Eva1209/HW1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-hw_1_aia_tclin_en.md b/docs/_posts/ahmedlone127/2024-09-08-hw_1_aia_tclin_en.md new file mode 100644 index 00000000000000..acf4b40e2aeb75 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-hw_1_aia_tclin_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English hw_1_aia_tclin DistilBertForSequenceClassification from AIA-tclin +author: John Snow Labs +name: hw_1_aia_tclin +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hw_1_aia_tclin` is a English model originally trained by AIA-tclin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hw_1_aia_tclin_en_5.5.0_3.0_1725777054179.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hw_1_aia_tclin_en_5.5.0_3.0_1725777054179.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("hw_1_aia_tclin","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("hw_1_aia_tclin", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hw_1_aia_tclin| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/AIA-tclin/hw-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-hw_1_aia_tclin_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-hw_1_aia_tclin_pipeline_en.md new file mode 100644 index 00000000000000..06f6cad6c56781 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-hw_1_aia_tclin_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English hw_1_aia_tclin_pipeline pipeline DistilBertForSequenceClassification from AIA-tclin +author: John Snow Labs +name: hw_1_aia_tclin_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hw_1_aia_tclin_pipeline` is a English model originally trained by AIA-tclin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hw_1_aia_tclin_pipeline_en_5.5.0_3.0_1725777066372.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hw_1_aia_tclin_pipeline_en_5.5.0_3.0_1725777066372.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("hw_1_aia_tclin_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val pipeline = new PretrainedPipeline("hw_1_aia_tclin_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hw_1_aia_tclin_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/AIA-tclin/hw-1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-imdb_3_en.md b/docs/_posts/ahmedlone127/2024-09-08-imdb_3_en.md new file mode 100644 index 00000000000000..1fd6a5255043dc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-imdb_3_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English imdb_3 DistilBertForSequenceClassification from draghicivlad +author: John Snow Labs +name: imdb_3 +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`imdb_3` is a English model originally trained by draghicivlad. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/imdb_3_en_5.5.0_3.0_1725764237542.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/imdb_3_en_5.5.0_3.0_1725764237542.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("imdb_3","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("imdb_3", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|imdb_3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/draghicivlad/imdb_3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-imdb_distilbert_apoorvaec1030_en.md b/docs/_posts/ahmedlone127/2024-09-08-imdb_distilbert_apoorvaec1030_en.md new file mode 100644 index 00000000000000..1bd39d0f3f8e40 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-imdb_distilbert_apoorvaec1030_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English imdb_distilbert_apoorvaec1030 DistilBertForSequenceClassification from apoorvaec1030 +author: John Snow Labs +name: imdb_distilbert_apoorvaec1030 +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`imdb_distilbert_apoorvaec1030` is a English model originally trained by apoorvaec1030. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/imdb_distilbert_apoorvaec1030_en_5.5.0_3.0_1725764601069.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/imdb_distilbert_apoorvaec1030_en_5.5.0_3.0_1725764601069.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("imdb_distilbert_apoorvaec1030","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("imdb_distilbert_apoorvaec1030", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|imdb_distilbert_apoorvaec1030| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/apoorvaec1030/imdb_distilBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-imdb_distilbert_apoorvaec1030_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-imdb_distilbert_apoorvaec1030_pipeline_en.md new file mode 100644 index 00000000000000..ed7c27d76553e9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-imdb_distilbert_apoorvaec1030_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English imdb_distilbert_apoorvaec1030_pipeline pipeline DistilBertForSequenceClassification from apoorvaec1030 +author: John Snow Labs +name: imdb_distilbert_apoorvaec1030_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`imdb_distilbert_apoorvaec1030_pipeline` is a English model originally trained by apoorvaec1030. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/imdb_distilbert_apoorvaec1030_pipeline_en_5.5.0_3.0_1725764612955.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/imdb_distilbert_apoorvaec1030_pipeline_en_5.5.0_3.0_1725764612955.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("imdb_distilbert_apoorvaec1030_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val pipeline = new PretrainedPipeline("imdb_distilbert_apoorvaec1030_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|imdb_distilbert_apoorvaec1030_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/apoorvaec1030/imdb_distilBERT + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-impara_qe_en.md b/docs/_posts/ahmedlone127/2024-09-08-impara_qe_en.md new file mode 100644 index 00000000000000..2f9aa7cbd4740d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-impara_qe_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English impara_qe BertForSequenceClassification from gotutiyan +author: John Snow Labs +name: impara_qe +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, bert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`impara_qe` is a English model originally trained by gotutiyan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/impara_qe_en_5.5.0_3.0_1725761331149.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/impara_qe_en_5.5.0_3.0_1725761331149.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = BertForSequenceClassification.pretrained("impara_qe","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = BertForSequenceClassification.pretrained("impara_qe", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|impara_qe| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/gotutiyan/IMPARA-QE \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-inde_4_en.md b/docs/_posts/ahmedlone127/2024-09-08-inde_4_en.md new file mode 100644 index 00000000000000..edd37c4441d457 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-inde_4_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English inde_4 RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: inde_4 +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`inde_4` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/inde_4_en_5.5.0_3.0_1725778987645.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/inde_4_en_5.5.0_3.0_1725778987645.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("inde_4","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("inde_4", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|inde_4| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/Inde_4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-indobert_sentiment_analysis_id.md b/docs/_posts/ahmedlone127/2024-09-08-indobert_sentiment_analysis_id.md new file mode 100644 index 00000000000000..b8c4c0d4ef9cf0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-indobert_sentiment_analysis_id.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Indonesian indobert_sentiment_analysis BertForSequenceClassification from crypter70 +author: John Snow Labs +name: indobert_sentiment_analysis +date: 2024-09-08 +tags: [id, open_source, onnx, sequence_classification, bert] +task: Text Classification +language: id +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`indobert_sentiment_analysis` is a Indonesian model originally trained by crypter70. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/indobert_sentiment_analysis_id_5.5.0_3.0_1725767998904.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/indobert_sentiment_analysis_id_5.5.0_3.0_1725767998904.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = BertForSequenceClassification.pretrained("indobert_sentiment_analysis","id") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = BertForSequenceClassification.pretrained("indobert_sentiment_analysis", "id")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|indobert_sentiment_analysis| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|id| +|Size:|466.4 MB| + +## References + +https://huggingface.co/crypter70/IndoBERT-Sentiment-Analysis \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-intent_classifier_frana9812_en.md b/docs/_posts/ahmedlone127/2024-09-08-intent_classifier_frana9812_en.md new file mode 100644 index 00000000000000..5d7b3eb7aebeb1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-intent_classifier_frana9812_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English intent_classifier_frana9812 DistilBertForSequenceClassification from Frana9812 +author: John Snow Labs +name: intent_classifier_frana9812 +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`intent_classifier_frana9812` is a English model originally trained by Frana9812. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/intent_classifier_frana9812_en_5.5.0_3.0_1725774858212.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/intent_classifier_frana9812_en_5.5.0_3.0_1725774858212.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("intent_classifier_frana9812","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("intent_classifier_frana9812", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|intent_classifier_frana9812| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Frana9812/intent_classifier \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-intent_distilbert_classifier_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-intent_distilbert_classifier_pipeline_en.md new file mode 100644 index 00000000000000..d633ae176959fc --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-intent_distilbert_classifier_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English intent_distilbert_classifier_pipeline pipeline DistilBertForSequenceClassification from Maaz911 +author: John Snow Labs +name: intent_distilbert_classifier_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`intent_distilbert_classifier_pipeline` is a English model originally trained by Maaz911. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/intent_distilbert_classifier_pipeline_en_5.5.0_3.0_1725764552671.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/intent_distilbert_classifier_pipeline_en_5.5.0_3.0_1725764552671.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("intent_distilbert_classifier_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val pipeline = new PretrainedPipeline("intent_distilbert_classifier_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|intent_distilbert_classifier_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Maaz911/intent-distilbert-classifier + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-joo_en.md b/docs/_posts/ahmedlone127/2024-09-08-joo_en.md new file mode 100644 index 00000000000000..ef1d0d577f4e69 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-joo_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English joo DistilBertForSequenceClassification from joohwan +author: John Snow Labs +name: joo +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`joo` is a English model originally trained by joohwan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/joo_en_5.5.0_3.0_1725776789604.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/joo_en_5.5.0_3.0_1725776789604.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("joo","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("joo", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|joo| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/joohwan/joo \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-khmer_text_classification_roberta_km.md b/docs/_posts/ahmedlone127/2024-09-08-khmer_text_classification_roberta_km.md new file mode 100644 index 00000000000000..bb81231da0bbd7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-khmer_text_classification_roberta_km.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Central Khmer, Khmer khmer_text_classification_roberta XlmRoBertaForSequenceClassification from seanghay +author: John Snow Labs +name: khmer_text_classification_roberta +date: 2024-09-08 +tags: [km, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: km +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`khmer_text_classification_roberta` is a Central Khmer, Khmer model originally trained by seanghay. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/khmer_text_classification_roberta_km_5.5.0_3.0_1725780406583.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/khmer_text_classification_roberta_km_5.5.0_3.0_1725780406583.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("khmer_text_classification_roberta","km") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("khmer_text_classification_roberta", "km")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|khmer_text_classification_roberta| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|km| +|Size:|865.4 MB| + +## References + +https://huggingface.co/seanghay/khmer-text-classification-roberta \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-khmer_text_classification_roberta_pipeline_km.md b/docs/_posts/ahmedlone127/2024-09-08-khmer_text_classification_roberta_pipeline_km.md new file mode 100644 index 00000000000000..90ce9ad350dedb --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-khmer_text_classification_roberta_pipeline_km.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Central Khmer, Khmer khmer_text_classification_roberta_pipeline pipeline XlmRoBertaForSequenceClassification from seanghay +author: John Snow Labs +name: khmer_text_classification_roberta_pipeline +date: 2024-09-08 +tags: [km, open_source, pipeline, onnx] +task: Text Classification +language: km +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`khmer_text_classification_roberta_pipeline` is a Central Khmer, Khmer model originally trained by seanghay. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/khmer_text_classification_roberta_pipeline_km_5.5.0_3.0_1725780467152.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/khmer_text_classification_roberta_pipeline_km_5.5.0_3.0_1725780467152.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("khmer_text_classification_roberta_pipeline", lang = "km")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val pipeline = new PretrainedPipeline("khmer_text_classification_roberta_pipeline", lang = "km")
+val annotations = pipeline.transform(df)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|khmer_text_classification_roberta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|km| +|Size:|865.4 MB| + +## References + +https://huggingface.co/seanghay/khmer-text-classification-roberta + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-lenu_ewe_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-lenu_ewe_pipeline_en.md new file mode 100644 index 00000000000000..af0f6590e52996 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-lenu_ewe_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English lenu_ewe_pipeline pipeline BertForSequenceClassification from Sociovestix +author: John Snow Labs +name: lenu_ewe_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lenu_ewe_pipeline` is a English model originally trained by Sociovestix. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lenu_ewe_pipeline_en_5.5.0_3.0_1725761267306.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lenu_ewe_pipeline_en_5.5.0_3.0_1725761267306.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+df = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipeline = PretrainedPipeline("lenu_ewe_pipeline", lang = "en")
+annotations = pipeline.transform(df)
+
+```
+```scala
+
+val df = Seq("I love spark-nlp").toDS.toDF("text")
+val pipeline = new PretrainedPipeline("lenu_ewe_pipeline", lang = "en")
+val annotations = pipeline.transform(df)
+
+```
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lenu_ewe_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|627.8 MB| + +## References + +https://huggingface.co/Sociovestix/lenu_EE + +## Included Models + +- DocumentAssembler +- TokenizerModel +- BertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-lexuz1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-lexuz1_pipeline_en.md new file mode 100644 index 00000000000000..6ac980600d44e3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-lexuz1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English lexuz1_pipeline pipeline RoBertaForSequenceClassification from Ravshan +author: John Snow Labs +name: lexuz1_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lexuz1_pipeline` is a English model originally trained by Ravshan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lexuz1_pipeline_en_5.5.0_3.0_1725778469309.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lexuz1_pipeline_en_5.5.0_3.0_1725778469309.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("lexuz1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("lexuz1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lexuz1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|314.1 MB| + +## References + +https://huggingface.co/Ravshan/lexuz1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-linkbert_base_en.md b/docs/_posts/ahmedlone127/2024-09-08-linkbert_base_en.md new file mode 100644 index 00000000000000..53d542e9be4113 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-linkbert_base_en.md @@ -0,0 +1,92 @@ +--- +layout: model +title: English linkbert_base BertEmbeddings from michiyasunaga +author: John Snow Labs +name: linkbert_base +date: 2024-09-08 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`linkbert_base` is a English model originally trained by michiyasunaga. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/linkbert_base_en_5.5.0_3.0_1725768296511.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/linkbert_base_en_5.5.0_3.0_1725768296511.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("linkbert_base","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("linkbert_base", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val data = Seq("I love spark-nlp").toDF("text")
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|linkbert_base| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|405.6 MB| + +## References + +References + +https://huggingface.co/michiyasunaga/LinkBERT-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-luganda_ner_v1_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-luganda_ner_v1_pipeline_en.md new file mode 100644 index 00000000000000..5ec080fc5d1b51 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-luganda_ner_v1_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English luganda_ner_v1_pipeline pipeline XlmRoBertaForTokenClassification from Conrad747 +author: John Snow Labs +name: luganda_ner_v1_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`luganda_ner_v1_pipeline` is a English model originally trained by Conrad747. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/luganda_ner_v1_pipeline_en_5.5.0_3.0_1725772785479.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/luganda_ner_v1_pipeline_en_5.5.0_3.0_1725772785479.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("luganda_ner_v1_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("luganda_ner_v1_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
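+
+As a quick, self-contained check (assuming a running Spark NLP session; the sample sentence is illustrative and the output column names, taken here as `token` and `ner`, follow the usual pretrained-pipeline convention), `annotate` can be used instead of `transform`:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("luganda_ner_v1_pipeline", lang="en")
+
+# the returned dict pairs each token with a token-level NER tag
+result = pipeline.annotate("Museveni yogedde ku Kampala")
+for token, tag in zip(result["token"], result["ner"]):
+    print(token, tag)
+```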
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|luganda_ner_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|835.6 MB| + +## References + +https://huggingface.co/Conrad747/luganda-ner-v1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-marian_finetuned_kde4_english_tonga_tonga_islands_french_bill1888_en.md b/docs/_posts/ahmedlone127/2024-09-08-marian_finetuned_kde4_english_tonga_tonga_islands_french_bill1888_en.md new file mode 100644 index 00000000000000..ce515267646688 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-marian_finetuned_kde4_english_tonga_tonga_islands_french_bill1888_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English marian_finetuned_kde4_english_tonga_tonga_islands_french_bill1888 MarianTransformer from bill1888 +author: John Snow Labs +name: marian_finetuned_kde4_english_tonga_tonga_islands_french_bill1888 +date: 2024-09-08 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kde4_english_tonga_tonga_islands_french_bill1888` is a English model originally trained by bill1888. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_bill1888_en_5.5.0_3.0_1725766364275.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_bill1888_en_5.5.0_3.0_1725766364275.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_french_bill1888","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_french_bill1888","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
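+
+For quick experiments the fitted `pipelineModel` above can be wrapped in a `LightPipeline`, avoiding a round trip through a DataFrame. A short sketch, assuming the column names used in the snippet:
+
+```python
+from sparknlp.base import LightPipeline
+
+# assumes pipelineModel was fitted as shown above
+light = LightPipeline(pipelineModel)
+
+# "translation" is the output column of the MarianTransformer stage
+print(light.annotate("I love spark-nlp")["translation"])
+```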
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kde4_english_tonga_tonga_islands_french_bill1888| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|509.8 MB| + +## References + +https://huggingface.co/bill1888/marian-finetuned-kde4-en-to-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-marian_finetuned_kde4_english_tonga_tonga_islands_french_viennes_en.md b/docs/_posts/ahmedlone127/2024-09-08-marian_finetuned_kde4_english_tonga_tonga_islands_french_viennes_en.md new file mode 100644 index 00000000000000..6bedf36a8dba3e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-marian_finetuned_kde4_english_tonga_tonga_islands_french_viennes_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English marian_finetuned_kde4_english_tonga_tonga_islands_french_viennes MarianTransformer from Viennes +author: John Snow Labs +name: marian_finetuned_kde4_english_tonga_tonga_islands_french_viennes +date: 2024-09-08 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kde4_english_tonga_tonga_islands_french_viennes` is a English model originally trained by Viennes. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_viennes_en_5.5.0_3.0_1725765695720.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_french_viennes_en_5.5.0_3.0_1725765695720.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_french_viennes","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_french_viennes","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kde4_english_tonga_tonga_islands_french_viennes| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.1 MB| + +## References + +https://huggingface.co/Viennes/marian-finetuned-kde4-en-to-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-marian_finetuned_kde4_english_tonga_tonga_islands_german_accelerate_translator_nlp_course_chapter7_section3_en.md b/docs/_posts/ahmedlone127/2024-09-08-marian_finetuned_kde4_english_tonga_tonga_islands_german_accelerate_translator_nlp_course_chapter7_section3_en.md new file mode 100644 index 00000000000000..3e3629098a13c7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-marian_finetuned_kde4_english_tonga_tonga_islands_german_accelerate_translator_nlp_course_chapter7_section3_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English marian_finetuned_kde4_english_tonga_tonga_islands_german_accelerate_translator_nlp_course_chapter7_section3 MarianTransformer from BanUrsus +author: John Snow Labs +name: marian_finetuned_kde4_english_tonga_tonga_islands_german_accelerate_translator_nlp_course_chapter7_section3 +date: 2024-09-08 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marian_finetuned_kde4_english_tonga_tonga_islands_german_accelerate_translator_nlp_course_chapter7_section3` is a English model originally trained by BanUrsus. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_german_accelerate_translator_nlp_course_chapter7_section3_en_5.5.0_3.0_1725766549877.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marian_finetuned_kde4_english_tonga_tonga_islands_german_accelerate_translator_nlp_course_chapter7_section3_en_5.5.0_3.0_1725766549877.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_german_accelerate_translator_nlp_course_chapter7_section3","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("marian_finetuned_kde4_english_tonga_tonga_islands_german_accelerate_translator_nlp_course_chapter7_section3","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marian_finetuned_kde4_english_tonga_tonga_islands_german_accelerate_translator_nlp_course_chapter7_section3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|499.0 MB| + +## References + +https://huggingface.co/BanUrsus/marian-finetuned-kde4-en-to-de-accelerate-translator_nlp-course-chapter7-section3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-maskedlm_finetuned_imdb_en.md b/docs/_posts/ahmedlone127/2024-09-08-maskedlm_finetuned_imdb_en.md new file mode 100644 index 00000000000000..9b8107913b86be --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-maskedlm_finetuned_imdb_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English maskedlm_finetuned_imdb DistilBertEmbeddings from MRP101py +author: John Snow Labs +name: maskedlm_finetuned_imdb +date: 2024-09-08 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`maskedlm_finetuned_imdb` is a English model originally trained by MRP101py. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/maskedlm_finetuned_imdb_en_5.5.0_3.0_1725776431558.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/maskedlm_finetuned_imdb_en_5.5.0_3.0_1725776431558.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("maskedlm_finetuned_imdb","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("maskedlm_finetuned_imdb","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
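+
+To look at the vectors themselves, note that each annotation in the `embeddings` column carries an `embeddings` field with one vector per token (768 floats for a DistilBERT-base model). A follow-up sketch, assuming `pipelineDF` from the snippet above:
+
+```python
+from pyspark.sql.functions import col, explode
+
+# one row per token, each holding that token's embedding vector
+pipelineDF.select(explode(col("embeddings.embeddings")).alias("token_vector")) \
+    .show(5, truncate=80)
+```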
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|maskedlm_finetuned_imdb| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/MRP101py/MaskedLM-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-mlm_jjk_subtitle_en.md b/docs/_posts/ahmedlone127/2024-09-08-mlm_jjk_subtitle_en.md new file mode 100644 index 00000000000000..765fec27271fb7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-mlm_jjk_subtitle_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English mlm_jjk_subtitle DistilBertEmbeddings from kaiku03 +author: John Snow Labs +name: mlm_jjk_subtitle +date: 2024-09-08 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mlm_jjk_subtitle` is a English model originally trained by kaiku03. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlm_jjk_subtitle_en_5.5.0_3.0_1725776375140.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlm_jjk_subtitle_en_5.5.0_3.0_1725776375140.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("mlm_jjk_subtitle","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("mlm_jjk_subtitle","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlm_jjk_subtitle| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/kaiku03/MLM_JJK_SUBTITLE \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-mminilm_l6_v2_english_portuguese_msmarco_v1_pipeline_pt.md b/docs/_posts/ahmedlone127/2024-09-08-mminilm_l6_v2_english_portuguese_msmarco_v1_pipeline_pt.md new file mode 100644 index 00000000000000..1573a23e1983df --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-mminilm_l6_v2_english_portuguese_msmarco_v1_pipeline_pt.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Portuguese mminilm_l6_v2_english_portuguese_msmarco_v1_pipeline pipeline XlmRoBertaForSequenceClassification from unicamp-dl +author: John Snow Labs +name: mminilm_l6_v2_english_portuguese_msmarco_v1_pipeline +date: 2024-09-08 +tags: [pt, open_source, pipeline, onnx] +task: Text Classification +language: pt +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mminilm_l6_v2_english_portuguese_msmarco_v1_pipeline` is a Portuguese model originally trained by unicamp-dl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mminilm_l6_v2_english_portuguese_msmarco_v1_pipeline_pt_5.5.0_3.0_1725780940391.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mminilm_l6_v2_english_portuguese_msmarco_v1_pipeline_pt_5.5.0_3.0_1725780940391.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mminilm_l6_v2_english_portuguese_msmarco_v1_pipeline", lang = "pt") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mminilm_l6_v2_english_portuguese_msmarco_v1_pipeline", lang = "pt") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mminilm_l6_v2_english_portuguese_msmarco_v1_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|pt| +|Size:|344.0 MB| + +## References + +https://huggingface.co/unicamp-dl/mMiniLM-L6-v2-en-pt-msmarco-v1 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-mpnet_base_nli_matryoshka_yoshinori_sano_en.md b/docs/_posts/ahmedlone127/2024-09-08-mpnet_base_nli_matryoshka_yoshinori_sano_en.md new file mode 100644 index 00000000000000..44800747122549 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-mpnet_base_nli_matryoshka_yoshinori_sano_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English mpnet_base_nli_matryoshka_yoshinori_sano MPNetEmbeddings from yoshinori-sano +author: John Snow Labs +name: mpnet_base_nli_matryoshka_yoshinori_sano +date: 2024-09-08 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mpnet_base_nli_matryoshka_yoshinori_sano` is a English model originally trained by yoshinori-sano. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mpnet_base_nli_matryoshka_yoshinori_sano_en_5.5.0_3.0_1725769621129.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mpnet_base_nli_matryoshka_yoshinori_sano_en_5.5.0_3.0_1725769621129.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("mpnet_base_nli_matryoshka_yoshinori_sano","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("mpnet_base_nli_matryoshka_yoshinori_sano","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
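+
+Since MPNet yields one sentence-level vector per document, a typical follow-up is a similarity check between two texts. A rough sketch, assuming the `pipelineModel` fitted above (numpy is used only for the arithmetic, and the two sentences are illustrative):
+
+```python
+import numpy as np
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+
+def embed(text):
+    # fullAnnotate keeps the embeddings field of each annotation
+    return np.array(light.fullAnnotate(text)[0]["embeddings"][0].embeddings)
+
+a = embed("I love spark-nlp")
+b = embed("Spark NLP is a great library")
+print(float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b))))
+```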
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mpnet_base_nli_matryoshka_yoshinori_sano| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|404.2 MB| + +## References + +https://huggingface.co/yoshinori-sano/mpnet-base-nli-matryoshka \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-mpnet_twitter_freq100_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-mpnet_twitter_freq100_pipeline_en.md new file mode 100644 index 00000000000000..a5434cb1c334b0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-mpnet_twitter_freq100_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English mpnet_twitter_freq100_pipeline pipeline MPNetEmbeddings from navidmadani +author: John Snow Labs +name: mpnet_twitter_freq100_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mpnet_twitter_freq100_pipeline` is a English model originally trained by navidmadani. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mpnet_twitter_freq100_pipeline_en_5.5.0_3.0_1725769074687.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mpnet_twitter_freq100_pipeline_en_5.5.0_3.0_1725769074687.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("mpnet_twitter_freq100_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("mpnet_twitter_freq100_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mpnet_twitter_freq100_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/navidmadani/mpnet-twitter-freq100 + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-multidim_default_template_en.md b/docs/_posts/ahmedlone127/2024-09-08-multidim_default_template_en.md new file mode 100644 index 00000000000000..7bcea477215be6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-multidim_default_template_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English multidim_default_template DistilBertForSequenceClassification from cshin23 +author: John Snow Labs +name: multidim_default_template +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`multidim_default_template` is a English model originally trained by cshin23. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/multidim_default_template_en_5.5.0_3.0_1725775477222.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/multidim_default_template_en_5.5.0_3.0_1725775477222.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("multidim_default_template","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("multidim_default_template", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
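+
+The predicted label lands in the `class` column configured above; reading it back only needs a small projection on `pipelineDF`:
+
+```python
+# each row shows the input text next to the classifier's predicted label(s)
+pipelineDF.select("text", "class.result").show(truncate=False)
+```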
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|multidim_default_template| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/cshin23/multidim_default_template \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-multidim_romansh_reg_avg_balanced_default_template_en.md b/docs/_posts/ahmedlone127/2024-09-08-multidim_romansh_reg_avg_balanced_default_template_en.md new file mode 100644 index 00000000000000..6a3b4cb48382aa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-multidim_romansh_reg_avg_balanced_default_template_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English multidim_romansh_reg_avg_balanced_default_template DistilBertForSequenceClassification from cshin23 +author: John Snow Labs +name: multidim_romansh_reg_avg_balanced_default_template +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`multidim_romansh_reg_avg_balanced_default_template` is a English model originally trained by cshin23. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/multidim_romansh_reg_avg_balanced_default_template_en_5.5.0_3.0_1725777398878.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/multidim_romansh_reg_avg_balanced_default_template_en_5.5.0_3.0_1725777398878.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("multidim_romansh_reg_avg_balanced_default_template","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("multidim_romansh_reg_avg_balanced_default_template", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|multidim_romansh_reg_avg_balanced_default_template| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/cshin23/multidim-rm_reg_avg_balanced_default_template \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-multilingual_xlm_roberta_for_ner_c4n11_xx.md b/docs/_posts/ahmedlone127/2024-09-08-multilingual_xlm_roberta_for_ner_c4n11_xx.md new file mode 100644 index 00000000000000..214cc816a0ac41 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-multilingual_xlm_roberta_for_ner_c4n11_xx.md @@ -0,0 +1,94 @@ +--- +layout: model +title: Multilingual multilingual_xlm_roberta_for_ner_c4n11 XlmRoBertaForTokenClassification from c4n11 +author: John Snow Labs +name: multilingual_xlm_roberta_for_ner_c4n11 +date: 2024-09-08 +tags: [xx, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: xx +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`multilingual_xlm_roberta_for_ner_c4n11` is a Multilingual model originally trained by c4n11. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/multilingual_xlm_roberta_for_ner_c4n11_xx_5.5.0_3.0_1725773250254.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/multilingual_xlm_roberta_for_ner_c4n11_xx_5.5.0_3.0_1725773250254.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("multilingual_xlm_roberta_for_ner_c4n11","xx") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("multilingual_xlm_roberta_for_ner_c4n11", "xx")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
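+
+The `ner` column holds token-level IOB tags. If entity chunks are more convenient, Spark NLP's `NerConverter` can be appended after the classifier; a sketch assuming `pipelineDF` and the column names from the snippet above:
+
+```python
+from sparknlp.annotator import NerConverter
+
+converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("entities")
+
+# NerConverter is a plain transformer, so it can run directly on the output
+converter.transform(pipelineDF).select("entities.result").show(truncate=False)
+```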
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|multilingual_xlm_roberta_for_ner_c4n11| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|xx| +|Size:|839.7 MB| + +## References + +https://huggingface.co/c4n11/multilingual-xlm-roberta-for-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-n2c2_soap_entailment_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-n2c2_soap_entailment_pipeline_en.md new file mode 100644 index 00000000000000..0e37ccc9a60812 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-n2c2_soap_entailment_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English n2c2_soap_entailment_pipeline pipeline RoBertaForSequenceClassification from vsocrates +author: John Snow Labs +name: n2c2_soap_entailment_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`n2c2_soap_entailment_pipeline` is a English model originally trained by vsocrates. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/n2c2_soap_entailment_pipeline_en_5.5.0_3.0_1725778896120.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/n2c2_soap_entailment_pipeline_en_5.5.0_3.0_1725778896120.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("n2c2_soap_entailment_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("n2c2_soap_entailment_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|n2c2_soap_entailment_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/vsocrates/n2c2-soap-entailment + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-n_roberta_imdb_padding10model_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-n_roberta_imdb_padding10model_pipeline_en.md new file mode 100644 index 00000000000000..d3a02e562e3a32 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-n_roberta_imdb_padding10model_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English n_roberta_imdb_padding10model_pipeline pipeline RoBertaForSequenceClassification from Realgon +author: John Snow Labs +name: n_roberta_imdb_padding10model_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`n_roberta_imdb_padding10model_pipeline` is a English model originally trained by Realgon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/n_roberta_imdb_padding10model_pipeline_en_5.5.0_3.0_1725779263459.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/n_roberta_imdb_padding10model_pipeline_en_5.5.0_3.0_1725779263459.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("n_roberta_imdb_padding10model_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("n_roberta_imdb_padding10model_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
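+
+As with other pretrained pipelines, the model can be probed on a single review before wiring it into a DataFrame job (a sketch assuming a running Spark NLP session; the review text is illustrative):
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("n_roberta_imdb_padding10model_pipeline", lang="en")
+
+# returns a dict with the document, tokens and the predicted sentiment label
+print(pipeline.annotate("One of the best films I have seen in years."))
+```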
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|n_roberta_imdb_padding10model_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|463.1 MB| + +## References + +https://huggingface.co/Realgon/N_roberta_imdb_padding10model + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_2000instancesopus_leaningrate2e_05_batchsize8_11epoch_3_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_2000instancesopus_leaningrate2e_05_batchsize8_11epoch_3_pipeline_en.md new file mode 100644 index 00000000000000..4028c21234cc3b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_2000instancesopus_leaningrate2e_05_batchsize8_11epoch_3_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_2000instancesopus_leaningrate2e_05_batchsize8_11epoch_3_pipeline pipeline MarianTransformer from meghazisofiane +author: John Snow Labs +name: opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_2000instancesopus_leaningrate2e_05_batchsize8_11epoch_3_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_2000instancesopus_leaningrate2e_05_batchsize8_11epoch_3_pipeline` is a English model originally trained by meghazisofiane. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_2000instancesopus_leaningrate2e_05_batchsize8_11epoch_3_pipeline_en_5.5.0_3.0_1725766126458.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_2000instancesopus_leaningrate2e_05_batchsize8_11epoch_3_pipeline_en_5.5.0_3.0_1725766126458.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_2000instancesopus_leaningrate2e_05_batchsize8_11epoch_3_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_2000instancesopus_leaningrate2e_05_batchsize8_11epoch_3_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
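+
+Because `df` is not defined in the snippet above, a self-contained call is the easiest first test (assuming a running session; `annotate` returns the detected sentences and their translations keyed by output column):
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline(
+    "opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_2000instancesopus_leaningrate2e_05_batchsize8_11epoch_3_pipeline",
+    lang="en",
+)
+
+print(pipeline.annotate("How are you today?"))
+```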
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_arabic_evaluated_english_tonga_tonga_islands_arabic_2000instancesopus_leaningrate2e_05_batchsize8_11epoch_3_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|528.7 MB| + +## References + +https://huggingface.co/meghazisofiane/opus-mt-en-ar-evaluated-en-to-ar-2000instancesopus-leaningRate2e-05-batchSize8-11epoch-3 + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-opus_maltese_english_bkm_10e6encdec_en.md b/docs/_posts/ahmedlone127/2024-09-08-opus_maltese_english_bkm_10e6encdec_en.md new file mode 100644 index 00000000000000..6ae133757e1f29 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-opus_maltese_english_bkm_10e6encdec_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English opus_maltese_english_bkm_10e6encdec MarianTransformer from kalese +author: John Snow Labs +name: opus_maltese_english_bkm_10e6encdec +date: 2024-09-08 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_bkm_10e6encdec` is a English model originally trained by kalese. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_bkm_10e6encdec_en_5.5.0_3.0_1725766187880.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_bkm_10e6encdec_en_5.5.0_3.0_1725766187880.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_maltese_english_bkm_10e6encdec","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_maltese_english_bkm_10e6encdec","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_bkm_10e6encdec| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|508.2 MB| + +## References + +https://huggingface.co/kalese/opus-mt-en-bkm-10e6encdec \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-opus_maltese_english_dutch_finetuned_20k_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-opus_maltese_english_dutch_finetuned_20k_pipeline_en.md new file mode 100644 index 00000000000000..e08ba66dfaa6f9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-opus_maltese_english_dutch_finetuned_20k_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_maltese_english_dutch_finetuned_20k_pipeline pipeline MarianTransformer from kalcho100 +author: John Snow Labs +name: opus_maltese_english_dutch_finetuned_20k_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_dutch_finetuned_20k_pipeline` is a English model originally trained by kalcho100. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_dutch_finetuned_20k_pipeline_en_5.5.0_3.0_1725766010175.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_dutch_finetuned_20k_pipeline_en_5.5.0_3.0_1725766010175.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_english_dutch_finetuned_20k_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_english_dutch_finetuned_20k_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_dutch_finetuned_20k_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|549.3 MB| + +## References + +https://huggingface.co/kalcho100/opus-mt-en-nl-finetuned_20k + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-opus_maltese_english_japanese_finetuned_english_tonga_tonga_islands_japanese_en.md b/docs/_posts/ahmedlone127/2024-09-08-opus_maltese_english_japanese_finetuned_english_tonga_tonga_islands_japanese_en.md new file mode 100644 index 00000000000000..da55453d62da2d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-opus_maltese_english_japanese_finetuned_english_tonga_tonga_islands_japanese_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English opus_maltese_english_japanese_finetuned_english_tonga_tonga_islands_japanese MarianTransformer from ldh243 +author: John Snow Labs +name: opus_maltese_english_japanese_finetuned_english_tonga_tonga_islands_japanese +date: 2024-09-08 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_japanese_finetuned_english_tonga_tonga_islands_japanese` is a English model originally trained by ldh243. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_japanese_finetuned_english_tonga_tonga_islands_japanese_en_5.5.0_3.0_1725765316057.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_japanese_finetuned_english_tonga_tonga_islands_japanese_en_5.5.0_3.0_1725765316057.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+marian = MarianTransformer.pretrained("opus_maltese_english_japanese_finetuned_english_tonga_tonga_islands_japanese","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+val marian = MarianTransformer.pretrained("opus_maltese_english_japanese_finetuned_english_tonga_tonga_islands_japanese","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_japanese_finetuned_english_tonga_tonga_islands_japanese| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|430.5 MB| + +## References + +https://huggingface.co/ldh243/opus-mt-en-jap-finetuned-en-to-ja \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_agreement_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_agreement_pipeline_en.md new file mode 100644 index 00000000000000..06f9460323f268 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_agreement_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_agreement_pipeline pipeline MarianTransformer from himanshubeniwal +author: John Snow Labs +name: opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_agreement_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_agreement_pipeline` is a English model originally trained by himanshubeniwal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_agreement_pipeline_en_5.5.0_3.0_1725766727999.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_agreement_pipeline_en_5.5.0_3.0_1725766727999.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_agreement_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_agreement_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
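+In the snippets above, `df` stands for any Spark DataFrame with a `text` column. For a quick ad-hoc check, a pretrained pipeline can also be run on a plain string; a minimal sketch using Spark NLP's `annotate` helper (the exact keys in the returned dict depend on the pipeline's output columns):
+
+```python
+# Sketch: annotate a single string instead of a DataFrame.
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_agreement_pipeline", lang="en")
+result = pipeline.annotate("I love Spark NLP")
+print(result)  # dict of output column name -> list of annotation results
+```
+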
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opus_maltese_english_romanian_finetuned_romanian_tonga_tonga_islands_english_agreement_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|508.6 MB| + +## References + +https://huggingface.co/himanshubeniwal/opus-mt-en-ro-finetuned-ro-to-en-agreement + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-pebblo_classifier_en.md b/docs/_posts/ahmedlone127/2024-09-08-pebblo_classifier_en.md new file mode 100644 index 00000000000000..2ec052a001183d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-pebblo_classifier_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English pebblo_classifier DistilBertForSequenceClassification from daxa-ai +author: John Snow Labs +name: pebblo_classifier +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`pebblo_classifier` is a English model originally trained by daxa-ai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pebblo_classifier_en_5.5.0_3.0_1725764464426.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pebblo_classifier_en_5.5.0_3.0_1725764464426.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("pebblo_classifier","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("pebblo_classifier", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
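+The classifier writes its prediction into the `class` annotation column; a minimal sketch of reading the predicted label per row, assuming `pipelineDF` from the snippet above:
+
+```python
+# Sketch: show the input text next to the predicted label(s).
+pipelineDF.select("text", "class.result").show(truncate=False)
+```
+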
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pebblo_classifier| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/daxa-ai/pebblo-classifier \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-platzi_en.md b/docs/_posts/ahmedlone127/2024-09-08-platzi_en.md new file mode 100644 index 00000000000000..4accf66b504201 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-platzi_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English platzi RoBertaForSequenceClassification from joacorf33 +author: John Snow Labs +name: platzi +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`platzi` is a English model originally trained by joacorf33. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/platzi_en_5.5.0_3.0_1725778170293.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/platzi_en_5.5.0_3.0_1725778170293.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("platzi","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("platzi", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
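+For single sentences or small batches, the fitted pipeline can also be wrapped in a `LightPipeline`, which skips the DataFrame round trip. A minimal sketch, assuming `pipelineModel` from the Python example above:
+
+```python
+# Sketch: lightweight, in-memory annotation of individual strings.
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+print(light.annotate("I love spark-nlp"))  # dict keyed by the output columns: document, token, class
+```
+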
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|platzi| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|308.6 MB| + +## References + +https://huggingface.co/joacorf33/platzi \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-platzi_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-platzi_pipeline_en.md new file mode 100644 index 00000000000000..233664f92ad315 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-platzi_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English platzi_pipeline pipeline RoBertaForSequenceClassification from joacorf33 +author: John Snow Labs +name: platzi_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`platzi_pipeline` is a English model originally trained by joacorf33. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/platzi_pipeline_en_5.5.0_3.0_1725778184206.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/platzi_pipeline_en_5.5.0_3.0_1725778184206.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("platzi_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("platzi_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|platzi_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|308.6 MB| + +## References + +https://huggingface.co/joacorf33/platzi + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-portuguese_up_xlmr_oneshot_falsetrue_0_2_best_en.md b/docs/_posts/ahmedlone127/2024-09-08-portuguese_up_xlmr_oneshot_falsetrue_0_2_best_en.md new file mode 100644 index 00000000000000..6e6b3176503bbf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-portuguese_up_xlmr_oneshot_falsetrue_0_2_best_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English portuguese_up_xlmr_oneshot_falsetrue_0_2_best XlmRoBertaForSequenceClassification from harish +author: John Snow Labs +name: portuguese_up_xlmr_oneshot_falsetrue_0_2_best +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`portuguese_up_xlmr_oneshot_falsetrue_0_2_best` is a English model originally trained by harish. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/portuguese_up_xlmr_oneshot_falsetrue_0_2_best_en_5.5.0_3.0_1725780108160.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/portuguese_up_xlmr_oneshot_falsetrue_0_2_best_en_5.5.0_3.0_1725780108160.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("portuguese_up_xlmr_oneshot_falsetrue_0_2_best","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("portuguese_up_xlmr_oneshot_falsetrue_0_2_best", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|portuguese_up_xlmr_oneshot_falsetrue_0_2_best| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|782.7 MB| + +## References + +https://huggingface.co/harish/PT-UP-xlmR-OneShot-FalseTrue-0_2_BEST \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-portuguese_up_xlmr_oneshot_falsetrue_0_2_best_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-portuguese_up_xlmr_oneshot_falsetrue_0_2_best_pipeline_en.md new file mode 100644 index 00000000000000..3f0034eb0b2e7c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-portuguese_up_xlmr_oneshot_falsetrue_0_2_best_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English portuguese_up_xlmr_oneshot_falsetrue_0_2_best_pipeline pipeline XlmRoBertaForSequenceClassification from harish +author: John Snow Labs +name: portuguese_up_xlmr_oneshot_falsetrue_0_2_best_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`portuguese_up_xlmr_oneshot_falsetrue_0_2_best_pipeline` is a English model originally trained by harish. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/portuguese_up_xlmr_oneshot_falsetrue_0_2_best_pipeline_en_5.5.0_3.0_1725780244043.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/portuguese_up_xlmr_oneshot_falsetrue_0_2_best_pipeline_en_5.5.0_3.0_1725780244043.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("portuguese_up_xlmr_oneshot_falsetrue_0_2_best_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("portuguese_up_xlmr_oneshot_falsetrue_0_2_best_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|portuguese_up_xlmr_oneshot_falsetrue_0_2_best_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|782.8 MB| + +## References + +https://huggingface.co/harish/PT-UP-xlmR-OneShot-FalseTrue-0_2_BEST + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-predict_perception_xlmr_focus_assassin_en.md b/docs/_posts/ahmedlone127/2024-09-08-predict_perception_xlmr_focus_assassin_en.md new file mode 100644 index 00000000000000..1c485547fbe2b4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-predict_perception_xlmr_focus_assassin_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English predict_perception_xlmr_focus_assassin XlmRoBertaForSequenceClassification from responsibility-framing +author: John Snow Labs +name: predict_perception_xlmr_focus_assassin +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`predict_perception_xlmr_focus_assassin` is a English model originally trained by responsibility-framing. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/predict_perception_xlmr_focus_assassin_en_5.5.0_3.0_1725780882852.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/predict_perception_xlmr_focus_assassin_en_5.5.0_3.0_1725780882852.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("predict_perception_xlmr_focus_assassin","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("predict_perception_xlmr_focus_assassin", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
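+Beyond the predicted label, each `class` annotation also carries metadata such as per-label scores; a minimal sketch of inspecting it, assuming `pipelineDF` from the snippet above and the usual Spark NLP classifier annotation layout:
+
+```python
+# Sketch: unpack each classification annotation to see its label and metadata.
+pipelineDF.selectExpr("explode(`class`) AS ann") \
+    .select("ann.result", "ann.metadata") \
+    .show(truncate=False)
+```
+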
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|predict_perception_xlmr_focus_assassin| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|837.6 MB| + +## References + +https://huggingface.co/responsibility-framing/predict-perception-xlmr-focus-assassin \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-psais_multi_qa_mpnet_base_dot_v1_8shot_en.md b/docs/_posts/ahmedlone127/2024-09-08-psais_multi_qa_mpnet_base_dot_v1_8shot_en.md new file mode 100644 index 00000000000000..8346c319c8a95f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-psais_multi_qa_mpnet_base_dot_v1_8shot_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English psais_multi_qa_mpnet_base_dot_v1_8shot MPNetEmbeddings from hroth +author: John Snow Labs +name: psais_multi_qa_mpnet_base_dot_v1_8shot +date: 2024-09-08 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`psais_multi_qa_mpnet_base_dot_v1_8shot` is a English model originally trained by hroth. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/psais_multi_qa_mpnet_base_dot_v1_8shot_en_5.5.0_3.0_1725769317012.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/psais_multi_qa_mpnet_base_dot_v1_8shot_en_5.5.0_3.0_1725769317012.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("psais_multi_qa_mpnet_base_dot_v1_8shot","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("psais_multi_qa_mpnet_base_dot_v1_8shot","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
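+Each document is mapped to a dense MPNet vector stored in the `embeddings` annotation column; a minimal sketch of extracting the raw float vectors, assuming `pipelineDF` from the snippet above:
+
+```python
+# Sketch: one sentence-level embedding vector per document annotation.
+pipelineDF.selectExpr("explode(embeddings.embeddings) AS vector").show(1, truncate=80)
+```
+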
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|psais_multi_qa_mpnet_base_dot_v1_8shot| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/hroth/psais-multi-qa-mpnet-base-dot-v1-8shot \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-psais_multi_qa_mpnet_base_dot_v1_8shot_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-psais_multi_qa_mpnet_base_dot_v1_8shot_pipeline_en.md new file mode 100644 index 00000000000000..9bbd210b1a2218 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-psais_multi_qa_mpnet_base_dot_v1_8shot_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English psais_multi_qa_mpnet_base_dot_v1_8shot_pipeline pipeline MPNetEmbeddings from hroth +author: John Snow Labs +name: psais_multi_qa_mpnet_base_dot_v1_8shot_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`psais_multi_qa_mpnet_base_dot_v1_8shot_pipeline` is a English model originally trained by hroth. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/psais_multi_qa_mpnet_base_dot_v1_8shot_pipeline_en_5.5.0_3.0_1725769337523.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/psais_multi_qa_mpnet_base_dot_v1_8shot_pipeline_en_5.5.0_3.0_1725769337523.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("psais_multi_qa_mpnet_base_dot_v1_8shot_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("psais_multi_qa_mpnet_base_dot_v1_8shot_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|psais_multi_qa_mpnet_base_dot_v1_8shot_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/hroth/psais-multi-qa-mpnet-base-dot-v1-8shot + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-qa_model9_test_en.md b/docs/_posts/ahmedlone127/2024-09-08-qa_model9_test_en.md new file mode 100644 index 00000000000000..5265739a7f76d1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-qa_model9_test_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English qa_model9_test RoBertaForQuestionAnswering from MattNandavong +author: John Snow Labs +name: qa_model9_test +date: 2024-09-08 +tags: [en, open_source, onnx, question_answering, roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`qa_model9_test` is a English model originally trained by MattNandavong. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qa_model9_test_en_5.5.0_3.0_1725757685534.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qa_model9_test_en_5.5.0_3.0_1725757685534.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = RoBertaForQuestionAnswering.pretrained("qa_model9_test","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = RoBertaForQuestionAnswering.pretrained("qa_model9_test", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
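+The predicted answer span lands in the `answer` annotation column; a minimal sketch of viewing question and answer side by side, assuming `pipelineDF` from the snippet above:
+
+```python
+# Sketch: show the question together with the extracted answer span.
+pipelineDF.select("document_question.result", "answer.result").show(truncate=False)
+```
+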
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qa_model9_test| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|463.5 MB| + +## References + +https://huggingface.co/MattNandavong/QA_model9-test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-quality_model_apr3_en.md b/docs/_posts/ahmedlone127/2024-09-08-quality_model_apr3_en.md new file mode 100644 index 00000000000000..f9fe798fd687bf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-quality_model_apr3_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English quality_model_apr3 DistilBertForSequenceClassification from vtiyyal1 +author: John Snow Labs +name: quality_model_apr3 +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`quality_model_apr3` is a English model originally trained by vtiyyal1. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/quality_model_apr3_en_5.5.0_3.0_1725776744841.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/quality_model_apr3_en_5.5.0_3.0_1725776744841.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("quality_model_apr3","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("quality_model_apr3", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|quality_model_apr3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/vtiyyal1/quality_model_apr3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-recommend_songs_en.md b/docs/_posts/ahmedlone127/2024-09-08-recommend_songs_en.md new file mode 100644 index 00000000000000..c1f599e8b2b716 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-recommend_songs_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English recommend_songs DistilBertForSequenceClassification from manoh2f2 +author: John Snow Labs +name: recommend_songs +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`recommend_songs` is a English model originally trained by manoh2f2. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/recommend_songs_en_5.5.0_3.0_1725774745841.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/recommend_songs_en_5.5.0_3.0_1725774745841.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("recommend_songs","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("recommend_songs", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|recommend_songs| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/manoh2f2/recommend_songs \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-resume_sentence_classifier_en.md b/docs/_posts/ahmedlone127/2024-09-08-resume_sentence_classifier_en.md new file mode 100644 index 00000000000000..3c782344fa4395 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-resume_sentence_classifier_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English resume_sentence_classifier DistilBertForSequenceClassification from Ronysalem +author: John Snow Labs +name: resume_sentence_classifier +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`resume_sentence_classifier` is a English model originally trained by Ronysalem. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/resume_sentence_classifier_en_5.5.0_3.0_1725776940222.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/resume_sentence_classifier_en_5.5.0_3.0_1725776940222.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("resume_sentence_classifier","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("resume_sentence_classifier", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|resume_sentence_classifier| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/Ronysalem/Resume_sentence_classifier \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-roberta_base_emotion_pysentimiento_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-roberta_base_emotion_pysentimiento_pipeline_en.md new file mode 100644 index 00000000000000..bc6c6838746b67 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-roberta_base_emotion_pysentimiento_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_base_emotion_pysentimiento_pipeline pipeline RoBertaForSequenceClassification from pysentimiento +author: John Snow Labs +name: roberta_base_emotion_pysentimiento_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_base_emotion_pysentimiento_pipeline` is a English model originally trained by pysentimiento. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_emotion_pysentimiento_pipeline_en_5.5.0_3.0_1725778343668.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_emotion_pysentimiento_pipeline_en_5.5.0_3.0_1725778343668.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_base_emotion_pysentimiento_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_base_emotion_pysentimiento_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_emotion_pysentimiento_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|456.0 MB| + +## References + +https://huggingface.co/pysentimiento/roberta-base-emotion + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-roberta_finetuned_subjqa_movies_2_ram20307_en.md b/docs/_posts/ahmedlone127/2024-09-08-roberta_finetuned_subjqa_movies_2_ram20307_en.md new file mode 100644 index 00000000000000..4e53937302c9c8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-roberta_finetuned_subjqa_movies_2_ram20307_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English roberta_finetuned_subjqa_movies_2_ram20307 RoBertaForQuestionAnswering from Ram20307 +author: John Snow Labs +name: roberta_finetuned_subjqa_movies_2_ram20307 +date: 2024-09-08 +tags: [en, open_source, onnx, question_answering, roberta] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_finetuned_subjqa_movies_2_ram20307` is a English model originally trained by Ram20307. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_finetuned_subjqa_movies_2_ram20307_en_5.5.0_3.0_1725758111742.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_finetuned_subjqa_movies_2_ram20307_en_5.5.0_3.0_1725758111742.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_finetuned_subjqa_movies_2_ram20307","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+    .setInputCols(Array("question", "context"))
+    .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_finetuned_subjqa_movies_2_ram20307", "en")
+    .setInputCols(Array("document_question","document_context"))
+    .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?","I use spark-nlp.")).toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_finetuned_subjqa_movies_2_ram20307| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|464.1 MB| + +## References + +https://huggingface.co/Ram20307/roberta-finetuned-subjqa-movies_2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-roberta_news_classification_aparnaullas_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-roberta_news_classification_aparnaullas_pipeline_en.md new file mode 100644 index 00000000000000..f05f411ddcf3e0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-roberta_news_classification_aparnaullas_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English roberta_news_classification_aparnaullas_pipeline pipeline RoBertaForSequenceClassification from AparnaUllas +author: John Snow Labs +name: roberta_news_classification_aparnaullas_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_news_classification_aparnaullas_pipeline` is a English model originally trained by AparnaUllas. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_news_classification_aparnaullas_pipeline_en_5.5.0_3.0_1725778730712.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_news_classification_aparnaullas_pipeline_en_5.5.0_3.0_1725778730712.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_news_classification_aparnaullas_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_news_classification_aparnaullas_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_news_classification_aparnaullas_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|430.2 MB| + +## References + +https://huggingface.co/AparnaUllas/Roberta-news-classification + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-roberta_qa_QA_for_Event_Extraction_en.md b/docs/_posts/ahmedlone127/2024-09-08-roberta_qa_QA_for_Event_Extraction_en.md new file mode 100644 index 00000000000000..1d99d74a2ae908 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-roberta_qa_QA_for_Event_Extraction_en.md @@ -0,0 +1,109 @@ +--- +layout: model +title: English RobertaForQuestionAnswering (from veronica320) +author: John Snow Labs +name: roberta_qa_QA_for_Event_Extraction +date: 2024-09-08 +tags: [en, open_source, question_answering, roberta, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `QA-for-Event-Extraction` is a English model originally trained by `veronica320`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_qa_QA_for_Event_Extraction_en_5.5.0_3.0_1725757514356.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_qa_QA_for_Event_Extraction_en_5.5.0_3.0_1725757514356.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_qa_QA_for_Event_Extraction","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer") \ +.setCaseSensitive(True) + +pipeline = Pipeline().setStages([ +document_assembler, +spanClassifier +]) + +example = spark.createDataFrame([["What's my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(example).transform(example) +``` +```scala +val document = new MultiDocumentAssembler() +.setInputCols("question", "context") +.setOutputCols("document_question", "document_context") + +val spanClassifier = RoBertaForQuestionAnswering +.pretrained("roberta_qa_QA_for_Event_Extraction","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) +.setMaxSentenceLength(512) + +val pipeline = new Pipeline().setStages(Array(document, spanClassifier)) + +val example = Seq( +("Where was John Lenon born?", "John Lenon was born in London and lived in Paris. My name is Sarah and I live in London."), +("What's my name?", "My name is Clara and I live in Berkeley.")) +.toDF("question", "context") + +val result = pipeline.fit(example).transform(example) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.roberta.by_veronica320").predict("""What's my name?|||"My name is Clara and I live in Berkeley.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_qa_QA_for_Event_Extraction| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|1.3 GB| + +## References + +References + +- https://huggingface.co/veronica320/QA-for-Event-Extraction +- https://aclanthology.org/2021.acl-short.42/ +- https://github.com/veronica320/Zeroshot-Event-Extraction +- https://github.com/uwnlp/qamr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-roberta_qa_REQA_RoBERTa_en.md b/docs/_posts/ahmedlone127/2024-09-08-roberta_qa_REQA_RoBERTa_en.md new file mode 100644 index 00000000000000..936e9c3f47a661 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-roberta_qa_REQA_RoBERTa_en.md @@ -0,0 +1,106 @@ +--- +layout: model +title: English RobertaForQuestionAnswering (from shmuelamar) +author: John Snow Labs +name: roberta_qa_REQA_RoBERTa +date: 2024-09-08 +tags: [en, open_source, question_answering, roberta, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `REQA-RoBERTa` is a English model originally trained by `shmuelamar`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_qa_REQA_RoBERTa_en_5.5.0_3.0_1725758166524.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_qa_REQA_RoBERTa_en_5.5.0_3.0_1725758166524.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_qa_REQA_RoBERTa","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer") \ +.setCaseSensitive(True) + +pipeline = Pipeline().setStages([ +document_assembler, +spanClassifier +]) + +example = spark.createDataFrame([["What's my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(example).transform(example) +``` +```scala +val document = new MultiDocumentAssembler() +.setInputCols("question", "context") +.setOutputCols("document_question", "document_context") + +val spanClassifier = RoBertaForQuestionAnswering +.pretrained("roberta_qa_REQA_RoBERTa","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) +.setMaxSentenceLength(512) + +val pipeline = new Pipeline().setStages(Array(document, spanClassifier)) + +val example = Seq( +("Where was John Lenon born?", "John Lenon was born in London and lived in Paris. My name is Sarah and I live in London."), +("What's my name?", "My name is Clara and I live in Berkeley.")) +.toDF("question", "context") + +val result = pipeline.fit(example).transform(example) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.roberta.by_shmuelamar").predict("""What's my name?|||"My name is Clara and I live in Berkeley.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_qa_REQA_RoBERTa| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|1.3 GB| + +## References + +References + +- https://huggingface.co/shmuelamar/REQA-RoBERTa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-roberta_qa_REQA_RoBERTa_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-roberta_qa_REQA_RoBERTa_pipeline_en.md new file mode 100644 index 00000000000000..cafd707e3c9b0d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-roberta_qa_REQA_RoBERTa_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English roberta_qa_REQA_RoBERTa_pipeline pipeline RoBertaForQuestionAnswering from shmuelamar +author: John Snow Labs +name: roberta_qa_REQA_RoBERTa_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`roberta_qa_REQA_RoBERTa_pipeline` is a English model originally trained by shmuelamar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_qa_REQA_RoBERTa_pipeline_en_5.5.0_3.0_1725758227331.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_qa_REQA_RoBERTa_pipeline_en_5.5.0_3.0_1725758227331.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_qa_REQA_RoBERTa_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_qa_REQA_RoBERTa_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_qa_REQA_RoBERTa_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/shmuelamar/REQA-RoBERTa + +## Included Models + +- MultiDocumentAssembler +- RoBertaForQuestionAnswering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-roberta_qa_roberta_base_finetuned_scrambled_squad_5_en.md b/docs/_posts/ahmedlone127/2024-09-08-roberta_qa_roberta_base_finetuned_scrambled_squad_5_en.md new file mode 100644 index 00000000000000..9dcf416aa2f507 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-roberta_qa_roberta_base_finetuned_scrambled_squad_5_en.md @@ -0,0 +1,106 @@ +--- +layout: model +title: English RobertaForQuestionAnswering (from huxxx657) +author: John Snow Labs +name: roberta_qa_roberta_base_finetuned_scrambled_squad_5 +date: 2024-09-08 +tags: [en, open_source, question_answering, roberta, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `roberta-base-finetuned-scrambled-squad-5` is a English model originally trained by `huxxx657`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_qa_roberta_base_finetuned_scrambled_squad_5_en_5.5.0_3.0_1725757331304.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_qa_roberta_base_finetuned_scrambled_squad_5_en_5.5.0_3.0_1725757331304.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = RoBertaForQuestionAnswering.pretrained("roberta_qa_roberta_base_finetuned_scrambled_squad_5","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer") \ +.setCaseSensitive(True) + +pipeline = Pipeline().setStages([ +document_assembler, +spanClassifier +]) + +example = spark.createDataFrame([["What's my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(example).transform(example) +``` +```scala +val document = new MultiDocumentAssembler() +.setInputCols("question", "context") +.setOutputCols("document_question", "document_context") + +val spanClassifier = RoBertaForQuestionAnswering +.pretrained("roberta_qa_roberta_base_finetuned_scrambled_squad_5","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) +.setMaxSentenceLength(512) + +val pipeline = new Pipeline().setStages(Array(document, spanClassifier)) + +val example = Seq( +("Where was John Lenon born?", "John Lenon was born in London and lived in Paris. My name is Sarah and I live in London."), +("What's my name?", "My name is Clara and I live in Berkeley.")) +.toDF("question", "context") + +val result = pipeline.fit(example).transform(example) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.squad.roberta.base_scrambled_5.by_huxxx657").predict("""What's my name?|||"My name is Clara and I live in Berkeley.""") +``` +
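+As a small follow-up to the example above, the predicted answer span can be read from the `answer` output column configured on the span classifier (a sketch; the exact rendering of the result may vary):
+
+```python
+# "answer" is the output column set with setOutputCol("answer") in the example above.
+result.select("answer.result").show(truncate=False)
+# Illustrative output: [Clara]
+```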
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|roberta_qa_roberta_base_finetuned_scrambled_squad_5|
+|Compatibility:|Spark NLP 5.5.0+|
+|License:|Open Source|
+|Edition:|Official|
+|Input Labels:|[document_question, document_context]|
+|Output Labels:|[answer]|
+|Language:|en|
+|Size:|463.3 MB|
+
+## References
+
+- https://huggingface.co/huxxx657/roberta-base-finetuned-scrambled-squad-5
\ No newline at end of file
diff --git a/docs/_posts/ahmedlone127/2024-09-08-roberta_soft_llm_multip_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-roberta_soft_llm_multip_pipeline_en.md
new file mode 100644
index 00000000000000..e6eebce57cdd8a
--- /dev/null
+++ b/docs/_posts/ahmedlone127/2024-09-08-roberta_soft_llm_multip_pipeline_en.md
@@ -0,0 +1,70 @@
+---
+layout: model
+title: English roberta_soft_llm_multip_pipeline pipeline RoBertaForSequenceClassification from Multiperspective
+author: John Snow Labs
+name: roberta_soft_llm_multip_pipeline
+date: 2024-09-08
+tags: [en, open_source, pipeline, onnx]
+task: Text Classification
+language: en
+edition: Spark NLP 5.5.0
+spark_version: 3.0
+supported: true
+annotator: PipelineModel
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `roberta_soft_llm_multip_pipeline` is an English model originally trained by Multiperspective.
+
+{:.btn-box}
+
+
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_soft_llm_multip_pipeline_en_5.5.0_3.0_1725778407627.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_soft_llm_multip_pipeline_en_5.5.0_3.0_1725778407627.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("roberta_soft_llm_multip_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("roberta_soft_llm_multip_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
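+The `df` referenced above is not defined in the snippet. A minimal sketch of preparing one, assuming the pipeline reads a raw `text` column through the DocumentAssembler stage listed under "Included Models" and writes predictions to a `class` column:
+
+```python
+# Hypothetical usage of the pretrained classification pipeline; column names are assumptions.
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("roberta_soft_llm_multip_pipeline", lang = "en")
+
+df = spark.createDataFrame([["I love Spark NLP"]]).toDF("text")  # spark: active SparkSession
+
+annotations = pipeline.transform(df)
+annotations.select("class.result").show(truncate=False)
+```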
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_soft_llm_multip_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/Multiperspective/roberta-soft-llm_multip + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-romanurduclassification_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-romanurduclassification_pipeline_en.md new file mode 100644 index 00000000000000..54f35dc2c59c2b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-romanurduclassification_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English romanurduclassification_pipeline pipeline RoBertaForSequenceClassification from mwz +author: John Snow Labs +name: romanurduclassification_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`romanurduclassification_pipeline` is a English model originally trained by mwz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/romanurduclassification_pipeline_en_5.5.0_3.0_1725778312368.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/romanurduclassification_pipeline_en_5.5.0_3.0_1725778312368.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("romanurduclassification_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("romanurduclassification_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|romanurduclassification_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|473.2 MB| + +## References + +https://huggingface.co/mwz/RomanUrduClassification + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-rulebert_v0_4_k0_pipeline_it.md b/docs/_posts/ahmedlone127/2024-09-08-rulebert_v0_4_k0_pipeline_it.md new file mode 100644 index 00000000000000..2b958f9fe82d73 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-rulebert_v0_4_k0_pipeline_it.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Italian rulebert_v0_4_k0_pipeline pipeline XlmRoBertaForSequenceClassification from ribesstefano +author: John Snow Labs +name: rulebert_v0_4_k0_pipeline +date: 2024-09-08 +tags: [it, open_source, pipeline, onnx] +task: Text Classification +language: it +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`rulebert_v0_4_k0_pipeline` is a Italian model originally trained by ribesstefano. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rulebert_v0_4_k0_pipeline_it_5.5.0_3.0_1725780069100.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rulebert_v0_4_k0_pipeline_it_5.5.0_3.0_1725780069100.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("rulebert_v0_4_k0_pipeline", lang = "it") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("rulebert_v0_4_k0_pipeline", lang = "it") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rulebert_v0_4_k0_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|it| +|Size:|870.5 MB| + +## References + +https://huggingface.co/ribesstefano/RuleBert-v0.4-k0 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-semanlink_all_mpnet_base_v2_en.md b/docs/_posts/ahmedlone127/2024-09-08-semanlink_all_mpnet_base_v2_en.md new file mode 100644 index 00000000000000..6047d240b95926 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-semanlink_all_mpnet_base_v2_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English semanlink_all_mpnet_base_v2 MPNetEmbeddings from raphaelsty +author: John Snow Labs +name: semanlink_all_mpnet_base_v2 +date: 2024-09-08 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`semanlink_all_mpnet_base_v2` is a English model originally trained by raphaelsty. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/semanlink_all_mpnet_base_v2_en_5.5.0_3.0_1725769747647.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/semanlink_all_mpnet_base_v2_en_5.5.0_3.0_1725769747647.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("semanlink_all_mpnet_base_v2","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("semanlink_all_mpnet_base_v2","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
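+To get at the raw vectors produced by the example above, the annotation structs in the `embeddings` column (the name set via `setOutputCol`) can be exploded and their `embeddings` field selected:
+
+```python
+# Each annotation in the "embeddings" column carries the document vector in its `embeddings` field.
+pipelineDF.selectExpr("explode(embeddings) as emb") \
+    .selectExpr("emb.embeddings as vector") \
+    .show(1, truncate=80)
+```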
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|semanlink_all_mpnet_base_v2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/raphaelsty/semanlink_all_mpnet_base_v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-sent_memo_model_2500_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-sent_memo_model_2500_pipeline_en.md new file mode 100644 index 00000000000000..d0cc38025edf92 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-sent_memo_model_2500_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_memo_model_2500_pipeline pipeline XlmRoBertaSentenceEmbeddings from yemen2016 +author: John Snow Labs +name: sent_memo_model_2500_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_memo_model_2500_pipeline` is a English model originally trained by yemen2016. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_memo_model_2500_pipeline_en_5.5.0_3.0_1725759556062.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_memo_model_2500_pipeline_en_5.5.0_3.0_1725759556062.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_memo_model_2500_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_memo_model_2500_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
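+As with the other pipelines, `df` is assumed to exist. A short hedged sketch, assuming the pipeline reads a raw `text` column through its DocumentAssembler stage; printing the schema is a safe way to discover the name of the embeddings output column, which is not documented here:
+
+```python
+# Hypothetical input; the "text" column name is an assumption based on the DocumentAssembler stage.
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("sent_memo_model_2500_pipeline", lang = "en")
+
+df = spark.createDataFrame([["I love Spark NLP. It scales to large corpora."]]).toDF("text")
+annotations = pipeline.transform(df)
+annotations.printSchema()  # inspect to locate the sentence embeddings column
+```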
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_memo_model_2500_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/yemen2016/memo_model_2500 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-sent_norwegian_bokml_roberta_base_scandi_1e4_en.md b/docs/_posts/ahmedlone127/2024-09-08-sent_norwegian_bokml_roberta_base_scandi_1e4_en.md new file mode 100644 index 00000000000000..9aab4c78f4068d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-sent_norwegian_bokml_roberta_base_scandi_1e4_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_norwegian_bokml_roberta_base_scandi_1e4 XlmRoBertaSentenceEmbeddings from NbAiLab +author: John Snow Labs +name: sent_norwegian_bokml_roberta_base_scandi_1e4 +date: 2024-09-08 +tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_norwegian_bokml_roberta_base_scandi_1e4` is a English model originally trained by NbAiLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_norwegian_bokml_roberta_base_scandi_1e4_en_5.5.0_3.0_1725759428216.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_norwegian_bokml_roberta_base_scandi_1e4_en_5.5.0_3.0_1725759428216.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_norwegian_bokml_roberta_base_scandi_1e4","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_norwegian_bokml_roberta_base_scandi_1e4","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_norwegian_bokml_roberta_base_scandi_1e4| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/NbAiLab/nb-roberta-base-scandi-1e4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-sent_xlm_r_with_transliteration_max_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-sent_xlm_r_with_transliteration_max_pipeline_en.md new file mode 100644 index 00000000000000..2d2d2676f0c3bd --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-sent_xlm_r_with_transliteration_max_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_xlm_r_with_transliteration_max_pipeline pipeline XlmRoBertaSentenceEmbeddings from yihongLiu +author: John Snow Labs +name: sent_xlm_r_with_transliteration_max_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_xlm_r_with_transliteration_max_pipeline` is a English model originally trained by yihongLiu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_xlm_r_with_transliteration_max_pipeline_en_5.5.0_3.0_1725763650738.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_xlm_r_with_transliteration_max_pipeline_en_5.5.0_3.0_1725763650738.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_xlm_r_with_transliteration_max_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_xlm_r_with_transliteration_max_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_xlm_r_with_transliteration_max_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|843.9 MB| + +## References + +https://huggingface.co/yihongLiu/xlm-r-with-transliteration-max + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-sent_xlm_roberta_base_finetuned_amharic_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-sent_xlm_roberta_base_finetuned_amharic_pipeline_en.md new file mode 100644 index 00000000000000..82c67f0ce6ee4f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-sent_xlm_roberta_base_finetuned_amharic_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_xlm_roberta_base_finetuned_amharic_pipeline pipeline XlmRoBertaSentenceEmbeddings from Davlan +author: John Snow Labs +name: sent_xlm_roberta_base_finetuned_amharic_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_xlm_roberta_base_finetuned_amharic_pipeline` is a English model originally trained by Davlan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_amharic_pipeline_en_5.5.0_3.0_1725759142989.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_amharic_pipeline_en_5.5.0_3.0_1725759142989.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_xlm_roberta_base_finetuned_amharic_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_xlm_roberta_base_finetuned_amharic_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_xlm_roberta_base_finetuned_amharic_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Davlan/xlm-roberta-base-finetuned-amharic + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-sent_xlm_roberta_base_finetuned_malagasy_en.md b/docs/_posts/ahmedlone127/2024-09-08-sent_xlm_roberta_base_finetuned_malagasy_en.md new file mode 100644 index 00000000000000..d69aa897381dca --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-sent_xlm_roberta_base_finetuned_malagasy_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_xlm_roberta_base_finetuned_malagasy XlmRoBertaSentenceEmbeddings from Davlan +author: John Snow Labs +name: sent_xlm_roberta_base_finetuned_malagasy +date: 2024-09-08 +tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_xlm_roberta_base_finetuned_malagasy` is a English model originally trained by Davlan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_malagasy_en_5.5.0_3.0_1725759090735.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_malagasy_en_5.5.0_3.0_1725759090735.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_roberta_base_finetuned_malagasy","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_roberta_base_finetuned_malagasy","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_xlm_roberta_base_finetuned_malagasy| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Davlan/xlm-roberta-base-finetuned-malagasy \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-sent_xlm_roberta_base_finetuned_malagasy_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-sent_xlm_roberta_base_finetuned_malagasy_pipeline_en.md new file mode 100644 index 00000000000000..4a86cea194d4ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-sent_xlm_roberta_base_finetuned_malagasy_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_xlm_roberta_base_finetuned_malagasy_pipeline pipeline XlmRoBertaSentenceEmbeddings from Davlan +author: John Snow Labs +name: sent_xlm_roberta_base_finetuned_malagasy_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_xlm_roberta_base_finetuned_malagasy_pipeline` is a English model originally trained by Davlan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_malagasy_pipeline_en_5.5.0_3.0_1725759143171.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_malagasy_pipeline_en_5.5.0_3.0_1725759143171.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_xlm_roberta_base_finetuned_malagasy_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_xlm_roberta_base_finetuned_malagasy_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_xlm_roberta_base_finetuned_malagasy_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Davlan/xlm-roberta-base-finetuned-malagasy + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-sent_xlm_roberta_base_finetuned_questions_en.md b/docs/_posts/ahmedlone127/2024-09-08-sent_xlm_roberta_base_finetuned_questions_en.md new file mode 100644 index 00000000000000..590e72d4dd5834 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-sent_xlm_roberta_base_finetuned_questions_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English sent_xlm_roberta_base_finetuned_questions XlmRoBertaSentenceEmbeddings from lucazed +author: John Snow Labs +name: sent_xlm_roberta_base_finetuned_questions +date: 2024-09-08 +tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_xlm_roberta_base_finetuned_questions` is a English model originally trained by lucazed. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_questions_en_5.5.0_3.0_1725763023258.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_questions_en_5.5.0_3.0_1725763023258.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \ + .setInputCols(["document"]) \ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_roberta_base_finetuned_questions","en") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_roberta_base_finetuned_questions","en") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_xlm_roberta_base_finetuned_questions| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|976.8 MB| + +## References + +https://huggingface.co/lucazed/xlm-roberta-base-finetuned-questions \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-sent_xlm_roberta_base_finetuned_questions_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-sent_xlm_roberta_base_finetuned_questions_pipeline_en.md new file mode 100644 index 00000000000000..7ea77cfaa950c2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-sent_xlm_roberta_base_finetuned_questions_pipeline_en.md @@ -0,0 +1,71 @@ +--- +layout: model +title: English sent_xlm_roberta_base_finetuned_questions_pipeline pipeline XlmRoBertaSentenceEmbeddings from lucazed +author: John Snow Labs +name: sent_xlm_roberta_base_finetuned_questions_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sent_xlm_roberta_base_finetuned_questions_pipeline` is a English model originally trained by lucazed. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_questions_pipeline_en_5.5.0_3.0_1725763086183.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_xlm_roberta_base_finetuned_questions_pipeline_en_5.5.0_3.0_1725763086183.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sent_xlm_roberta_base_finetuned_questions_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sent_xlm_roberta_base_finetuned_questions_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sent_xlm_roberta_base_finetuned_questions_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|977.4 MB| + +## References + +https://huggingface.co/lucazed/xlm-roberta-base-finetuned-questions + +## Included Models + +- DocumentAssembler +- TokenizerModel +- SentenceDetectorDLModel +- XlmRoBertaSentenceEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-seq2seq_finetuned_slang_english_en.md b/docs/_posts/ahmedlone127/2024-09-08-seq2seq_finetuned_slang_english_en.md new file mode 100644 index 00000000000000..678a64b02f2693 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-seq2seq_finetuned_slang_english_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English seq2seq_finetuned_slang_english MarianTransformer from Lipas007 +author: John Snow Labs +name: seq2seq_finetuned_slang_english +date: 2024-09-08 +tags: [en, open_source, onnx, translation, marian] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MarianTransformer +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`seq2seq_finetuned_slang_english` is a English model originally trained by Lipas007. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/seq2seq_finetuned_slang_english_en_5.5.0_3.0_1725766666236.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/seq2seq_finetuned_slang_english_en_5.5.0_3.0_1725766666236.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+# split the input text into sentences before translation
+sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
+    .setInputCols(["document"]) \
+    .setOutputCol("sentence")
+
+# translate each detected sentence
+marian = MarianTransformer.pretrained("seq2seq_finetuned_slang_english","en") \
+    .setInputCols(["sentence"]) \
+    .setOutputCol("translation")
+
+pipeline = Pipeline().setStages([documentAssembler, sentenceDL, marian])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+// split the input text into sentences before translation
+val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
+    .setInputCols(Array("document"))
+    .setOutputCol("sentence")
+
+// translate each detected sentence
+val marian = MarianTransformer.pretrained("seq2seq_finetuned_slang_english","en")
+    .setInputCols(Array("sentence"))
+    .setOutputCol("translation")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, marian))
+val data = Seq("I love spark-nlp").toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
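+Once the pipeline above has run, the translated text can be read back from the `translation` column configured on the MarianTransformer stage:
+
+```python
+# "translation" is the output column set with setOutputCol("translation") in the example above.
+pipelineDF.select("translation.result").show(truncate=False)
+```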
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|seq2seq_finetuned_slang_english| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentences]| +|Output Labels:|[translation]| +|Language:|en| +|Size:|535.2 MB| + +## References + +https://huggingface.co/Lipas007/seq2seq-finetuned-slang-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-setfit_model_ireland_3labels_balanced_data_en.md b/docs/_posts/ahmedlone127/2024-09-08-setfit_model_ireland_3labels_balanced_data_en.md new file mode 100644 index 00000000000000..5c03d2aa6f46b7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-setfit_model_ireland_3labels_balanced_data_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English setfit_model_ireland_3labels_balanced_data MPNetEmbeddings from mitra-mir +author: John Snow Labs +name: setfit_model_ireland_3labels_balanced_data +date: 2024-09-08 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`setfit_model_ireland_3labels_balanced_data` is a English model originally trained by mitra-mir. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/setfit_model_ireland_3labels_balanced_data_en_5.5.0_3.0_1725769620254.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/setfit_model_ireland_3labels_balanced_data_en_5.5.0_3.0_1725769620254.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("setfit_model_ireland_3labels_balanced_data","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("setfit_model_ireland_3labels_balanced_data","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|setfit_model_ireland_3labels_balanced_data| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/mitra-mir/setfit-model-Ireland_3labels_balanced_data \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-setfit_model_ireland_3labels_balanced_data_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-setfit_model_ireland_3labels_balanced_data_pipeline_en.md new file mode 100644 index 00000000000000..917b3cd9eff508 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-setfit_model_ireland_3labels_balanced_data_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English setfit_model_ireland_3labels_balanced_data_pipeline pipeline MPNetEmbeddings from mitra-mir +author: John Snow Labs +name: setfit_model_ireland_3labels_balanced_data_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`setfit_model_ireland_3labels_balanced_data_pipeline` is a English model originally trained by mitra-mir. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/setfit_model_ireland_3labels_balanced_data_pipeline_en_5.5.0_3.0_1725769639187.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/setfit_model_ireland_3labels_balanced_data_pipeline_en_5.5.0_3.0_1725769639187.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("setfit_model_ireland_3labels_balanced_data_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("setfit_model_ireland_3labels_balanced_data_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|setfit_model_ireland_3labels_balanced_data_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/mitra-mir/setfit-model-Ireland_3labels_balanced_data + +## Included Models + +- DocumentAssembler +- MPNetEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-setfit_model_ireland_4labels_unbalanced_data_3epochs_en.md b/docs/_posts/ahmedlone127/2024-09-08-setfit_model_ireland_4labels_unbalanced_data_3epochs_en.md new file mode 100644 index 00000000000000..7e38360ef81eee --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-setfit_model_ireland_4labels_unbalanced_data_3epochs_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English setfit_model_ireland_4labels_unbalanced_data_3epochs MPNetEmbeddings from mitra-mir +author: John Snow Labs +name: setfit_model_ireland_4labels_unbalanced_data_3epochs +date: 2024-09-08 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`setfit_model_ireland_4labels_unbalanced_data_3epochs` is a English model originally trained by mitra-mir. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/setfit_model_ireland_4labels_unbalanced_data_3epochs_en_5.5.0_3.0_1725769458659.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/setfit_model_ireland_4labels_unbalanced_data_3epochs_en_5.5.0_3.0_1725769458659.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("setfit_model_ireland_4labels_unbalanced_data_3epochs","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("setfit_model_ireland_4labels_unbalanced_data_3epochs","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|setfit_model_ireland_4labels_unbalanced_data_3epochs| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/mitra-mir/setfit-model-Ireland_4labels_unbalanced_data_3epochs \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-setfit_model_ireland_binary_label1_epochs2_feb_28_2023_en.md b/docs/_posts/ahmedlone127/2024-09-08-setfit_model_ireland_binary_label1_epochs2_feb_28_2023_en.md new file mode 100644 index 00000000000000..21f4c42f749ac9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-setfit_model_ireland_binary_label1_epochs2_feb_28_2023_en.md @@ -0,0 +1,86 @@ +--- +layout: model +title: English setfit_model_ireland_binary_label1_epochs2_feb_28_2023 MPNetEmbeddings from mitra-mir +author: John Snow Labs +name: setfit_model_ireland_binary_label1_epochs2_feb_28_2023 +date: 2024-09-08 +tags: [en, open_source, onnx, embeddings, mpnet] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: MPNetEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MPNetEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`setfit_model_ireland_binary_label1_epochs2_feb_28_2023` is a English model originally trained by mitra-mir. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/setfit_model_ireland_binary_label1_epochs2_feb_28_2023_en_5.5.0_3.0_1725769196441.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/setfit_model_ireland_binary_label1_epochs2_feb_28_2023_en_5.5.0_3.0_1725769196441.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +embeddings = MPNetEmbeddings.pretrained("setfit_model_ireland_binary_label1_epochs2_feb_28_2023","en") \ + .setInputCols(["document"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val embeddings = MPNetEmbeddings.pretrained("setfit_model_ireland_binary_label1_epochs2_feb_28_2023","en") + .setInputCols(Array("document")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|setfit_model_ireland_binary_label1_epochs2_feb_28_2023| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document]| +|Output Labels:|[mpnet]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/mitra-mir/setfit_model_Ireland_binary_label1_epochs2_Feb_28_2023 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-sota_4_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-sota_4_pipeline_en.md new file mode 100644 index 00000000000000..90982d7f95b34b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-sota_4_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English sota_4_pipeline pipeline RoBertaForSequenceClassification from BaronSch +author: John Snow Labs +name: sota_4_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sota_4_pipeline` is a English model originally trained by BaronSch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sota_4_pipeline_en_5.5.0_3.0_1725778964088.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sota_4_pipeline_en_5.5.0_3.0_1725778964088.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("sota_4_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("sota_4_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sota_4_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|468.5 MB| + +## References + +https://huggingface.co/BaronSch/SOTA_4 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-spark_name_burmese_tonga_tonga_islands_english_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-spark_name_burmese_tonga_tonga_islands_english_pipeline_en.md new file mode 100644 index 00000000000000..3f991431ed2986 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-spark_name_burmese_tonga_tonga_islands_english_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English spark_name_burmese_tonga_tonga_islands_english_pipeline pipeline MarianTransformer from ihebaker10 +author: John Snow Labs +name: spark_name_burmese_tonga_tonga_islands_english_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Translation +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained MarianTransformer, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`spark_name_burmese_tonga_tonga_islands_english_pipeline` is a English model originally trained by ihebaker10. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/spark_name_burmese_tonga_tonga_islands_english_pipeline_en_5.5.0_3.0_1725766069800.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/spark_name_burmese_tonga_tonga_islands_english_pipeline_en_5.5.0_3.0_1725766069800.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("spark_name_burmese_tonga_tonga_islands_english_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("spark_name_burmese_tonga_tonga_islands_english_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|spark_name_burmese_tonga_tonga_islands_english_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|533.3 MB| + +## References + +https://huggingface.co/ihebaker10/spark-name-my-to-en + +## Included Models + +- DocumentAssembler +- SentenceDetectorDLModel +- MarianTransformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-speech_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-speech_pipeline_en.md new file mode 100644 index 00000000000000..742cd41ee64bee --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-speech_pipeline_en.md @@ -0,0 +1,69 @@ +--- +layout: model +title: English speech_pipeline pipeline WhisperForCTC from tluo23 +author: John Snow Labs +name: speech_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`speech_pipeline` is a English model originally trained by tluo23. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/speech_pipeline_en_5.5.0_3.0_1725753841977.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/speech_pipeline_en_5.5.0_3.0_1725753841977.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("speech_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("speech_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
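+The `df` in the snippet above must carry raw audio samples for the AudioAssembler stage. A heavily hedged sketch, assuming an `audio_content` input column of 16 kHz float samples (both the column name and the sampling rate are assumptions) and using librosa only to load the waveform:
+
+```python
+# Hypothetical audio input preparation; column name, sampling rate, and librosa usage are assumptions.
+import librosa
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("speech_pipeline", lang = "en")
+
+waveform, _ = librosa.load("sample.wav", sr=16000)  # mono float samples
+df = spark.createDataFrame([[waveform.tolist()]]).toDF("audio_content")
+
+annotations = pipeline.transform(df)
+annotations.printSchema()  # inspect to locate the transcription output column
+```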
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|speech_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.7 GB| + +## References + +https://huggingface.co/tluo23/speech + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-stego_classifier_checkpoint_epoch_0_2024_07_26_11_37_42_en.md b/docs/_posts/ahmedlone127/2024-09-08-stego_classifier_checkpoint_epoch_0_2024_07_26_11_37_42_en.md new file mode 100644 index 00000000000000..956b3137f4ffa3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-stego_classifier_checkpoint_epoch_0_2024_07_26_11_37_42_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English stego_classifier_checkpoint_epoch_0_2024_07_26_11_37_42 DistilBertForSequenceClassification from jvelja +author: John Snow Labs +name: stego_classifier_checkpoint_epoch_0_2024_07_26_11_37_42 +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`stego_classifier_checkpoint_epoch_0_2024_07_26_11_37_42` is a English model originally trained by jvelja. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/stego_classifier_checkpoint_epoch_0_2024_07_26_11_37_42_en_5.5.0_3.0_1725776844140.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/stego_classifier_checkpoint_epoch_0_2024_07_26_11_37_42_en_5.5.0_3.0_1725776844140.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("stego_classifier_checkpoint_epoch_0_2024_07_26_11_37_42","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("stego_classifier_checkpoint_epoch_0_2024_07_26_11_37_42", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
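+After the pipeline runs, the predicted label sits in the `class` output column defined above. A short, hypothetical inspection step:
+
+```python
+# One row per input text, with the label(s) predicted by the classifier.
+pipelineDF.select("text", "class.result").show(truncate=False)
+```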
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|stego_classifier_checkpoint_epoch_0_2024_07_26_11_37_42| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/jvelja/stego-classifier-checkpoint-epoch-0-2024-07-26_11-37-42 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-stego_classifier_checkpoint_epoch_0_2024_07_26_11_37_42_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-stego_classifier_checkpoint_epoch_0_2024_07_26_11_37_42_pipeline_en.md new file mode 100644 index 00000000000000..2744b0684af66e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-stego_classifier_checkpoint_epoch_0_2024_07_26_11_37_42_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English stego_classifier_checkpoint_epoch_0_2024_07_26_11_37_42_pipeline pipeline DistilBertForSequenceClassification from jvelja +author: John Snow Labs +name: stego_classifier_checkpoint_epoch_0_2024_07_26_11_37_42_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`stego_classifier_checkpoint_epoch_0_2024_07_26_11_37_42_pipeline` is a English model originally trained by jvelja. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/stego_classifier_checkpoint_epoch_0_2024_07_26_11_37_42_pipeline_en_5.5.0_3.0_1725776855991.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/stego_classifier_checkpoint_epoch_0_2024_07_26_11_37_42_pipeline_en_5.5.0_3.0_1725776855991.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("stego_classifier_checkpoint_epoch_0_2024_07_26_11_37_42_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("stego_classifier_checkpoint_epoch_0_2024_07_26_11_37_42_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
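+The snippet above assumes an existing DataFrame `df` with a `text` column. A minimal, hypothetical way to build one and read back the predictions:
+
+```python
+import sparknlp
+from sparknlp.pretrained import PretrainedPipeline
+
+spark = sparknlp.start()
+df = spark.createDataFrame([["This message looks perfectly ordinary."]]).toDF("text")
+
+pipeline = PretrainedPipeline("stego_classifier_checkpoint_epoch_0_2024_07_26_11_37_42_pipeline", lang="en")
+pipeline.transform(df).select("class.result").show(truncate=False)
+```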
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|stego_classifier_checkpoint_epoch_0_2024_07_26_11_37_42_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/jvelja/stego-classifier-checkpoint-epoch-0-2024-07-26_11-37-42 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-stego_classifier_checkpoint_epoch_10_2024_07_26_14_26_52_en.md b/docs/_posts/ahmedlone127/2024-09-08-stego_classifier_checkpoint_epoch_10_2024_07_26_14_26_52_en.md new file mode 100644 index 00000000000000..9ce316aff2e718 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-stego_classifier_checkpoint_epoch_10_2024_07_26_14_26_52_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English stego_classifier_checkpoint_epoch_10_2024_07_26_14_26_52 DistilBertForSequenceClassification from jvelja +author: John Snow Labs +name: stego_classifier_checkpoint_epoch_10_2024_07_26_14_26_52 +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`stego_classifier_checkpoint_epoch_10_2024_07_26_14_26_52` is a English model originally trained by jvelja. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/stego_classifier_checkpoint_epoch_10_2024_07_26_14_26_52_en_5.5.0_3.0_1725775545845.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/stego_classifier_checkpoint_epoch_10_2024_07_26_14_26_52_en_5.5.0_3.0_1725775545845.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("stego_classifier_checkpoint_epoch_10_2024_07_26_14_26_52","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("stego_classifier_checkpoint_epoch_10_2024_07_26_14_26_52", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|stego_classifier_checkpoint_epoch_10_2024_07_26_14_26_52| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/jvelja/stego-classifier-checkpoint-epoch-10-2024-07-26_14-26-52 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-test999_en.md b/docs/_posts/ahmedlone127/2024-09-08-test999_en.md new file mode 100644 index 00000000000000..2577acdc4beacf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-test999_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English test999 XlmRoBertaEmbeddings from NbAiLabArchive +author: John Snow Labs +name: test999 +date: 2024-09-08 +tags: [en, open_source, onnx, embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`test999` is a English model originally trained by NbAiLabArchive. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test999_en_5.5.0_3.0_1725770533847.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test999_en_5.5.0_3.0_1725770533847.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("test999","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("test999","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
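+To work with the raw vectors after running the snippet above, the token-level embeddings can be exploded out of the annotation column. A short, hypothetical follow-up:
+
+```python
+from pyspark.sql import functions as F
+
+# One row per token: the token text and its embedding vector.
+pipelineDF.select(F.explode("embeddings").alias("ann")) \
+    .select(F.col("ann.result").alias("token"), F.col("ann.embeddings").alias("vector")) \
+    .show(truncate=80)
+```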
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test999| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[xlm_roberta]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/NbAiLabArchive/test999 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-test999_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-test999_pipeline_en.md new file mode 100644 index 00000000000000..a7146dc0244456 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-test999_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English test999_pipeline pipeline XlmRoBertaEmbeddings from NbAiLabArchive +author: John Snow Labs +name: test999_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`test999_pipeline` is a English model originally trained by NbAiLabArchive. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test999_pipeline_en_5.5.0_3.0_1725770579618.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test999_pipeline_en_5.5.0_3.0_1725770579618.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("test999_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("test999_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
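+For quick experiments, the pretrained pipeline can also be called on a plain string instead of a DataFrame. A minimal sketch, with column names assumed from the included models listed below:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("test999_pipeline", lang="en")
+# annotate() returns a dict keyed by output column name, e.g. "document", "token", "embeddings".
+result = pipeline.annotate("I love spark-nlp")
+print(result["token"])
+```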
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test999_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/NbAiLabArchive/test999 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaEmbeddings \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-test_trainer4_en.md b/docs/_posts/ahmedlone127/2024-09-08-test_trainer4_en.md new file mode 100644 index 00000000000000..29c2a34ae15f58 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-test_trainer4_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English test_trainer4 DistilBertForSequenceClassification from SimoneJLaudani +author: John Snow Labs +name: test_trainer4 +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`test_trainer4` is a English model originally trained by SimoneJLaudani. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_trainer4_en_5.5.0_3.0_1725777283531.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_trainer4_en_5.5.0_3.0_1725777283531.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("test_trainer4","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("test_trainer4", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test_trainer4| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|246.0 MB| + +## References + +https://huggingface.co/SimoneJLaudani/test_trainer4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-test_trainer4_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-test_trainer4_pipeline_en.md new file mode 100644 index 00000000000000..8fe128d3dc1694 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-test_trainer4_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English test_trainer4_pipeline pipeline DistilBertForSequenceClassification from SimoneJLaudani +author: John Snow Labs +name: test_trainer4_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`test_trainer4_pipeline` is a English model originally trained by SimoneJLaudani. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_trainer4_pipeline_en_5.5.0_3.0_1725777295152.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_trainer4_pipeline_en_5.5.0_3.0_1725777295152.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("test_trainer4_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("test_trainer4_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test_trainer4_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|246.1 MB| + +## References + +https://huggingface.co/SimoneJLaudani/test_trainer4 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-testing_en.md b/docs/_posts/ahmedlone127/2024-09-08-testing_en.md new file mode 100644 index 00000000000000..56ca587d78e668 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-testing_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English testing RoBertaForSequenceClassification from NoCaptain +author: John Snow Labs +name: testing +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`testing` is a English model originally trained by NoCaptain. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/testing_en_5.5.0_3.0_1725779135640.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/testing_en_5.5.0_3.0_1725779135640.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("testing","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("testing", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|testing| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|309.0 MB| + +## References + +https://huggingface.co/NoCaptain/TESTING \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-tmp_trainer_ubermenchh_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-tmp_trainer_ubermenchh_pipeline_en.md new file mode 100644 index 00000000000000..e3660848d0d2af --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-tmp_trainer_ubermenchh_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English tmp_trainer_ubermenchh_pipeline pipeline DistilBertForSequenceClassification from ubermenchh +author: John Snow Labs +name: tmp_trainer_ubermenchh_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tmp_trainer_ubermenchh_pipeline` is a English model originally trained by ubermenchh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tmp_trainer_ubermenchh_pipeline_en_5.5.0_3.0_1725777325993.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tmp_trainer_ubermenchh_pipeline_en_5.5.0_3.0_1725777325993.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("tmp_trainer_ubermenchh_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("tmp_trainer_ubermenchh_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tmp_trainer_ubermenchh_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/ubermenchh/tmp_trainer + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-trainer1f_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-trainer1f_pipeline_en.md new file mode 100644 index 00000000000000..52e46b97518753 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-trainer1f_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English trainer1f_pipeline pipeline DistilBertForSequenceClassification from SimoneJLaudani +author: John Snow Labs +name: trainer1f_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`trainer1f_pipeline` is a English model originally trained by SimoneJLaudani. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/trainer1f_pipeline_en_5.5.0_3.0_1725777206683.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/trainer1f_pipeline_en_5.5.0_3.0_1725777206683.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("trainer1f_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("trainer1f_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|trainer1f_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/SimoneJLaudani/trainer1F + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-trainer_output_dir_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-trainer_output_dir_pipeline_en.md new file mode 100644 index 00000000000000..d6c5720d7393d7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-trainer_output_dir_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English trainer_output_dir_pipeline pipeline DistilBertForSequenceClassification from sunithapillai +author: John Snow Labs +name: trainer_output_dir_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`trainer_output_dir_pipeline` is a English model originally trained by sunithapillai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/trainer_output_dir_pipeline_en_5.5.0_3.0_1725775045236.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/trainer_output_dir_pipeline_en_5.5.0_3.0_1725775045236.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("trainer_output_dir_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("trainer_output_dir_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|trainer_output_dir_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|250.4 MB| + +## References + +https://huggingface.co/sunithapillai/trainer_output_dir + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-trecdl22_crossencoder_roberta_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-trecdl22_crossencoder_roberta_pipeline_en.md new file mode 100644 index 00000000000000..b0a061b4213148 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-trecdl22_crossencoder_roberta_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English trecdl22_crossencoder_roberta_pipeline pipeline RoBertaForSequenceClassification from naver +author: John Snow Labs +name: trecdl22_crossencoder_roberta_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`trecdl22_crossencoder_roberta_pipeline` is a English model originally trained by naver. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/trecdl22_crossencoder_roberta_pipeline_en_5.5.0_3.0_1725779336555.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/trecdl22_crossencoder_roberta_pipeline_en_5.5.0_3.0_1725779336555.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("trecdl22_crossencoder_roberta_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("trecdl22_crossencoder_roberta_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|trecdl22_crossencoder_roberta_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|677.5 MB| + +## References + +https://huggingface.co/naver/trecdl22-crossencoder-roberta + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-tweetcat_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-tweetcat_pipeline_en.md new file mode 100644 index 00000000000000..2b995c2b4428b7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-tweetcat_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English tweetcat_pipeline pipeline RoBertaForSequenceClassification from ivanresh +author: John Snow Labs +name: tweetcat_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tweetcat_pipeline` is a English model originally trained by ivanresh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tweetcat_pipeline_en_5.5.0_3.0_1725778648852.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tweetcat_pipeline_en_5.5.0_3.0_1725778648852.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("tweetcat_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("tweetcat_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tweetcat_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|425.8 MB| + +## References + +https://huggingface.co/ivanresh/TweetCat + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-twitter_roberta_base_topic_latest_en.md b/docs/_posts/ahmedlone127/2024-09-08-twitter_roberta_base_topic_latest_en.md new file mode 100644 index 00000000000000..17be588efa6615 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-twitter_roberta_base_topic_latest_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English twitter_roberta_base_topic_latest RoBertaForSequenceClassification from cardiffnlp +author: John Snow Labs +name: twitter_roberta_base_topic_latest +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`twitter_roberta_base_topic_latest` is a English model originally trained by cardiffnlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_topic_latest_en_5.5.0_3.0_1725778181832.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/twitter_roberta_base_topic_latest_en_5.5.0_3.0_1725778181832.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("twitter_roberta_base_topic_latest","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("twitter_roberta_base_topic_latest", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|twitter_roberta_base_topic_latest| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|468.3 MB| + +## References + +https://huggingface.co/cardiffnlp/twitter-roberta-base-topic-latest \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-twitter_spam_classifier_en.md b/docs/_posts/ahmedlone127/2024-09-08-twitter_spam_classifier_en.md new file mode 100644 index 00000000000000..c391fa61aa410b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-twitter_spam_classifier_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English twitter_spam_classifier BertForSequenceClassification from Delphia +author: John Snow Labs +name: twitter_spam_classifier +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, bert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`twitter_spam_classifier` is a English model originally trained by Delphia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/twitter_spam_classifier_en_5.5.0_3.0_1725761510305.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/twitter_spam_classifier_en_5.5.0_3.0_1725761510305.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = BertForSequenceClassification.pretrained("twitter_spam_classifier","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = BertForSequenceClassification.pretrained("twitter_spam_classifier", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|twitter_spam_classifier| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Delphia/twitter-spam-classifier \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-updated_distilbert_stance_detection_en.md b/docs/_posts/ahmedlone127/2024-09-08-updated_distilbert_stance_detection_en.md new file mode 100644 index 00000000000000..ae96bc6c65fc6f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-updated_distilbert_stance_detection_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English updated_distilbert_stance_detection DistilBertForSequenceClassification from alpcaferoglu +author: John Snow Labs +name: updated_distilbert_stance_detection +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, distilbert] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`updated_distilbert_stance_detection` is a English model originally trained by alpcaferoglu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/updated_distilbert_stance_detection_en_5.5.0_3.0_1725764437484.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/updated_distilbert_stance_detection_en_5.5.0_3.0_1725764437484.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = DistilBertForSequenceClassification.pretrained("updated_distilbert_stance_detection","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = DistilBertForSequenceClassification.pretrained("updated_distilbert_stance_detection", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|updated_distilbert_stance_detection| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|249.4 MB| + +## References + +https://huggingface.co/alpcaferoglu/updated-distilbert-stance-detection \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-updated_distilbert_stance_detection_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-updated_distilbert_stance_detection_pipeline_en.md new file mode 100644 index 00000000000000..b22dbfb25381fe --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-updated_distilbert_stance_detection_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English updated_distilbert_stance_detection_pipeline pipeline DistilBertForSequenceClassification from alpcaferoglu +author: John Snow Labs +name: updated_distilbert_stance_detection_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`updated_distilbert_stance_detection_pipeline` is a English model originally trained by alpcaferoglu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/updated_distilbert_stance_detection_pipeline_en_5.5.0_3.0_1725764449511.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/updated_distilbert_stance_detection_pipeline_en_5.5.0_3.0_1725764449511.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("updated_distilbert_stance_detection_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("updated_distilbert_stance_detection_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|updated_distilbert_stance_detection_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|249.5 MB| + +## References + +https://huggingface.co/alpcaferoglu/updated-distilbert-stance-detection + +## Included Models + +- DocumentAssembler +- TokenizerModel +- DistilBertForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-usclm_distilbert_base_uncased_mk1_en.md b/docs/_posts/ahmedlone127/2024-09-08-usclm_distilbert_base_uncased_mk1_en.md new file mode 100644 index 00000000000000..f58bb287a39d96 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-usclm_distilbert_base_uncased_mk1_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English usclm_distilbert_base_uncased_mk1 DistilBertEmbeddings from hyperdemocracy +author: John Snow Labs +name: usclm_distilbert_base_uncased_mk1 +date: 2024-09-08 +tags: [en, open_source, onnx, embeddings, distilbert] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`usclm_distilbert_base_uncased_mk1` is a English model originally trained by hyperdemocracy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/usclm_distilbert_base_uncased_mk1_en_5.5.0_3.0_1725775981394.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/usclm_distilbert_base_uncased_mk1_en_5.5.0_3.0_1725775981394.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("usclm_distilbert_base_uncased_mk1","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("usclm_distilbert_base_uncased_mk1","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|usclm_distilbert_base_uncased_mk1| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[distilbert]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/hyperdemocracy/usclm-distilbert-base-uncased-mk1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-w2l_en.md b/docs/_posts/ahmedlone127/2024-09-08-w2l_en.md new file mode 100644 index 00000000000000..040fa984ac790b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-w2l_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English w2l RoBertaForSequenceClassification from aloxatel +author: John Snow Labs +name: w2l +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: RoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`w2l` is a English model originally trained by aloxatel. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/w2l_en_5.5.0_3.0_1725779238927.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/w2l_en_5.5.0_3.0_1725779238927.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = RoBertaForSequenceClassification.pretrained("w2l","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("document"))
+    .setOutputCol("token")
+
+val sequenceClassifier = RoBertaForSequenceClassification.pretrained("w2l", "en")
+    .setInputCols(Array("document","token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|w2l| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/aloxatel/W2L \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-w2l_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-w2l_pipeline_en.md new file mode 100644 index 00000000000000..88d38359882a59 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-w2l_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English w2l_pipeline pipeline RoBertaForSequenceClassification from aloxatel +author: John Snow Labs +name: w2l_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained RoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`w2l_pipeline` is a English model originally trained by aloxatel. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/w2l_pipeline_en_5.5.0_3.0_1725779320823.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/w2l_pipeline_en_5.5.0_3.0_1725779320823.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("w2l_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("w2l_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|w2l_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/aloxatel/W2L + +## Included Models + +- DocumentAssembler +- TokenizerModel +- RoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-whisper_atcosim3_en.md b/docs/_posts/ahmedlone127/2024-09-08-whisper_atcosim3_en.md new file mode 100644 index 00000000000000..b43c53babbe062 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-whisper_atcosim3_en.md @@ -0,0 +1,84 @@ +--- +layout: model +title: English whisper_atcosim3 WhisperForCTC from luigisaetta +author: John Snow Labs +name: whisper_atcosim3 +date: 2024-09-08 +tags: [en, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_atcosim3` is a English model originally trained by luigisaetta. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_atcosim3_en_5.5.0_3.0_1725754148710.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_atcosim3_en_5.5.0_3.0_1725754148710.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+audioAssembler = AudioAssembler() \
+    .setInputCol("audio_content") \
+    .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_atcosim3","en") \
+    .setInputCols(["audio_assembler"]) \
+    .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+# 'data' is a DataFrame with an "audio_content" column holding arrays of float samples
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val audioAssembler = new AudioAssembler()
+    .setInputCol("audio_content")
+    .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_atcosim3", "en")
+    .setInputCols(Array("audio_assembler"))
+    .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+// 'data' is a DataFrame with an "audio_content" column holding arrays of float samples
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
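+The `data` DataFrame in the snippet above is assumed to already exist. A minimal, hypothetical way to build it from a WAV file (the file path and the use of `librosa` are illustrative; Whisper models generally expect 16 kHz mono samples):
+
+```python
+import sparknlp
+import librosa  # assumed audio-decoding helper
+
+spark = sparknlp.start()
+
+samples, _ = librosa.load("atc_recording.wav", sr=16000)
+data = spark.createDataFrame([[samples.tolist()]]).toDF("audio_content")
+```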
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_atcosim3| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|en| +|Size:|4.8 GB| + +## References + +https://huggingface.co/luigisaetta/whisper-atcosim3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-whisper_small_finetuned_common_voice_marathi_marh_mr.md b/docs/_posts/ahmedlone127/2024-09-08-whisper_small_finetuned_common_voice_marathi_marh_mr.md new file mode 100644 index 00000000000000..d7f12d5ba6efa0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-whisper_small_finetuned_common_voice_marathi_marh_mr.md @@ -0,0 +1,84 @@ +--- +layout: model +title: Marathi whisper_small_finetuned_common_voice_marathi_marh WhisperForCTC from VinayHajare +author: John Snow Labs +name: whisper_small_finetuned_common_voice_marathi_marh +date: 2024-09-08 +tags: [mr, open_source, onnx, asr, whisper] +task: Automatic Speech Recognition +language: mr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: WhisperForCTC +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_finetuned_common_voice_marathi_marh` is a Marathi model originally trained by VinayHajare. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_finetuned_common_voice_marathi_marh_mr_5.5.0_3.0_1725753742102.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_finetuned_common_voice_marathi_marh_mr_5.5.0_3.0_1725753742102.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# `data` is assumed to be a DataFrame with raw audio floats in an "audio_content" column.
+audioAssembler = AudioAssembler() \
+  .setInputCol("audio_content") \
+  .setOutputCol("audio_assembler")
+
+speechToText = WhisperForCTC.pretrained("whisper_small_finetuned_common_voice_marathi_marh","mr") \
+  .setInputCols(["audio_assembler"]) \
+  .setOutputCol("text")
+
+pipeline = Pipeline().setStages([audioAssembler, speechToText])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// `data` is assumed to be a DataFrame with raw audio floats in an "audio_content" column.
+val audioAssembler = new AudioAssembler()
+  .setInputCol("audio_content")
+  .setOutputCol("audio_assembler")
+
+val speechToText = WhisperForCTC.pretrained("whisper_small_finetuned_common_voice_marathi_marh", "mr")
+  .setInputCols(Array("audio_assembler"))
+  .setOutputCol("text")
+
+val pipeline = new Pipeline().setStages(Array(audioAssembler, speechToText))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
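+
+As a follow-up sketch (not shown in the original example), the transcription can be read from the `text` output column of `pipelineDF`:
+
+```python
+# Each row carries the recognised text as annotations in the "text" column.
+pipelineDF.selectExpr("explode(text.result) as transcription").show(truncate=False)
+```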
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_finetuned_common_voice_marathi_marh| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[audio_assembler]| +|Output Labels:|[text]| +|Language:|mr| +|Size:|1.7 GB| + +## References + +https://huggingface.co/VinayHajare/whisper-small-finetuned-common-voice-mr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-whisper_small_finetuned_common_voice_marathi_marh_pipeline_mr.md b/docs/_posts/ahmedlone127/2024-09-08-whisper_small_finetuned_common_voice_marathi_marh_pipeline_mr.md new file mode 100644 index 00000000000000..006f2ee87cdb88 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-whisper_small_finetuned_common_voice_marathi_marh_pipeline_mr.md @@ -0,0 +1,69 @@ +--- +layout: model +title: Marathi whisper_small_finetuned_common_voice_marathi_marh_pipeline pipeline WhisperForCTC from VinayHajare +author: John Snow Labs +name: whisper_small_finetuned_common_voice_marathi_marh_pipeline +date: 2024-09-08 +tags: [mr, open_source, pipeline, onnx] +task: Automatic Speech Recognition +language: mr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained WhisperForCTC, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`whisper_small_finetuned_common_voice_marathi_marh_pipeline` is a Marathi model originally trained by VinayHajare. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/whisper_small_finetuned_common_voice_marathi_marh_pipeline_mr_5.5.0_3.0_1725753827575.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/whisper_small_finetuned_common_voice_marathi_marh_pipeline_mr_5.5.0_3.0_1725753827575.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("whisper_small_finetuned_common_voice_marathi_marh_pipeline", lang = "mr") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("whisper_small_finetuned_common_voice_marathi_marh_pipeline", lang = "mr") +val annotations = pipeline.transform(df) + +``` +
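+
+The `df` referenced above is not defined in the example. A minimal sketch follows, assuming the raw audio floats are decoded with `librosa` (an illustrative choice) into the `audio_content` column consumed by the pipeline's `AudioAssembler`:
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+import librosa
+
+raw_floats, _ = librosa.load("path/to/marathi_sample.wav", sr=16000)
+df = spark.createDataFrame([[raw_floats.tolist()]]).toDF("audio_content")
+
+pipeline = PretrainedPipeline("whisper_small_finetuned_common_voice_marathi_marh_pipeline", lang="mr")
+annotations = pipeline.transform(df)
+
+# The transcription column name ("text") is assumed from the underlying WhisperForCTC annotator.
+annotations.selectExpr("explode(text.result) as transcription").show(truncate=False)
+```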
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|whisper_small_finetuned_common_voice_marathi_marh_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|mr| +|Size:|1.7 GB| + +## References + +https://huggingface.co/VinayHajare/whisper-small-finetuned-common-voice-mr + +## Included Models + +- AudioAssembler +- WhisperForCTC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_final_mixed_aug_insert_bert_2_en.md b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_final_mixed_aug_insert_bert_2_en.md new file mode 100644 index 00000000000000..c9d4428d7d0833 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_final_mixed_aug_insert_bert_2_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_final_mixed_aug_insert_bert_2 XlmRoBertaForSequenceClassification from ThuyNT03 +author: John Snow Labs +name: xlm_roberta_base_final_mixed_aug_insert_bert_2 +date: 2024-09-08 +tags: [en, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_final_mixed_aug_insert_bert_2` is a English model originally trained by ThuyNT03. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_final_mixed_aug_insert_bert_2_en_5.5.0_3.0_1725780608745.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_final_mixed_aug_insert_bert_2_en_5.5.0_3.0_1725780608745.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+  .setInputCol('text') \
+  .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+  .setInputCols(['document']) \
+  .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_final_mixed_aug_insert_bert_2","en") \
+  .setInputCols(["document","token"]) \
+  .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_base_final_mixed_aug_insert_bert_2", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
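+
+A short usage sketch: the predicted label can be read from the `class` output column produced by the classifier above.
+
+```python
+# `class` is an array of annotations; its `result` field holds the predicted label.
+pipelineDF.selectExpr("text", "explode(`class`.result) as predicted_label").show(truncate=False)
+```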
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_final_mixed_aug_insert_bert_2| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|795.4 MB| + +## References + +https://huggingface.co/ThuyNT03/xlm-roberta-base-Final_Mixed-aug_insert_BERT-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_malagasy_en.md b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_malagasy_en.md new file mode 100644 index 00000000000000..1bf62f52bcdff5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_malagasy_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_malagasy XlmRoBertaEmbeddings from Davlan +author: John Snow Labs +name: xlm_roberta_base_finetuned_malagasy +date: 2024-09-08 +tags: [en, open_source, onnx, embeddings, xlm_roberta] +task: Embeddings +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_malagasy` is a English model originally trained by Davlan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_malagasy_en_5.5.0_3.0_1725770400121.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_malagasy_en_5.5.0_3.0_1725770400121.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = XlmRoBertaEmbeddings.pretrained("xlm_roberta_base_finetuned_malagasy","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([documentAssembler, tokenizer, embeddings]) +data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text") +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols(Array("document")) + .setOutputCol("token") + +val embeddings = XlmRoBertaEmbeddings.pretrained("xlm_roberta_base_finetuned_malagasy","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) +val data = Seq("I love spark-nlp").toDF("text") +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) + +``` +
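+
+To illustrate how the token-level vectors could be inspected afterwards (a sketch, not part of the original example):
+
+```python
+# Each annotation in the "embeddings" column holds the token text in `result`
+# and its vector in the `embeddings` field.
+pipelineDF.selectExpr("explode(embeddings) as emb") \
+    .selectExpr("emb.result as token", "emb.embeddings as vector") \
+    .show(truncate=False)
+```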
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_malagasy| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[xlm_roberta]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Davlan/xlm-roberta-base-finetuned-malagasy \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_all_likejazz_en.md b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_all_likejazz_en.md new file mode 100644 index 00000000000000..714d2b5d20c36f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_all_likejazz_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_all_likejazz XlmRoBertaForTokenClassification from likejazz +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_all_likejazz +date: 2024-09-08 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_all_likejazz` is a English model originally trained by likejazz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_likejazz_en_5.5.0_3.0_1725773891024.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_likejazz_en_5.5.0_3.0_1725773891024.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+  .setInputCol('text') \
+  .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+  .setInputCols(['document']) \
+  .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_all_likejazz","en") \
+  .setInputCols(["document","token"]) \
+  .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_all_likejazz", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
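+
+As a usage sketch, the predicted IOB tags land in the `ner` output column and can be listed per input row:
+
+```python
+# `ner.result` is the array of predicted tags, one entry per token.
+pipelineDF.selectExpr("explode(ner.result) as ner_tag").show(truncate=False)
+```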
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_all_likejazz| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|854.4 MB| + +## References + +https://huggingface.co/likejazz/xlm-roberta-base-finetuned-panx-all \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_all_likejazz_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_all_likejazz_pipeline_en.md new file mode 100644 index 00000000000000..1b965f85b03d10 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_all_likejazz_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_all_likejazz_pipeline pipeline XlmRoBertaForTokenClassification from likejazz +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_all_likejazz_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_all_likejazz_pipeline` is a English model originally trained by likejazz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_likejazz_pipeline_en_5.5.0_3.0_1725773969676.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_likejazz_pipeline_en_5.5.0_3.0_1725773969676.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_all_likejazz_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_all_likejazz_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
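+
+The `df` above is assumed to be a DataFrame with a `text` column; the sample sentence below is illustrative only.
+
+```python
+from sparknlp.pretrained import PretrainedPipeline
+
+df = spark.createDataFrame([["John Snow Labs is based in Delaware."]]).toDF("text")
+
+pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_all_likejazz_pipeline", lang="en")
+annotations = pipeline.transform(df)
+
+# For a single string, `annotate` returns a plain Python dict of results.
+print(pipeline.annotate("John Snow Labs is based in Delaware."))
+```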
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_all_likejazz_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|854.4 MB| + +## References + +https://huggingface.co/likejazz/xlm-roberta-base-finetuned-panx-all + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_all_pockypocky_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_all_pockypocky_pipeline_en.md new file mode 100644 index 00000000000000..662de6424703c3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_all_pockypocky_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_all_pockypocky_pipeline pipeline XlmRoBertaForTokenClassification from pockypocky +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_all_pockypocky_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_all_pockypocky_pipeline` is a English model originally trained by pockypocky. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_pockypocky_pipeline_en_5.5.0_3.0_1725772687119.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_all_pockypocky_pipeline_en_5.5.0_3.0_1725772687119.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_all_pockypocky_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_all_pockypocky_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_all_pockypocky_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|848.0 MB| + +## References + +https://huggingface.co/pockypocky/xlm-roberta-base-finetuned-panx-all + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_english_iis2009002_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_english_iis2009002_pipeline_en.md new file mode 100644 index 00000000000000..c04efd90f7e29f --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_english_iis2009002_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_english_iis2009002_pipeline pipeline XlmRoBertaForTokenClassification from iis2009002 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_english_iis2009002_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_english_iis2009002_pipeline` is a English model originally trained by iis2009002. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_iis2009002_pipeline_en_5.5.0_3.0_1725774396337.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_english_iis2009002_pipeline_en_5.5.0_3.0_1725774396337.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_english_iis2009002_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_english_iis2009002_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_english_iis2009002_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|826.4 MB| + +## References + +https://huggingface.co/iis2009002/xlm-roberta-base-finetuned-panx-en + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_french_goldenk_en.md b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_french_goldenk_en.md new file mode 100644 index 00000000000000..029bc5af4b5e6e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_french_goldenk_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_french_goldenk XlmRoBertaForTokenClassification from goldenk +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_french_goldenk +date: 2024-09-08 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_french_goldenk` is a English model originally trained by goldenk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_goldenk_en_5.5.0_3.0_1725773444523.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_french_goldenk_en_5.5.0_3.0_1725773444523.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+  .setInputCol('text') \
+  .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+  .setInputCols(['document']) \
+  .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_french_goldenk","en") \
+  .setInputCols(["document","token"]) \
+  .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_french_goldenk", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
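+
+For quick experiments on single strings, the fitted `pipelineModel` above can also be wrapped in a `LightPipeline`; the French sample sentence is illustrative only.
+
+```python
+from sparknlp.base import LightPipeline
+
+light = LightPipeline(pipelineModel)
+
+# Returns a dict mapping output column names (e.g. "ner") to lists of results.
+print(light.annotate("Emmanuel Macron est né à Amiens."))
+```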
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_french_goldenk| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|840.9 MB| + +## References + +https://huggingface.co/goldenk/xlm-roberta-base-finetuned-panx-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_german_fernweh23_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_german_fernweh23_pipeline_en.md new file mode 100644 index 00000000000000..05e28e962c868e --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_german_fernweh23_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_fernweh23_pipeline pipeline XlmRoBertaForTokenClassification from Fernweh23 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_fernweh23_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_fernweh23_pipeline` is a English model originally trained by Fernweh23. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_fernweh23_pipeline_en_5.5.0_3.0_1725773650533.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_fernweh23_pipeline_en_5.5.0_3.0_1725773650533.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_fernweh23_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_fernweh23_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_fernweh23_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|840.8 MB| + +## References + +https://huggingface.co/Fernweh23/xlm-roberta-base-finetuned-panx-de + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_german_french_alkampfer_en.md b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_german_french_alkampfer_en.md new file mode 100644 index 00000000000000..8ba705da4ac29a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_german_french_alkampfer_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_alkampfer XlmRoBertaForTokenClassification from alkampfer +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_alkampfer +date: 2024-09-08 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_alkampfer` is a English model originally trained by alkampfer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_alkampfer_en_5.5.0_3.0_1725773570769.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_alkampfer_en_5.5.0_3.0_1725773570769.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+  .setInputCol('text') \
+  .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+  .setInputCols(['document']) \
+  .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_alkampfer","en") \
+  .setInputCols(["document","token"]) \
+  .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_french_alkampfer", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_alkampfer| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|858.2 MB| + +## References + +https://huggingface.co/alkampfer/xlm-roberta-base-finetuned-panx-de-fr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_german_french_buruzaemon_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_german_french_buruzaemon_pipeline_en.md new file mode 100644 index 00000000000000..898421848b53aa --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_german_french_buruzaemon_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_french_buruzaemon_pipeline pipeline XlmRoBertaForTokenClassification from buruzaemon +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_french_buruzaemon_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_french_buruzaemon_pipeline` is a English model originally trained by buruzaemon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_buruzaemon_pipeline_en_5.5.0_3.0_1725772938953.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_french_buruzaemon_pipeline_en_5.5.0_3.0_1725772938953.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_french_buruzaemon_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_german_french_buruzaemon_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_french_buruzaemon_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|843.4 MB| + +## References + +https://huggingface.co/buruzaemon/xlm-roberta-base-finetuned-panx-de-fr + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_german_nitin1690_en.md b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_german_nitin1690_en.md new file mode 100644 index 00000000000000..f49be035f17979 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_german_nitin1690_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_german_nitin1690 XlmRoBertaForTokenClassification from nitin1690 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_german_nitin1690 +date: 2024-09-08 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_german_nitin1690` is a English model originally trained by nitin1690. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_nitin1690_en_5.5.0_3.0_1725773061099.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_german_nitin1690_en_5.5.0_3.0_1725773061099.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+  .setInputCol('text') \
+  .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+  .setInputCols(['document']) \
+  .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_nitin1690","en") \
+  .setInputCols(["document","token"]) \
+  .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_german_nitin1690", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_german_nitin1690| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|840.8 MB| + +## References + +https://huggingface.co/nitin1690/xlm-roberta-base-finetuned-panx-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_italian_aaa01101312_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_italian_aaa01101312_pipeline_en.md new file mode 100644 index 00000000000000..44e587c06fb86c --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_italian_aaa01101312_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_italian_aaa01101312_pipeline pipeline XlmRoBertaForTokenClassification from AAA01101312 +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_italian_aaa01101312_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_italian_aaa01101312_pipeline` is a English model originally trained by AAA01101312. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_aaa01101312_pipeline_en_5.5.0_3.0_1725774210789.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_aaa01101312_pipeline_en_5.5.0_3.0_1725774210789.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_italian_aaa01101312_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_italian_aaa01101312_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_italian_aaa01101312_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|816.8 MB| + +## References + +https://huggingface.co/AAA01101312/xlm-roberta-base-finetuned-panx-it + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_italian_aiventurer_en.md b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_italian_aiventurer_en.md new file mode 100644 index 00000000000000..5ee958d573598b --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_italian_aiventurer_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_italian_aiventurer XlmRoBertaForTokenClassification from AIventurer +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_italian_aiventurer +date: 2024-09-08 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_italian_aiventurer` is a English model originally trained by AIventurer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_aiventurer_en_5.5.0_3.0_1725772664578.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_aiventurer_en_5.5.0_3.0_1725772664578.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+  .setInputCol('text') \
+  .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+  .setInputCols(['document']) \
+  .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_italian_aiventurer","en") \
+  .setInputCols(["document","token"]) \
+  .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_italian_aiventurer", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_italian_aiventurer| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|828.6 MB| + +## References + +https://huggingface.co/AIventurer/xlm-roberta-base-finetuned-panx-it \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_italian_aiventurer_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_italian_aiventurer_pipeline_en.md new file mode 100644 index 00000000000000..90f890bbaedaa9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_italian_aiventurer_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_italian_aiventurer_pipeline pipeline XlmRoBertaForTokenClassification from AIventurer +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_italian_aiventurer_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_italian_aiventurer_pipeline` is a English model originally trained by AIventurer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_aiventurer_pipeline_en_5.5.0_3.0_1725772746552.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_aiventurer_pipeline_en_5.5.0_3.0_1725772746552.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_panx_italian_aiventurer_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_panx_italian_aiventurer_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_italian_aiventurer_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|828.6 MB| + +## References + +https://huggingface.co/AIventurer/xlm-roberta-base-finetuned-panx-it + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_italian_leosol_en.md b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_italian_leosol_en.md new file mode 100644 index 00000000000000..de0984bd26c632 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_panx_italian_leosol_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_panx_italian_leosol XlmRoBertaForTokenClassification from leosol +author: John Snow Labs +name: xlm_roberta_base_finetuned_panx_italian_leosol +date: 2024-09-08 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_panx_italian_leosol` is a English model originally trained by leosol. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_leosol_en_5.5.0_3.0_1725773719109.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_panx_italian_leosol_en_5.5.0_3.0_1725773719109.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+  .setInputCol('text') \
+  .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+  .setInputCols(['document']) \
+  .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_italian_leosol","en") \
+  .setInputCols(["document","token"]) \
+  .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_panx_italian_leosol", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_panx_italian_leosol| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|816.7 MB| + +## References + +https://huggingface.co/leosol/xlm-roberta-base-finetuned-panx-it \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_wikiann_hindi_en.md b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_wikiann_hindi_en.md new file mode 100644 index 00000000000000..d443dbc953cc40 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_wikiann_hindi_en.md @@ -0,0 +1,94 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_wikiann_hindi XlmRoBertaForTokenClassification from Someman +author: John Snow Labs +name: xlm_roberta_base_finetuned_wikiann_hindi +date: 2024-09-08 +tags: [en, open_source, onnx, token_classification, xlm_roberta, ner] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_wikiann_hindi` is a English model originally trained by Someman. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_wikiann_hindi_en_5.5.0_3.0_1725772511570.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_wikiann_hindi_en_5.5.0_3.0_1725772511570.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+  .setInputCol('text') \
+  .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+  .setInputCols(['document']) \
+  .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_wikiann_hindi","en") \
+  .setInputCols(["document","token"]) \
+  .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_finetuned_wikiann_hindi", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
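+
+If the fitted pipeline is to be reused, it can be persisted with the standard Spark ML API; the paths below are illustrative.
+
+```python
+from pyspark.ml import PipelineModel
+
+# Save the fitted pipeline and load it back later without refitting.
+pipelineModel.write().overwrite().save("/tmp/xlmr_wikiann_hindi_pipeline")
+restored = PipelineModel.load("/tmp/xlmr_wikiann_hindi_pipeline")
+restoredDF = restored.transform(data)
+```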
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_wikiann_hindi| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|838.5 MB| + +## References + +https://huggingface.co/Someman/xlm-roberta-base-finetuned-wikiann-hi \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_wikiann_hindi_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_wikiann_hindi_pipeline_en.md new file mode 100644 index 00000000000000..e48ff0f0066fa2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_finetuned_wikiann_hindi_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_finetuned_wikiann_hindi_pipeline pipeline XlmRoBertaForTokenClassification from Someman +author: John Snow Labs +name: xlm_roberta_base_finetuned_wikiann_hindi_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_finetuned_wikiann_hindi_pipeline` is a English model originally trained by Someman. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_wikiann_hindi_pipeline_en_5.5.0_3.0_1725772574794.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_finetuned_wikiann_hindi_pipeline_en_5.5.0_3.0_1725772574794.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_finetuned_wikiann_hindi_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_finetuned_wikiann_hindi_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_finetuned_wikiann_hindi_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|838.5 MB| + +## References + +https://huggingface.co/Someman/xlm-roberta-base-finetuned-wikiann-hi + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_60000_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_60000_pipeline_en.md new file mode 100644 index 00000000000000..04e3dc9bb2d31a --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_60000_pipeline_en.md @@ -0,0 +1,70 @@ +--- +layout: model +title: English xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_60000_pipeline pipeline XlmRoBertaForSequenceClassification from vocabtrimmer +author: John Snow Labs +name: xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_60000_pipeline +date: 2024-09-08 +tags: [en, open_source, pipeline, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_60000_pipeline` is a English model originally trained by vocabtrimmer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_60000_pipeline_en_5.5.0_3.0_1725780398776.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_60000_pipeline_en_5.5.0_3.0_1725780398776.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_60000_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_60000_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|xlm_roberta_base_tweet_sentiment_spanish_trimmed_spanish_60000_pipeline|
+|Type:|pipeline|
+|Compatibility:|Spark NLP 5.5.0+|
+|License:|Open Source|
+|Edition:|Official|
+|Language:|en|
+|Size:|442.0 MB|
+
+## References
+
+https://huggingface.co/vocabtrimmer/xlm-roberta-base-tweet-sentiment-es-trimmed-es-60000
+
+## Included Models
+
+- DocumentAssembler
+- TokenizerModel
+- XlmRoBertaForSequenceClassification
\ No newline at end of file
diff --git a/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_word_shopsign_nepal_bhasa_en.md b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_word_shopsign_nepal_bhasa_en.md
new file mode 100644
index 00000000000000..e2b9a3ba9d2602
--- /dev/null
+++ b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_word_shopsign_nepal_bhasa_en.md
@@ -0,0 +1,94 @@
+---
+layout: model
+title: English xlm_roberta_base_word_shopsign_nepal_bhasa XlmRoBertaForTokenClassification from HyungYoun
+author: John Snow Labs
+name: xlm_roberta_base_word_shopsign_nepal_bhasa
+date: 2024-09-08
+tags: [en, open_source, onnx, token_classification, xlm_roberta, ner]
+task: Named Entity Recognition
+language: en
+edition: Spark NLP 5.5.0
+spark_version: 3.0
+supported: true
+engine: onnx
+annotator: XlmRoBertaForTokenClassification
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+Pretrained XlmRoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm_roberta_base_word_shopsign_nepal_bhasa` is an English model originally trained by HyungYoun.
+
+{:.btn-box}
+
+
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_word_shopsign_nepal_bhasa_en_5.5.0_3.0_1725774277110.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_word_shopsign_nepal_bhasa_en_5.5.0_3.0_1725774277110.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_word_shopsign_nepal_bhasa","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("ner")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols("document")
+  .setOutputCol("token")
+
+val tokenClassifier = XlmRoBertaForTokenClassification.pretrained("xlm_roberta_base_word_shopsign_nepal_bhasa", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("ner")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
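+
+If grouped entities are preferred over per-token tags, a `NerConverter` stage can be appended to the example pipeline above, assuming the model's labels follow the usual IOB scheme. A minimal sketch reusing the column names from that snippet:
+
+```python
+# Sketch: converting the IOB tags in "ner" into entity chunks.
+from sparknlp.annotator import NerConverter
+
+ner_converter = NerConverter() \
+    .setInputCols(["document", "token", "ner"]) \
+    .setOutputCol("ner_chunk")
+
+chunk_pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier, ner_converter])
+chunk_pipeline.fit(data).transform(data).select("ner_chunk.result").show(truncate=False)
+```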
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|xlm_roberta_base_word_shopsign_nepal_bhasa|
+|Compatibility:|Spark NLP 5.5.0+|
+|License:|Open Source|
+|Edition:|Official|
+|Input Labels:|[document, token]|
+|Output Labels:|[ner]|
+|Language:|en|
+|Size:|794.3 MB|
+
+## References
+
+https://huggingface.co/HyungYoun/xlm-roberta-base-word-shopsign-new
\ No newline at end of file
diff --git a/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_word_shopsign_nepal_bhasa_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_word_shopsign_nepal_bhasa_pipeline_en.md
new file mode 100644
index 00000000000000..54ef81712856fd
--- /dev/null
+++ b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_base_word_shopsign_nepal_bhasa_pipeline_en.md
@@ -0,0 +1,70 @@
+---
+layout: model
+title: English xlm_roberta_base_word_shopsign_nepal_bhasa_pipeline pipeline XlmRoBertaForTokenClassification from HyungYoun
+author: John Snow Labs
+name: xlm_roberta_base_word_shopsign_nepal_bhasa_pipeline
+date: 2024-09-08
+tags: [en, open_source, pipeline, onnx]
+task: Named Entity Recognition
+language: en
+edition: Spark NLP 5.5.0
+spark_version: 3.0
+supported: true
+annotator: PipelineModel
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+Pretrained XlmRoBertaForTokenClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm_roberta_base_word_shopsign_nepal_bhasa_pipeline` is an English model originally trained by HyungYoun.
+
+{:.btn-box}
+
+
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_word_shopsign_nepal_bhasa_pipeline_en_5.5.0_3.0_1725774404747.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_base_word_shopsign_nepal_bhasa_pipeline_en_5.5.0_3.0_1725774404747.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlm_roberta_base_word_shopsign_nepal_bhasa_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlm_roberta_base_word_shopsign_nepal_bhasa_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_roberta_base_word_shopsign_nepal_bhasa_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|en| +|Size:|794.3 MB| + +## References + +https://huggingface.co/HyungYoun/xlm-roberta-base-word-shopsign-new + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForTokenClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_longformer_base_4096_xnli_french_3_classes_rua_wl_3_classes_fr.md b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_longformer_base_4096_xnli_french_3_classes_rua_wl_3_classes_fr.md new file mode 100644 index 00000000000000..420941e70368cf --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_longformer_base_4096_xnli_french_3_classes_rua_wl_3_classes_fr.md @@ -0,0 +1,94 @@ +--- +layout: model +title: French xlm_roberta_longformer_base_4096_xnli_french_3_classes_rua_wl_3_classes XlmRoBertaForSequenceClassification from waboucay +author: John Snow Labs +name: xlm_roberta_longformer_base_4096_xnli_french_3_classes_rua_wl_3_classes +date: 2024-09-08 +tags: [fr, open_source, onnx, sequence_classification, xlm_roberta] +task: Text Classification +language: fr +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlm_roberta_longformer_base_4096_xnli_french_3_classes_rua_wl_3_classes` is a French model originally trained by waboucay. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_longformer_base_4096_xnli_french_3_classes_rua_wl_3_classes_fr_5.5.0_3.0_1725781086233.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_longformer_base_4096_xnli_french_3_classes_rua_wl_3_classes_fr_5.5.0_3.0_1725781086233.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_longformer_base_4096_xnli_french_3_classes_rua_wl_3_classes","fr") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_longformer_base_4096_xnli_french_3_classes_rua_wl_3_classes", "fr")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|xlm_roberta_longformer_base_4096_xnli_french_3_classes_rua_wl_3_classes|
+|Compatibility:|Spark NLP 5.5.0+|
+|License:|Open Source|
+|Edition:|Official|
+|Input Labels:|[document, token]|
+|Output Labels:|[class]|
+|Language:|fr|
+|Size:|1.1 GB|
+
+## References
+
+https://huggingface.co/waboucay/xlm-roberta-longformer-base-4096-xnli_fr_3_classes-rua_wl_3_classes
\ No newline at end of file
diff --git a/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_sentiment_romanurdu_en.md b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_sentiment_romanurdu_en.md
new file mode 100644
index 00000000000000..588789f13c8b7a
--- /dev/null
+++ b/docs/_posts/ahmedlone127/2024-09-08-xlm_roberta_sentiment_romanurdu_en.md
@@ -0,0 +1,94 @@
+---
+layout: model
+title: English xlm_roberta_sentiment_romanurdu XlmRoBertaForSequenceClassification from HowMannyMore
+author: John Snow Labs
+name: xlm_roberta_sentiment_romanurdu
+date: 2024-09-08
+tags: [en, open_source, onnx, sequence_classification, xlm_roberta]
+task: Text Classification
+language: en
+edition: Spark NLP 5.5.0
+spark_version: 3.0
+supported: true
+engine: onnx
+annotator: XlmRoBertaForSequenceClassification
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm_roberta_sentiment_romanurdu` is an English model originally trained by HowMannyMore.
+
+{:.btn-box}
+
+
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_roberta_sentiment_romanurdu_en_5.5.0_3.0_1725779911242.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_roberta_sentiment_romanurdu_en_5.5.0_3.0_1725779911242.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_sentiment_romanurdu","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_roberta_sentiment_romanurdu", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|xlm_roberta_sentiment_romanurdu|
+|Compatibility:|Spark NLP 5.5.0+|
+|License:|Open Source|
+|Edition:|Official|
+|Input Labels:|[document, token]|
+|Output Labels:|[class]|
+|Language:|en|
+|Size:|1.0 GB|
+
+## References
+
+https://huggingface.co/HowMannyMore/xlm-roberta-sentiment-romanurdu
\ No newline at end of file
diff --git a/docs/_posts/ahmedlone127/2024-09-08-xlm_twitter_politics_sentiment_en.md b/docs/_posts/ahmedlone127/2024-09-08-xlm_twitter_politics_sentiment_en.md
new file mode 100644
index 00000000000000..0a23bf629601cf
--- /dev/null
+++ b/docs/_posts/ahmedlone127/2024-09-08-xlm_twitter_politics_sentiment_en.md
@@ -0,0 +1,94 @@
+---
+layout: model
+title: English xlm_twitter_politics_sentiment XlmRoBertaForSequenceClassification from cardiffnlp
+author: John Snow Labs
+name: xlm_twitter_politics_sentiment
+date: 2024-09-08
+tags: [en, open_source, onnx, sequence_classification, xlm_roberta]
+task: Text Classification
+language: en
+edition: Spark NLP 5.5.0
+spark_version: 3.0
+supported: true
+engine: onnx
+annotator: XlmRoBertaForSequenceClassification
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+Pretrained XlmRoBertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlm_twitter_politics_sentiment` is an English model originally trained by cardiffnlp.
+
+{:.btn-box}
+
+
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlm_twitter_politics_sentiment_en_5.5.0_3.0_1725779909966.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlm_twitter_politics_sentiment_en_5.5.0_3.0_1725779909966.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_twitter_politics_sentiment","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = XlmRoBertaForSequenceClassification.pretrained("xlm_twitter_politics_sentiment", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
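+
+Once `pipelineDF` has been computed as above, the predicted label and the per-class scores kept in the annotation metadata can be inspected. A minimal sketch, reusing the column names from the snippet:
+
+```python
+# Sketch: inspecting the sentiment predictions produced above.
+from pyspark.sql import functions as F
+
+# Predicted label per input row.
+pipelineDF.select("text", "class.result").show(truncate=False)
+
+# Per-class scores are stored in the annotation metadata.
+pipelineDF.select(F.explode("class").alias("pred")) \
+    .select("pred.result", "pred.metadata") \
+    .show(truncate=False)
+```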
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlm_twitter_politics_sentiment| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|1.0 GB| + +## References + +https://huggingface.co/cardiffnlp/xlm-twitter-politics-sentiment \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-xlmroberta_classifier_autonlp_fake_news_detection_system_29906863_hi.md b/docs/_posts/ahmedlone127/2024-09-08-xlmroberta_classifier_autonlp_fake_news_detection_system_29906863_hi.md new file mode 100644 index 00000000000000..2a998142382512 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-xlmroberta_classifier_autonlp_fake_news_detection_system_29906863_hi.md @@ -0,0 +1,104 @@ +--- +layout: model +title: Hindi XlmRobertaForSequenceClassification Cased model (from rohansingh) +author: John Snow Labs +name: xlmroberta_classifier_autonlp_fake_news_detection_system_29906863 +date: 2024-09-08 +tags: [hi, open_source, xlm_roberta, sequence_classification, classification, onnx] +task: Text Classification +language: hi +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRobertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `autonlp-Fake-news-detection-system-29906863` is a Hindi model originally trained by `rohansingh`. + +## Predicted Entities + +`positive`, `negative` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_classifier_autonlp_fake_news_detection_system_29906863_hi_5.5.0_3.0_1725780717614.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_classifier_autonlp_fake_news_detection_system_29906863_hi_5.5.0_3.0_1725780717614.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols("document") \
+    .setOutputCol("token")
+
+seq_classifier = XlmRoBertaForSequenceClassification.pretrained("xlmroberta_classifier_autonlp_fake_news_detection_system_29906863","hi") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline(stages=[documentAssembler, tokenizer, seq_classifier])
+
+data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val seq_classifier = XlmRoBertaForSequenceClassification.pretrained("xlmroberta_classifier_autonlp_fake_news_detection_system_29906863","hi")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, seq_classifier))
+
+val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text")
+
+val result = pipeline.fit(data).transform(data)
+```
+
+{:.nlu-block}
+```python
+import nlu
+nlu.load("hi.classify.xlmr_roberta.news.").predict("""PUT YOUR STRING HERE""")
+```
+
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|xlmroberta_classifier_autonlp_fake_news_detection_system_29906863|
+|Compatibility:|Spark NLP 5.5.0+|
+|License:|Open Source|
+|Edition:|Official|
+|Input Labels:|[document, token]|
+|Output Labels:|[class]|
+|Language:|hi|
+|Size:|786.9 MB|
+
+## References
+
+- https://huggingface.co/rohansingh/autonlp-Fake-news-detection-system-29906863
\ No newline at end of file
diff --git a/docs/_posts/ahmedlone127/2024-09-08-xlmroberta_classifier_autonlp_fake_news_detection_system_29906863_pipeline_hi.md b/docs/_posts/ahmedlone127/2024-09-08-xlmroberta_classifier_autonlp_fake_news_detection_system_29906863_pipeline_hi.md
new file mode 100644
index 00000000000000..01c5a2be3a2c9f
--- /dev/null
+++ b/docs/_posts/ahmedlone127/2024-09-08-xlmroberta_classifier_autonlp_fake_news_detection_system_29906863_pipeline_hi.md
@@ -0,0 +1,70 @@
+---
+layout: model
+title: Hindi xlmroberta_classifier_autonlp_fake_news_detection_system_29906863_pipeline pipeline XlmRoBertaForSequenceClassification from rohansingh
+author: John Snow Labs
+name: xlmroberta_classifier_autonlp_fake_news_detection_system_29906863_pipeline
+date: 2024-09-08
+tags: [hi, open_source, pipeline, onnx]
+task: Text Classification
+language: hi
+edition: Spark NLP 5.5.0
+spark_version: 3.0
+supported: true
+annotator: PipelineModel
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlmroberta_classifier_autonlp_fake_news_detection_system_29906863_pipeline` is a Hindi model originally trained by rohansingh.
+
+{:.btn-box}
+
+
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_classifier_autonlp_fake_news_detection_system_29906863_pipeline_hi_5.5.0_3.0_1725780850195.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_classifier_autonlp_fake_news_detection_system_29906863_pipeline_hi_5.5.0_3.0_1725780850195.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmroberta_classifier_autonlp_fake_news_detection_system_29906863_pipeline", lang = "hi") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmroberta_classifier_autonlp_fake_news_detection_system_29906863_pipeline", lang = "hi") +val annotations = pipeline.transform(df) + +``` +
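+
+For single strings, the same pretrained pipeline can be used through `annotate` instead of `transform`; the `"class"` output key below is assumed from the sequence-classification stage included in this pipeline, and the input is a placeholder. A minimal sketch:
+
+```python
+# Sketch: single-string inference with the pretrained pipeline.
+from sparknlp.pretrained import PretrainedPipeline
+
+pipeline = PretrainedPipeline("xlmroberta_classifier_autonlp_fake_news_detection_system_29906863_pipeline", lang="hi")
+
+result = pipeline.annotate("PUT YOUR STRING HERE")
+print(result["class"])  # predicted label(s) from the sequence classifier
+```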
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_classifier_autonlp_fake_news_detection_system_29906863_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|hi| +|Size:|786.9 MB| + +## References + +https://huggingface.co/rohansingh/autonlp-Fake-news-detection-system-29906863 + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-xlmroberta_classifier_deoffxlmr_mono_tamil_pipeline_ta.md b/docs/_posts/ahmedlone127/2024-09-08-xlmroberta_classifier_deoffxlmr_mono_tamil_pipeline_ta.md new file mode 100644 index 00000000000000..3605388fb3792d --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-xlmroberta_classifier_deoffxlmr_mono_tamil_pipeline_ta.md @@ -0,0 +1,70 @@ +--- +layout: model +title: Tamil xlmroberta_classifier_deoffxlmr_mono_tamil_pipeline pipeline XlmRoBertaForSequenceClassification from Hate-speech-CNERG +author: John Snow Labs +name: xlmroberta_classifier_deoffxlmr_mono_tamil_pipeline +date: 2024-09-08 +tags: [ta, open_source, pipeline, onnx] +task: Text Classification +language: ta +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +annotator: PipelineModel +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xlmroberta_classifier_deoffxlmr_mono_tamil_pipeline` is a Tamil model originally trained by Hate-speech-CNERG. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_classifier_deoffxlmr_mono_tamil_pipeline_ta_5.5.0_3.0_1725781627635.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_classifier_deoffxlmr_mono_tamil_pipeline_ta_5.5.0_3.0_1725781627635.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmroberta_classifier_deoffxlmr_mono_tamil_pipeline", lang = "ta") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmroberta_classifier_deoffxlmr_mono_tamil_pipeline", lang = "ta") +val annotations = pipeline.transform(df) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xlmroberta_classifier_deoffxlmr_mono_tamil_pipeline| +|Type:|pipeline| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Language:|ta| +|Size:|1.0 GB| + +## References + +https://huggingface.co/Hate-speech-CNERG/deoffxlmr-mono-tamil + +## Included Models + +- DocumentAssembler +- TokenizerModel +- XlmRoBertaForSequenceClassification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2024-09-08-xlmroberta_classifier_deoffxlmr_mono_tamil_ta.md b/docs/_posts/ahmedlone127/2024-09-08-xlmroberta_classifier_deoffxlmr_mono_tamil_ta.md new file mode 100644 index 00000000000000..ba315b307801c5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2024-09-08-xlmroberta_classifier_deoffxlmr_mono_tamil_ta.md @@ -0,0 +1,105 @@ +--- +layout: model +title: Tamil XlmRobertaForSequenceClassification Cased model (from Hate-speech-CNERG) +author: John Snow Labs +name: xlmroberta_classifier_deoffxlmr_mono_tamil +date: 2024-09-08 +tags: [ta, open_source, xlm_roberta, sequence_classification, classification, onnx] +task: Text Classification +language: ta +edition: Spark NLP 5.5.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: XlmRoBertaForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRobertaForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `deoffxlmr-mono-tamil` is a Tamil model originally trained by `Hate-speech-CNERG`. + +## Predicted Entities + +`Not_offensive`, `Off_target_other`, `Off_target_group`, `Profanity`, `Off_target_ind`, `Not_in_intended_language` + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmroberta_classifier_deoffxlmr_mono_tamil_ta_5.5.0_3.0_1725781579163.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmroberta_classifier_deoffxlmr_mono_tamil_ta_5.5.0_3.0_1725781579163.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+tokenizer = Tokenizer() \
+    .setInputCols("document") \
+    .setOutputCol("token")
+
+seq_classifier = XlmRoBertaForSequenceClassification.pretrained("xlmroberta_classifier_deoffxlmr_mono_tamil","ta") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline(stages=[documentAssembler, tokenizer, seq_classifier])
+
+data = spark.createDataFrame([["PUT YOUR STRING HERE"]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val seq_classifier = XlmRoBertaForSequenceClassification.pretrained("xlmroberta_classifier_deoffxlmr_mono_tamil","ta")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, seq_classifier))
+
+val data = Seq("PUT YOUR STRING HERE").toDS.toDF("text")
+
+val result = pipeline.fit(data).transform(data)
+```
+
+{:.nlu-block}
+```python
+import nlu
+nlu.load("ta.classify.xlmr_roberta").predict("""PUT YOUR STRING HERE""")
+```
+
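+
+For low-latency scoring of individual documents, the fitted pipeline from the snippet above can be wrapped in a `LightPipeline`, which avoids building a DataFrame per request. A minimal sketch using the variables defined in that snippet:
+
+```python
+# Sketch: LightPipeline inference on top of the fitted pipeline above.
+from sparknlp.base import LightPipeline
+
+light_pipeline = LightPipeline(pipeline.fit(data))
+prediction = light_pipeline.annotate("PUT YOUR STRING HERE")
+print(prediction["class"])  # predicted label from the "class" output column
+```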
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|xlmroberta_classifier_deoffxlmr_mono_tamil|
+|Compatibility:|Spark NLP 5.5.0+|
+|License:|Open Source|
+|Edition:|Official|
+|Input Labels:|[document, token]|
+|Output Labels:|[class]|
+|Language:|ta|
+|Size:|1.0 GB|
+
+## References
+
+- https://huggingface.co/Hate-speech-CNERG/deoffxlmr-mono-tamil
+- https://www.aclweb.org/anthology/2021.dravidianlangtech-1.38/
\ No newline at end of file
diff --git a/docs/_posts/ahmedlone127/2024-09-08-xlmrobertalongforquestionanswering_base_squad2_512_4096_en.md b/docs/_posts/ahmedlone127/2024-09-08-xlmrobertalongforquestionanswering_base_squad2_512_4096_en.md
new file mode 100644
index 00000000000000..5884a3563b8b9a
--- /dev/null
+++ b/docs/_posts/ahmedlone127/2024-09-08-xlmrobertalongforquestionanswering_base_squad2_512_4096_en.md
@@ -0,0 +1,86 @@
+---
+layout: model
+title: English xlmrobertalongforquestionanswering_base_squad2_512_4096 XlmRoBertaForQuestionAnswering from sadaqabdo
+author: John Snow Labs
+name: xlmrobertalongforquestionanswering_base_squad2_512_4096
+date: 2024-09-08
+tags: [en, open_source, onnx, question_answering, xlm_roberta]
+task: Question Answering
+language: en
+edition: Spark NLP 5.5.0
+spark_version: 3.0
+supported: true
+engine: onnx
+annotator: XlmRoBertaForQuestionAnswering
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+Pretrained XlmRoBertaForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlmrobertalongforquestionanswering_base_squad2_512_4096` is an English model originally trained by sadaqabdo.
+
+{:.btn-box}
+
+
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmrobertalongforquestionanswering_base_squad2_512_4096_en_5.5.0_3.0_1725754979237.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmrobertalongforquestionanswering_base_squad2_512_4096_en_5.5.0_3.0_1725754979237.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = MultiDocumentAssembler() \
+    .setInputCols(["question", "context"]) \
+    .setOutputCols(["document_question", "document_context"])
+
+spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlmrobertalongforquestionanswering_base_squad2_512_4096","en") \
+    .setInputCols(["document_question","document_context"]) \
+    .setOutputCol("answer")
+
+pipeline = Pipeline().setStages([documentAssembler, spanClassifier])
+data = spark.createDataFrame([["What framework do I use?","I use spark-nlp."]]).toDF("question", "context")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new MultiDocumentAssembler()
+  .setInputCols(Array("question", "context"))
+  .setOutputCols(Array("document_question", "document_context"))
+
+val spanClassifier = XlmRoBertaForQuestionAnswering.pretrained("xlmrobertalongforquestionanswering_base_squad2_512_4096", "en")
+  .setInputCols(Array("document_question","document_context"))
+  .setOutputCol("answer")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier))
+val data = Seq(("What framework do I use?", "I use spark-nlp.")).toDS.toDF("question", "context")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
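+
+With `pipelineDF` computed as above, the predicted answer spans can be read from the `answer` output column set on the classifier. A minimal sketch:
+
+```python
+# Sketch: reading the predicted answers produced above.
+pipelineDF.select("document_question.result", "answer.result").show(truncate=False)
+```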
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|xlmrobertalongforquestionanswering_base_squad2_512_4096|
+|Compatibility:|Spark NLP 5.5.0+|
+|License:|Open Source|
+|Edition:|Official|
+|Input Labels:|[document_question, document_context]|
+|Output Labels:|[answer]|
+|Language:|en|
+|Size:|883.5 MB|
+
+## References
+
+https://huggingface.co/sadaqabdo/XLMRobertaLongForQuestionAnswering-base-squad2-512-4096
\ No newline at end of file
diff --git a/docs/_posts/ahmedlone127/2024-09-08-xlmrobertalongforquestionanswering_base_squad2_512_4096_pipeline_en.md b/docs/_posts/ahmedlone127/2024-09-08-xlmrobertalongforquestionanswering_base_squad2_512_4096_pipeline_en.md
new file mode 100644
index 00000000000000..b65e4af964aefe
--- /dev/null
+++ b/docs/_posts/ahmedlone127/2024-09-08-xlmrobertalongforquestionanswering_base_squad2_512_4096_pipeline_en.md
@@ -0,0 +1,69 @@
+---
+layout: model
+title: English xlmrobertalongforquestionanswering_base_squad2_512_4096_pipeline pipeline XlmRoBertaForQuestionAnswering from sadaqabdo
+author: John Snow Labs
+name: xlmrobertalongforquestionanswering_base_squad2_512_4096_pipeline
+date: 2024-09-08
+tags: [en, open_source, pipeline, onnx]
+task: Question Answering
+language: en
+edition: Spark NLP 5.5.0
+spark_version: 3.0
+supported: true
+annotator: PipelineModel
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+Pretrained XlmRoBertaForQuestionAnswering, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xlmrobertalongforquestionanswering_base_squad2_512_4096_pipeline` is an English model originally trained by sadaqabdo.
+
+{:.btn-box}
+
+
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xlmrobertalongforquestionanswering_base_squad2_512_4096_pipeline_en_5.5.0_3.0_1725755043435.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xlmrobertalongforquestionanswering_base_squad2_512_4096_pipeline_en_5.5.0_3.0_1725755043435.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +pipeline = PretrainedPipeline("xlmrobertalongforquestionanswering_base_squad2_512_4096_pipeline", lang = "en") +annotations = pipeline.transform(df) + +``` +```scala + +val pipeline = new PretrainedPipeline("xlmrobertalongforquestionanswering_base_squad2_512_4096_pipeline", lang = "en") +val annotations = pipeline.transform(df) + +``` +
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|xlmrobertalongforquestionanswering_base_squad2_512_4096_pipeline|
+|Type:|pipeline|
+|Compatibility:|Spark NLP 5.5.0+|
+|License:|Open Source|
+|Edition:|Official|
+|Language:|en|
+|Size:|883.5 MB|
+
+## References
+
+https://huggingface.co/sadaqabdo/XLMRobertaLongForQuestionAnswering-base-squad2-512-4096
+
+## Included Models
+
+- MultiDocumentAssembler
+- XlmRoBertaForQuestionAnswering
\ No newline at end of file
diff --git a/docs/_posts/ahmedlone127/2024-09-08-xtremedistil_l6_h384_uncased_en.md b/docs/_posts/ahmedlone127/2024-09-08-xtremedistil_l6_h384_uncased_en.md
new file mode 100644
index 00000000000000..ca0d454cfcc196
--- /dev/null
+++ b/docs/_posts/ahmedlone127/2024-09-08-xtremedistil_l6_h384_uncased_en.md
@@ -0,0 +1,94 @@
+---
+layout: model
+title: English xtremedistil_l6_h384_uncased BertForSequenceClassification from microsoft
+author: John Snow Labs
+name: xtremedistil_l6_h384_uncased
+date: 2024-09-08
+tags: [en, open_source, onnx, sequence_classification, bert]
+task: Text Classification
+language: en
+edition: Spark NLP 5.5.0
+spark_version: 3.0
+supported: true
+engine: onnx
+annotator: BertForSequenceClassification
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+Pretrained BertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xtremedistil_l6_h384_uncased` is an English model originally trained by microsoft.
+
+{:.btn-box}
+
+
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xtremedistil_l6_h384_uncased_en_5.5.0_3.0_1725761334263.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xtremedistil_l6_h384_uncased_en_5.5.0_3.0_1725761334263.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+documentAssembler = DocumentAssembler() \
+    .setInputCol('text') \
+    .setOutputCol('document')
+
+tokenizer = Tokenizer() \
+    .setInputCols(['document']) \
+    .setOutputCol('token')
+
+sequenceClassifier = BertForSequenceClassification.pretrained("xtremedistil_l6_h384_uncased","en") \
+    .setInputCols(["document","token"]) \
+    .setOutputCol("class")
+
+pipeline = Pipeline().setStages([documentAssembler, tokenizer, sequenceClassifier])
+data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val documentAssembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("document")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("document"))
+  .setOutputCol("token")
+
+val sequenceClassifier = BertForSequenceClassification.pretrained("xtremedistil_l6_h384_uncased", "en")
+  .setInputCols(Array("document","token"))
+  .setOutputCol("class")
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, sequenceClassifier))
+val data = Seq("I love spark-nlp").toDS.toDF("text")
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xtremedistil_l6_h384_uncased| +|Compatibility:|Spark NLP 5.5.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|84.2 MB| + +## References + +https://huggingface.co/microsoft/xtremedistil-l6-h384-uncased \ No newline at end of file